diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py index 155e91bef55f8..bd65eb3d1588b 100644 --- a/.ci/premerge_advisor_explain.py +++ b/.ci/premerge_advisor_explain.py @@ -129,7 +129,8 @@ def main( # If the job succeeds and there is not an existing comment, we # should not write one to reduce noise. comments = [] - with open("comments", "w") as comment_file_handle: + comments_file_name = f"comments-{platform.system()}-{platform.machine()}" + with open(comments_file_name, "w") as comment_file_handle: json.dump(comments, comment_file_handle) diff --git a/.github/workflows/issue-write-test.yaml b/.github/workflows/issue-write-test.yaml new file mode 100644 index 0000000000000..a54e716d1dee9 --- /dev/null +++ b/.github/workflows/issue-write-test.yaml @@ -0,0 +1,33 @@ +name: Test Issue Write + +permissions: + contents: read + +on: + pull_request: + paths: + - '.github/workflows/issue-write-test.yaml' + - '.github/workflows/issue-write.yml' + +jobs: + test-issue-write: + name: "Test Issue Write" + runs-on: ubuntu-24.04 + if: github.repository == 'llvm/llvm-project' + steps: + - name: Write Comment + run: | + echo '[{"body": "This is a comment for testing the issue write workflow"}]' > comments-foo + echo '[{"body": "This is another comment for testing the issue write workflow that was placed in a separate file"}]' > comments-bar + - name: Upload Comment + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: workflow-args-foo + path: | + comments-foo + - name: Upload Comment + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: workflow-args-bar + path: | + comments-bar diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml index ece6081ce9ba6..eebaf89e027be 100644 --- a/.github/workflows/issue-write.yml +++ b/.github/workflows/issue-write.yml @@ -8,6 +8,7 @@ on: - "PR Request Release Note" - "Code lint" - "CI Checks" + - "Test 
Issue Write" types: - completed @@ -40,13 +41,18 @@ jobs: artifact-name: workflow-args - name: 'Comment on PR' - if: steps.download-artifact.outputs.artifact-id != '' + if: steps.download-artifact.outputs.artifact-ids != '' uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | var fs = require('fs'); - const comments = JSON.parse(fs.readFileSync('./comments')); + var comments = [] + for (local_file of fs.readdirSync('.')) { + if (local_file.startsWith("comments")) { + comments.push(...JSON.parse(fs.readFileSync(local_file))) + } + } if (!comments || comments.length == 0) { return; } @@ -155,5 +161,5 @@ jobs: - name: Dump comments file if: >- always() && - steps.download-artifact.outputs.artifact-id != '' + steps.download-artifact.outputs.artifact-ids != '' run: cat comments diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml index 10f7f6a827b30..4ea5397edbeac 100644 --- a/.github/workflows/premerge.yaml +++ b/.github/workflows/premerge.yaml @@ -124,9 +124,9 @@ jobs: if: ${{ always() && !startsWith(matrix.runs-on, 'depot-ubuntu-24.04-arm') }} continue-on-error: true with: - name: workflow-args + name: workflow-args-x86-linux path: | - comments + comments-Linux-x86_64 premerge-checks-windows: name: Build and Test Windows @@ -185,6 +185,14 @@ jobs: path: artifacts/ retention-days: 5 include-hidden-files: 'true' + - name: Upload Comment + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + if: always() + continue-on-error: true + with: + name: workflow-args-windows + path: | + comments-Windows-x86_64 premerge-check-macos: name: MacOS Premerge Checks diff --git a/.github/workflows/test-unprivileged-download-artifact.yml b/.github/workflows/test-unprivileged-download-artifact.yml index 0831135297b63..ad41cdfdb7525 100644 --- a/.github/workflows/test-unprivileged-download-artifact.yml +++ 
b/.github/workflows/test-unprivileged-download-artifact.yml @@ -21,15 +21,23 @@ jobs: if: github.repository_owner == 'llvm' runs-on: ubuntu-24.04 steps: - - name: Create Test File + - name: Create Test Files run: | - echo "test" > comment - - name: Upload Test File + echo "foo" > comment1 + echo "bar" > comment2 + - name: Upload Test File 1 uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: - name: workflow-args + name: artifact-name-1 path: | - comment + comment1 + - name: Upload Test File 2 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: artifact-name-2 + path: | + comment2 + test-download: name: Test Unprivileged Download Artifact @@ -47,8 +55,10 @@ jobs: id: download-artifact with: run-id: ${{ github.run_id }} - artifact-name: workflow-args + artifact-name: artifact-name- - name: Assert That Contents are the Same run: | - cat comment - [[ "$(cat comment)" == "test" ]] + cat comment1 + [[ "$(cat comment1)" == "foo" ]] + cat comment2 + [[ "$(cat comment2)" == "bar" ]] diff --git a/.github/workflows/unprivileged-download-artifact/action.yml b/.github/workflows/unprivileged-download-artifact/action.yml index 72815b26bcf41..173b8ca93252f 100644 --- a/.github/workflows/unprivileged-download-artifact/action.yml +++ b/.github/workflows/unprivileged-download-artifact/action.yml @@ -19,9 +19,9 @@ outputs: The filename of the downloaded artifact or the empty string if the artifact was not found. value: ${{ steps.download-artifact.outputs.filename }} - artifact-id: + artifact-ids: description: "The id of the artifact being downloaded." 
- value: ${{ steps.artifact-url.outputs.id }} + value: ${{ steps.artifact-url.outputs.ids }} runs: @@ -36,46 +36,67 @@ runs: response = await github.rest.actions.listArtifactsForRepo({ owner: context.repo.owner, repo: context.repo.repo, - name: "${{ inputs.artifact-name }}" }) } else { response = await github.rest.actions.listWorkflowRunArtifacts({ owner: context.repo.owner, repo: context.repo.repo, run_id: "${{ inputs.run-id }}", - name: "${{ inputs.artifact-name }}" }) } console.log(response) + artifacts_to_download = [] for (artifact of response.data.artifacts) { + if (artifact.name.startsWith("${{ inputs.artifact-name }}")) { + artifacts_to_download.push(artifact) + } + } + + for (artifact of artifacts_to_download) { console.log(artifact); } - if (response.data.artifacts.length == 0) { - console.log("Could not find artifact ${{ inputs.artifact-name }} for workflow run ${{ inputs.run-id }}") + if (artifacts_to_download.length == 0) { + console.log("Could not find artifacts starting with name ${{ inputs.artifact-name }} for workflow run ${{ inputs.run-id }}") return; } - const url_response = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: response.data.artifacts[0].id, - archive_format: "zip" - }) + artifact_ids = [] + artifact_urls = [] + artifact_names = [] + for (artifact_to_download of artifacts_to_download) { + const url_response = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: artifact_to_download.id, + archive_format: "zip" + }) + + artifact_ids.push(artifact_to_download.id) + artifact_urls.push('"' + url_response.url + '"') + artifact_names.push('"' + artifact_to_download.name + '"') + } - core.setOutput("url", url_response.url); - core.setOutput("id", response.data.artifacts[0].id); + core.setOutput("urls", artifact_urls.join(" ")); + core.setOutput("ids", artifact_ids.join(" ")); + core.setOutput("names", 
artifact_names.join(" ")); - shell: bash - if: steps.artifact-url.outputs.url != '' + if: steps.artifact-url.outputs.urls != '' id: download-artifact run: | - curl -L -o ${{ inputs.artifact-name }}.zip "${{ steps.artifact-url.outputs.url }}" - echo "filename=${{ inputs.artifact-name }}.zip" >> $GITHUB_OUTPUT + artifact_urls=(${{ steps.artifact-url.outputs.urls }}) + artifact_names=(${{ steps.artifact-url.outputs.names }}) + for i in "${!artifact_urls[@]}"; do + curl -L -o "${artifact_names[$i]}.zip" "${artifact_urls[$i]}" + done - shell: bash - if: steps.download-artifact.outputs.filename != '' + if: steps.artifact-url.outputs.names != '' run: | - unzip ${{ steps.download-artifact.outputs.filename }} + artifact_names=(${{ steps.artifact-url.outputs.names }}) + for name in "${artifact_names[@]}"; do + unzip "${name}.zip" + done diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp b/clang-tools-extra/clangd/ScanningProjectModules.cpp index 672e99632019d..6a21ad2920764 100644 --- a/clang-tools-extra/clangd/ScanningProjectModules.cpp +++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp @@ -8,8 +8,8 @@ #include "ProjectModules.h" #include "support/Logger.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" +#include "clang/DependencyScanning/DependencyScanningService.h" +#include "clang/Tooling/DependencyScanningTool.h" namespace clang::clangd { namespace { @@ -36,8 +36,8 @@ class ModuleDependencyScanner { std::shared_ptr CDB, const ThreadsafeFS &TFS) : CDB(CDB), TFS(TFS), - Service(tooling::dependencies::ScanningMode::CanonicalPreprocessing, - tooling::dependencies::ScanningOutputFormat::P1689) {} + Service(dependencies::ScanningMode::CanonicalPreprocessing, + dependencies::ScanningOutputFormat::P1689) {} /// The scanned modules dependency information for a specific source file. 
struct ModuleDependencyInfo { @@ -81,7 +81,7 @@ class ModuleDependencyScanner { // Whether the scanner has scanned the project globally. bool GlobalScanned = false; - clang::tooling::dependencies::DependencyScanningService Service; + clang::dependencies::DependencyScanningService Service; // TODO: Add a scanning cache. diff --git a/clang-tools-extra/clangd/test/lit.cfg.py b/clang-tools-extra/clangd/test/lit.cfg.py index 0199275c70e1e..05a0f5e7383e9 100644 --- a/clang-tools-extra/clangd/test/lit.cfg.py +++ b/clang-tools-extra/clangd/test/lit.cfg.py @@ -1,6 +1,3 @@ -import os -import shutil - import lit.llvm import lit.util @@ -8,21 +5,10 @@ lit.llvm.llvm_config.clang_setup() lit.llvm.llvm_config.use_default_substitutions() -# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. -# See https://github.com/llvm/llvm-project/issues/106636 for more details. -# -# We prefer the lit internal shell which provides a better user experience on failures -# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 -# env var. -use_lit_shell = True -lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL") -if lit_shell_env: - use_lit_shell = lit.util.pythonize_bool(lit_shell_env) - config.name = "Clangd" config.suffixes = [".test"] config.excludes = ["Inputs"] -config.test_format = lit.formats.ShTest(not use_lit_shell) +config.test_format = lit.formats.ShTest(not lit.llvm.llvm_config.use_lit_shell) config.test_source_root = config.clangd_source_dir + "/test" config.test_exec_root = config.clangd_binary_dir + "/test" @@ -55,13 +41,6 @@ def calculate_arch_features(arch_string): if lit.util.pythonize_bool(config.have_benchmarks): config.available_features.add("have-benchmarks") -# This is needed to avoid running a single test (system-include-extractor.test) -# on a single buildbot (clangd-ubuntu-tsan) and likely should not be needed. We -# are able to unconditionally assume a chmod binary exists for check-llvm. 
-# TODO(boomanaiden154): Fix this after investigating the bot setup. -if shutil.which("chmod"): - config.available_features.add("chmod") - # It is not realistically possible to account for all options that could # possibly be present in system and user configuration files, so disable # default configs for the test runs. diff --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test index 36e4c581ecad1..3314be806a801 100644 --- a/clang-tools-extra/clangd/test/system-include-extractor.test +++ b/clang-tools-extra/clangd/test/system-include-extractor.test @@ -1,7 +1,7 @@ # RUN: rm -rf %t.dir && mkdir -p %t.dir # The mock driver below is a shell script: -# REQUIRES: shell, chmod +# REQUIRES: shell # Create a bin directory to store the mock-driver and add it to the path # RUN: mkdir -p %t.dir/bin diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index cf8bdd2a429df..216b5fdb69ff7 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1001,6 +1001,10 @@ TARGET_BUILTIN(__builtin_darn_32, "i", "", "isa-v30-instructions") TARGET_BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "", "vsx") TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx") +// AMO builtins +TARGET_BUILTIN(__builtin_amo_lwat, "UiUi*UiIi", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_amo_ldat, "ULiULi*ULiIi", "", "isa-v30-instructions") + // Set the floating point rounding mode BUILTIN(__builtin_setrnd, "di", "") diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 560f94ff2427e..a4b7215d6334d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -585,13 +585,14 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psadbw256 : X86Builtin< "_Vector<4, long long 
int>(_Vector<32, char>, _Vector<32, char>)">; - def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; - def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } - let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def permdf256 + : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; + def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long " + "int>, _Constant int)">; def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 6c445253d518b..c60ca507ff917 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -21,6 +21,7 @@ #include "llvm/Frontend/Debug/Options.h" #include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Hash.h" #include "llvm/Support/Regex.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" @@ -514,6 +515,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// binary metadata pass should not be instrumented. std::vector SanitizeMetadataIgnorelistFiles; + /// Hash algorithm to use for KCFI type IDs. + llvm::KCFIHashAlgorithm SanitizeKcfiHash; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. 
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h similarity index 89% rename from clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h rename to clang/include/clang/DependencyScanning/DependencyScannerImpl.h index b94d1b472f920..750688702ec23 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h +++ b/clang/include/clang/DependencyScanning/DependencyScannerImpl.h @@ -1,4 +1,4 @@ -//===- DependencyScannerImpl.h - Implements dependency scanning *- C++ -*--===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,18 +9,18 @@ #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H +#include "clang/DependencyScanning/DependencyScanningFilesystem.h" +#include "clang/DependencyScanning/ModuleDepCollector.h" #include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Serialization/ObjectFilePCHContainerReader.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" namespace clang { class DiagnosticConsumer; -namespace tooling { namespace dependencies { class DependencyScanningService; class DependencyScanningWorker; @@ -38,7 +38,8 @@ class DependencyScanningAction { std::optional ModuleName = std::nullopt) : Service(Service), WorkingDirectory(WorkingDirectory), Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)) {} - bool runInvocation(std::unique_ptr Invocation, + bool runInvocation(std::string Executable, + std::unique_ptr 
Invocation, IntrusiveRefCntPtr FS, std::shared_ptr PCHContainerOps, DiagnosticConsumer *DiagConsumer); @@ -46,22 +47,7 @@ class DependencyScanningAction { bool hasScanned() const { return Scanned; } bool hasDiagConsumerFinished() const { return DiagConsumerFinished; } - /// Take the cc1 arguments corresponding to the most recent invocation used - /// with this action. Any modifications implied by the discovered dependencies - /// will have already been applied. - std::vector takeLastCC1Arguments() { - std::vector Result; - std::swap(Result, LastCC1Arguments); // Reset LastCC1Arguments to empty. - return Result; - } - private: - void setLastCC1Arguments(CompilerInvocation &&CI) { - if (MDC) - MDC->applyDiscoveredDependencies(CI); - LastCC1Arguments = CI.getCC1CommandLine(); - } - DependencyScanningService &Service; StringRef WorkingDirectory; DependencyConsumer &Consumer; @@ -69,7 +55,6 @@ class DependencyScanningAction { IntrusiveRefCntPtr DepFS; std::optional ScanInstanceStorage; std::shared_ptr MDC; - std::vector LastCC1Arguments; bool Scanned = false; bool DiagConsumerFinished = false; }; @@ -206,7 +191,6 @@ class CompilerInstanceWithContext { llvm::Error handleReturnStatus(bool Success); }; } // namespace dependencies -} // namespace tooling } // namespace clang #endif diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h similarity index 98% rename from clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h rename to clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h index 2b21be7712693..2162222a66643 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h @@ -1,4 +1,4 @@ -//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// 
+//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H -#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H +#ifndef LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H +#define LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H #include "clang/Basic/LLVM.h" #include "clang/Lex/DependencyDirectivesScanner.h" @@ -21,7 +21,6 @@ #include namespace clang { -namespace tooling { namespace dependencies { using DependencyDirectivesTy = @@ -521,7 +520,6 @@ class DependencyScanningWorkerFilesystem }; } // end namespace dependencies -} // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H +#endif // LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/DependencyScanning/DependencyScanningService.h similarity index 89% rename from clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h rename to clang/include/clang/DependencyScanning/DependencyScanningService.h index 4e97c7bc9f36e..371b862996706 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningService.h @@ -1,4 +1,4 @@ -//===- DependencyScanningService.h - clang-scan-deps service ===-*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,16 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H -#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H +#ifndef LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H +#define LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H -#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h" +#include "clang/DependencyScanning/DependencyScanningFilesystem.h" +#include "clang/DependencyScanning/InProcessModuleCache.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/Support/Chrono.h" namespace clang { -namespace tooling { namespace dependencies { /// The mode in which the dependency scanner will operate to find the @@ -125,7 +124,6 @@ class DependencyScanningService { }; } // end namespace dependencies -} // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H +#endif // LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGSERVICE_H diff --git a/clang/include/clang/DependencyScanning/DependencyScanningUtils.h b/clang/include/clang/DependencyScanning/DependencyScanningUtils.h new file mode 100644 index 0000000000000..80b73cefc942f --- /dev/null +++ b/clang/include/clang/DependencyScanning/DependencyScanningUtils.h @@ -0,0 +1,170 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGUTILS_H +#define LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGUTILS_H + +#include "clang/DependencyScanning/DependencyScannerImpl.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" +#include "clang/DependencyScanning/ModuleDepCollector.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include +#include + +namespace clang { +namespace dependencies { + +/// Graph of modular dependencies. +using ModuleDepsGraph = std::vector; + +/// The full dependencies and module graph for a specific input. +struct TranslationUnitDeps { + /// The graph of direct and transitive modular dependencies. + ModuleDepsGraph ModuleGraph; + + /// The identifier of the C++20 module this translation unit exports. + /// + /// If the translation unit is not a module then \c ID.ModuleName is empty. + clang::dependencies::ModuleID ID; + + /// A collection of absolute paths to files that this translation unit + /// directly depends on, not including transitive dependencies. + std::vector FileDeps; + + /// A collection of prebuilt modules this translation unit directly depends + /// on, not including transitive dependencies. + std::vector PrebuiltModuleDeps; + + /// A list of modules this translation unit directly depends on, not including + /// transitive dependencies. + /// + /// This may include modules with a different context hash when it can be + /// determined that the differences are benign for this compilation. + std::vector ClangModuleDeps; + + /// A list of module names that are visible to this translation unit. This + /// includes both direct and transitive module dependencies. + std::vector VisibleModules; + + /// A list of the C++20 named modules this translation unit depends on. 
+ std::vector NamedModuleDeps; + + /// The sequence of commands required to build the translation unit. Commands + /// should be executed in order. + /// + /// FIXME: If we add support for multi-arch builds in clang-scan-deps, we + /// should make the dependencies between commands explicit to enable parallel + /// builds of each architecture. + std::vector Commands; + + /// Deprecated driver command-line. This will be removed in a future version. + std::vector DriverCommandLine; +}; + +class FullDependencyConsumer : public clang::dependencies::DependencyConsumer { +public: + FullDependencyConsumer( + const llvm::DenseSet &AlreadySeen) + : AlreadySeen(AlreadySeen) {} + + void handleBuildCommand(clang::dependencies::Command Cmd) override { + Commands.push_back(std::move(Cmd)); + } + + void handleDependencyOutputOpts(const DependencyOutputOptions &) override {} + + void handleFileDependency(StringRef File) override { + Dependencies.push_back(std::string(File)); + } + + void handlePrebuiltModuleDependency( + clang::dependencies::PrebuiltModuleDep PMD) override { + PrebuiltModuleDeps.emplace_back(std::move(PMD)); + } + + void handleModuleDependency(clang::dependencies::ModuleDeps MD) override { + ClangModuleDeps[MD.ID] = std::move(MD); + } + + void handleDirectModuleDependency(clang::dependencies::ModuleID ID) override { + DirectModuleDeps.push_back(ID); + } + + void handleVisibleModule(std::string ModuleName) override { + VisibleModules.push_back(ModuleName); + } + + void handleContextHash(std::string Hash) override { + ContextHash = std::move(Hash); + } + + void handleProvidedAndRequiredStdCXXModules( + std::optional Provided, + std::vector Requires) override { + ModuleName = Provided ? 
Provided->ModuleName : ""; + llvm::transform(Requires, std::back_inserter(NamedModuleDeps), + [](const auto &Module) { return Module.ModuleName; }); + } + + TranslationUnitDeps takeTranslationUnitDeps(); + +private: + std::vector Dependencies; + std::vector PrebuiltModuleDeps; + llvm::MapVector + ClangModuleDeps; + std::string ModuleName; + std::vector NamedModuleDeps; + std::vector DirectModuleDeps; + std::vector VisibleModules; + std::vector Commands; + std::string ContextHash; + const llvm::DenseSet &AlreadySeen; +}; + +/// A callback to lookup module outputs for "-fmodule-file=", "-o" etc. +using LookupModuleOutputCallback = + llvm::function_ref; + +/// A simple dependency action controller that uses a callback. If no callback +/// is provided, it is assumed that looking up module outputs is unreachable. +class CallbackActionController + : public clang::dependencies::DependencyActionController { +public: + virtual ~CallbackActionController(); + + static std::string + lookupUnreachableModuleOutput(const clang::dependencies::ModuleDeps &MD, + clang::dependencies::ModuleOutputKind Kind) { + llvm::report_fatal_error("unexpected call to lookupModuleOutput"); + }; + + CallbackActionController(LookupModuleOutputCallback LMO) + : LookupModuleOutput(std::move(LMO)) { + if (!LookupModuleOutput) { + LookupModuleOutput = lookupUnreachableModuleOutput; + } + } + + std::string + lookupModuleOutput(const clang::dependencies::ModuleDeps &MD, + clang::dependencies::ModuleOutputKind Kind) override { + return LookupModuleOutput(MD, Kind); + } + +private: + LookupModuleOutputCallback LookupModuleOutput; +}; + +} // end namespace dependencies +} // end namespace clang + +#endif // LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGUTILS_H diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h similarity index 93% rename from 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h rename to clang/include/clang/DependencyScanning/DependencyScanningWorker.h index 65c943ec06484..9585691607ca9 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningWorker.h @@ -1,4 +1,4 @@ -//===- DependencyScanningWorker.h - clang-scan-deps worker ===---*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H -#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H +#ifndef LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H +#define LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" +#include "clang/DependencyScanning/DependencyScanningService.h" +#include "clang/DependencyScanning/ModuleDepCollector.h" #include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBufferRef.h" @@ -25,7 +25,6 @@ namespace clang { class DependencyOutputOptions; -namespace tooling { namespace dependencies { class DependencyScanningWorkerFilesystem; @@ -180,7 +179,6 @@ class DependencyScanningWorker { }; } // end namespace dependencies -} // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H +#endif // 
LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGWORKER_H diff --git a/clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h b/clang/include/clang/DependencyScanning/InProcessModuleCache.h similarity index 82% rename from clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h rename to clang/include/clang/DependencyScanning/InProcessModuleCache.h index 213e60b39c199..0585348fa7d1d 100644 --- a/clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h +++ b/clang/include/clang/DependencyScanning/InProcessModuleCache.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H -#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H +#ifndef LLVM_CLANG_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H +#define LLVM_CLANG_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H #include "clang/Serialization/ModuleCache.h" #include "llvm/ADT/StringMap.h" @@ -16,8 +16,8 @@ #include namespace clang { -namespace tooling { namespace dependencies { + struct ModuleCacheEntry { std::shared_mutex CompilationMutex; std::atomic Timestamp = 0; @@ -30,8 +30,8 @@ struct ModuleCacheEntries { IntrusiveRefCntPtr makeInProcessModuleCache(ModuleCacheEntries &Entries); + } // namespace dependencies -} // namespace tooling } // namespace clang -#endif +#endif // LLVM_CLANG_DEPENDENCYSCANNING_INPROCESSMODULECACHE_H diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/DependencyScanning/ModuleDepCollector.h similarity index 95% rename from clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h rename to clang/include/clang/DependencyScanning/ModuleDepCollector.h index b0a91b60ff6da..8f665daf03c69 100644 --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/DependencyScanning/ModuleDepCollector.h @@ -1,4 +1,4 @@ -//===- ModuleDepCollector.h - 
Callbacks to collect deps ---------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,18 +6,18 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H -#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H +#ifndef LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H +#define LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H #include "clang/Basic/LLVM.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" +#include "clang/DependencyScanning/DependencyScanningService.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Serialization/ASTReader.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringSet.h" @@ -28,7 +28,6 @@ #include namespace clang { -namespace tooling { namespace dependencies { class DependencyActionController; @@ -109,7 +108,7 @@ struct ModuleID { std::tie(Other.ModuleName, Other.ContextHash); } - bool operator<(const ModuleID& Other) const { + bool operator<(const ModuleID &Other) const { return std::tie(ModuleName, ContextHash) < std::tie(Other.ModuleName, Other.ContextHash); } @@ -264,10 +263,11 @@ class ModuleDepCollectorPP final : public PPCallbacks { /// Traverses the affecting modules and updates \c MD with references to the /// parent \c ModuleDepCollector info. 
- void addAllAffectingClangModules(const Module *M, ModuleDeps &MD, + void + addAllAffectingClangModules(const Module *M, ModuleDeps &MD, llvm::DenseSet &AddedModules); void addAffectingClangModule(const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules); + llvm::DenseSet &AddedModules); /// Add discovered module dependency for the given module. void addOneModuleDep(const Module *M, const ModuleID ID, ModuleDeps &MD); @@ -406,16 +406,15 @@ bool areOptionsInStableDir(const ArrayRef Directories, const HeaderSearchOptions &HSOpts); } // end namespace dependencies -} // end namespace tooling } // end namespace clang namespace llvm { -inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) { +inline hash_code hash_value(const clang::dependencies::ModuleID &ID) { return hash_combine(ID.ModuleName, ID.ContextHash); } -template <> struct DenseMapInfo { - using ModuleID = clang::tooling::dependencies::ModuleID; +template <> struct DenseMapInfo { + using ModuleID = clang::dependencies::ModuleID; static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; } static inline ModuleID getTombstoneKey() { return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash @@ -427,4 +426,4 @@ template <> struct DenseMapInfo { }; } // namespace llvm -#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H +#endif // LLVM_CLANG_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index d31bd7d6be322..c6841937c8d39 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -2719,6 +2719,16 @@ def fsanitize_kcfi_arity : Flag<["-"], "fsanitize-kcfi-arity">, Group, HelpText<"Embed function arity information into the KCFI patchable function prefix">, MarshallingInfoFlag>; +def fsanitize_kcfi_hash_EQ + : Joined<["-"], "fsanitize-kcfi-hash=">, + HelpText<"Select hash algorithm for KCFI type IDs (xxHash64, FNV-1a)">, + 
Visibility<[ClangOption, CC1Option]>, + Values<"xxHash64,FNV-1a">, + NormalizedValuesScope<"llvm">, + NormalizedValues<["KCFIHashAlgorithm::xxHash64", + "KCFIHashAlgorithm::FNV1a"]>, + MarshallingInfoEnum, + "KCFIHashAlgorithm::xxHash64">; defm sanitize_stats : BoolOption<"f", "sanitize-stats", CodeGenOpts<"SanitizeStats">, DefaultFalse, PosFlag, diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanningTool.h similarity index 51% rename from clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h rename to clang/include/clang/Tooling/DependencyScanningTool.h index ed562f46cfdaa..0ac142a3fc673 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ b/clang/include/clang/Tooling/DependencyScanningTool.h @@ -1,4 +1,4 @@ -//===- DependencyScanningTool.h - clang-scan-deps service -----------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,12 +6,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGTOOL_H -#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGTOOL_H +#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNINGTOOL_H +#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNINGTOOL_H -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" -#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" +#include "clang/DependencyScanning/DependencyScanningService.h" +#include "clang/DependencyScanning/DependencyScanningUtils.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" +#include "clang/DependencyScanning/ModuleDepCollector.h" #include "clang/Tooling/JSONCompilationDatabase.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" @@ -25,61 +26,10 @@ namespace clang { namespace tooling { namespace dependencies { -/// A callback to lookup module outputs for "-fmodule-file=", "-o" etc. -using LookupModuleOutputCallback = - llvm::function_ref; - -/// Graph of modular dependencies. -using ModuleDepsGraph = std::vector; - -/// The full dependencies and module graph for a specific input. -struct TranslationUnitDeps { - /// The graph of direct and transitive modular dependencies. - ModuleDepsGraph ModuleGraph; - - /// The identifier of the C++20 module this translation unit exports. - /// - /// If the translation unit is not a module then \c ID.ModuleName is empty. - ModuleID ID; - - /// A collection of absolute paths to files that this translation unit - /// directly depends on, not including transitive dependencies. - std::vector FileDeps; - - /// A collection of prebuilt modules this translation unit directly depends - /// on, not including transitive dependencies. 
- std::vector PrebuiltModuleDeps; - - /// A list of modules this translation unit directly depends on, not including - /// transitive dependencies. - /// - /// This may include modules with a different context hash when it can be - /// determined that the differences are benign for this compilation. - std::vector ClangModuleDeps; - - /// A list of module names that are visible to this translation unit. This - /// includes both direct and transitive module dependencies. - std::vector VisibleModules; - - /// A list of the C++20 named modules this translation unit depends on. - std::vector NamedModuleDeps; - - /// The sequence of commands required to build the translation unit. Commands - /// should be executed in order. - /// - /// FIXME: If we add support for multi-arch builds in clang-scan-deps, we - /// should make the dependencies between commands explicit to enable parallel - /// builds of each architecture. - std::vector Commands; - - /// Deprecated driver command-line. This will be removed in a future version. - std::vector DriverCommandLine; -}; - struct P1689Rule { std::string PrimaryOutput; - std::optional Provides; - std::vector Requires; + std::optional Provides; + std::vector Requires; }; /// The high-level implementation of the dependency discovery tool that runs on @@ -90,9 +40,10 @@ class DependencyScanningTool { /// /// @param Service The parent service. Must outlive the tool. /// @param FS The filesystem for the tool to use. Defaults to the physical FS. 
- DependencyScanningTool(DependencyScanningService &Service, - llvm::IntrusiveRefCntPtr FS = - llvm::vfs::createPhysicalFileSystem()); + DependencyScanningTool( + clang::dependencies::DependencyScanningService &Service, + llvm::IntrusiveRefCntPtr FS = + llvm::vfs::createPhysicalFileSystem()); /// Print out the dependency information into a string using the dependency /// file format that is specified in the options (-MD is the default) and @@ -145,10 +96,11 @@ class DependencyScanningTool { /// /// \returns a \c StringError with the diagnostic output if clang errors /// occurred, \c TranslationUnitDeps otherwise. - llvm::Expected getTranslationUnitDependencies( + llvm::Expected + getTranslationUnitDependencies( const std::vector &CommandLine, StringRef CWD, - const llvm::DenseSet &AlreadySeen, - LookupModuleOutputCallback LookupModuleOutput, + const llvm::DenseSet &AlreadySeen, + clang::dependencies::LookupModuleOutputCallback LookupModuleOutput, std::optional TUBuffer = std::nullopt); /// Given a compilation context specified via the Clang driver command-line, @@ -157,10 +109,12 @@ class DependencyScanningTool { /// TODO: this method should be removed as soon as Swift and our C-APIs adopt /// CompilerInstanceWithContext. We are keeping it here so that it is easier /// to coordinate with Swift and C-API changes. - llvm::Expected getModuleDependencies( + llvm::Expected + getModuleDependencies( StringRef ModuleName, const std::vector &CommandLine, - StringRef CWD, const llvm::DenseSet &AlreadySeen, - LookupModuleOutputCallback LookupModuleOutput); + StringRef CWD, + const llvm::DenseSet &AlreadySeen, + clang::dependencies::LookupModuleOutputCallback LookupModuleOutput); /// The following three methods provide a new interface to perform /// by name dependency scan. The new interface's intention is to improve @@ -190,9 +144,11 @@ class DependencyScanningTool { /// arguments for dependencies. /// @return An instance of \c TranslationUnitDeps if the scan is successful. 
/// Otherwise it returns an error. - llvm::Expected computeDependenciesByNameWithContext( - StringRef ModuleName, const llvm::DenseSet &AlreadySeen, - LookupModuleOutputCallback LookupModuleOutput); + llvm::Expected + computeDependenciesByNameWithContext( + StringRef ModuleName, + const llvm::DenseSet &AlreadySeen, + clang::dependencies::LookupModuleOutputCallback LookupModuleOutput); /// @brief This method finializes the compiler instance. It finalizes the /// diagnostics and deletes the compiler instance. Call this method @@ -203,96 +159,13 @@ class DependencyScanningTool { llvm::vfs::FileSystem &getWorkerVFS() const { return Worker.getVFS(); } private: - DependencyScanningWorker Worker; -}; - -class FullDependencyConsumer : public DependencyConsumer { -public: - FullDependencyConsumer(const llvm::DenseSet &AlreadySeen) - : AlreadySeen(AlreadySeen) {} - - void handleBuildCommand(Command Cmd) override { - Commands.push_back(std::move(Cmd)); - } - - void handleDependencyOutputOpts(const DependencyOutputOptions &) override {} - - void handleFileDependency(StringRef File) override { - Dependencies.push_back(std::string(File)); - } - - void handlePrebuiltModuleDependency(PrebuiltModuleDep PMD) override { - PrebuiltModuleDeps.emplace_back(std::move(PMD)); - } - - void handleModuleDependency(ModuleDeps MD) override { - ClangModuleDeps[MD.ID] = std::move(MD); - } - - void handleDirectModuleDependency(ModuleID ID) override { - DirectModuleDeps.push_back(ID); - } - - void handleVisibleModule(std::string ModuleName) override { - VisibleModules.push_back(ModuleName); - } - - void handleContextHash(std::string Hash) override { - ContextHash = std::move(Hash); - } - - void handleProvidedAndRequiredStdCXXModules( - std::optional Provided, - std::vector Requires) override { - ModuleName = Provided ? 
Provided->ModuleName : ""; - llvm::transform(Requires, std::back_inserter(NamedModuleDeps), - [](const auto &Module) { return Module.ModuleName; }); - } - - TranslationUnitDeps takeTranslationUnitDeps(); - -private: - std::vector Dependencies; - std::vector PrebuiltModuleDeps; - llvm::MapVector ClangModuleDeps; - std::string ModuleName; - std::vector NamedModuleDeps; - std::vector DirectModuleDeps; - std::vector VisibleModules; - std::vector Commands; - std::string ContextHash; - const llvm::DenseSet &AlreadySeen; -}; - -/// A simple dependency action controller that uses a callback. If no callback -/// is provided, it is assumed that looking up module outputs is unreachable. -class CallbackActionController : public DependencyActionController { -public: - virtual ~CallbackActionController(); - - static std::string lookupUnreachableModuleOutput(const ModuleDeps &MD, - ModuleOutputKind Kind) { - llvm::report_fatal_error("unexpected call to lookupModuleOutput"); - }; - - CallbackActionController(LookupModuleOutputCallback LMO) - : LookupModuleOutput(std::move(LMO)) { - if (!LookupModuleOutput) { - LookupModuleOutput = lookupUnreachableModuleOutput; - } - } - - std::string lookupModuleOutput(const ModuleDeps &MD, - ModuleOutputKind Kind) override { - return LookupModuleOutput(MD, Kind); - } - -private: - LookupModuleOutputCallback LookupModuleOutput; + clang::dependencies::DependencyScanningWorker Worker; + std::unique_ptr + DiagPrinterWithOS; }; } // end namespace dependencies } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGTOOL_H +#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNINGTOOL_H diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 382273e768919..4a789fe3a6af4 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5116,6 +5116,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call, return std::make_pair(0, static_cast(LaneOffset + Index)); }); + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast(Index)); + }); + case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c1fb95c084d73..11c5e1c6e90f4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13269,6 +13269,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_permdf256: + case X86::BI__builtin_ia32_permdi256: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) { + // permute4x64 operates on 4 64-bit elements + // For element i (0-3), extract bits [2*i+1:2*i] from Control + unsigned Index = (Control >> (2 * DstIdx)) & 0x3; + return std::make_pair(0, static_cast(Index)); + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_vpermilvarps: case X86::BI__builtin_ia32_vpermilvarps256: case X86::BI__builtin_ia32_vpermilvarps512: { diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt index e90b009da606a..2fc69e4e4fa6f 100644 --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -18,6 +18,7 @@ add_subdirectory(Serialization) add_subdirectory(Frontend) add_subdirectory(FrontendTool) add_subdirectory(Tooling) +add_subdirectory(DependencyScanning) add_subdirectory(DirectoryWatcher) add_subdirectory(Index) add_subdirectory(IndexSerialization) diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp index ec609db8d3a3c..af3480d5755f1 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -655,6 +656,11 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); + const llvm::TargetOptions &Options = TM->Options; + CodeGenPasses.add(new RuntimeLibraryInfoWrapper( + TargetTriple, Options.ExceptionModel, Options.FloatABIType, + Options.EABIVersion, Options.MCOptions.ABIName, Options.VecLib)); + // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. 
CodeGenFileType CGFT = getCodeGenFileType(Action); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a6a1b84e278b9..319e10c93c517 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -66,11 +66,12 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Hash.h" #include "llvm/Support/TimeProfiler.h" -#include "llvm/Support/xxhash.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/X86TargetParser.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include #include @@ -1272,6 +1273,12 @@ void CodeGenModule::Release() { CodeGenOpts.PatchableFunctionEntryOffset); if (CodeGenOpts.SanitizeKcfiArity) getModule().addModuleFlag(llvm::Module::Override, "kcfi-arity", 1); + // Store the hash algorithm choice for use in LLVM passes + getModule().addModuleFlag( + llvm::Module::Override, "kcfi-hash", + llvm::MDString::get( + getLLVMContext(), + llvm::stringifyKCFIHashAlgorithm(CodeGenOpts.SanitizeKcfiHash))); } if (CodeGenOpts.CFProtectionReturn && @@ -2450,8 +2457,8 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) { if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) Out << ".generalized"; - return llvm::ConstantInt::get(Int32Ty, - static_cast(llvm::xxHash64(OutName))); + return llvm::ConstantInt::get( + Int32Ty, llvm::getKCFITypeID(OutName, getCodeGenOpts().SanitizeKcfiHash)); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, @@ -3205,7 +3212,8 @@ void CodeGenModule::finalizeKCFITypes() { continue; std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" + - Name + ", " + Twine(Type->getZExtValue()) + "\n") + Name + ", " + Twine(Type->getZExtValue()) + " # " + + Twine(Type->getSExtValue()) + "\n") .str(); 
M.appendModuleInlineAsm(Asm); } diff --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/DependencyScanning/CMakeLists.txt similarity index 93% rename from clang/lib/Tooling/DependencyScanning/CMakeLists.txt rename to clang/lib/DependencyScanning/CMakeLists.txt index 76bdc50097fff..2976f7c236f2e 100644 --- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt +++ b/clang/lib/DependencyScanning/CMakeLists.txt @@ -9,7 +9,7 @@ add_clang_library(clangDependencyScanning DependencyScanningFilesystem.cpp DependencyScanningService.cpp DependencyScanningWorker.cpp - DependencyScanningTool.cpp + DependencyScanningUtils.cpp DependencyScannerImpl.cpp InProcessModuleCache.cpp ModuleDepCollector.cpp diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp similarity index 98% rename from clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp rename to clang/lib/DependencyScanning/DependencyScannerImpl.cpp index bdb457401bc73..3ca9ce140e887 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp @@ -6,17 +6,16 @@ // //===----------------------------------------------------------------------===// -#include "DependencyScannerImpl.h" +#include "clang/DependencyScanning/DependencyScannerImpl.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/DiagnosticSerialization.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" #include "clang/Driver/Driver.h" #include "clang/Frontend/FrontendActions.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/TargetParser/Host.h" using namespace clang; -using namespace tooling; using namespace dependencies; namespace { @@ -638,7 +637,7 @@ dependencies::initializeScanInstanceDependencyCollector( } bool DependencyScanningAction::runInvocation( - std::unique_ptr Invocation, 
+ std::string Executable, std::unique_ptr Invocation, IntrusiveRefCntPtr FS, std::shared_ptr PCHContainerOps, DiagnosticConsumer *DiagConsumer) { @@ -654,9 +653,12 @@ bool DependencyScanningAction::runInvocation( if (Scanned) { // Scanning runs once for the first -cc1 invocation in a chain of driver // jobs. For any dependent jobs, reuse the scanning result and just - // update the LastCC1Arguments to correspond to the new invocation. + // update the new invocation. // FIXME: to support multi-arch builds, each arch requires a separate scan - setLastCC1Arguments(std::move(OriginalInvocation)); + if (MDC) + MDC->applyDiscoveredDependencies(OriginalInvocation); + Consumer.handleBuildCommand( + {Executable, OriginalInvocation.getCC1CommandLine()}); return true; } @@ -701,8 +703,12 @@ bool DependencyScanningAction::runInvocation( // ExecuteAction is responsible for calling finish. DiagConsumerFinished = true; - if (Result) - setLastCC1Arguments(std::move(OriginalInvocation)); + if (Result) { + if (MDC) + MDC->applyDiscoveredDependencies(OriginalInvocation); + Consumer.handleBuildCommand( + {Executable, OriginalInvocation.getCC1CommandLine()}); + } return Result; } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp similarity index 99% rename from clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp rename to clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp index 266944ee730cb..24a794e4a6a22 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp @@ -1,4 +1,4 @@ -//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// +//===- DependencyScanningFilesystem.cpp - Optimized Scanning FS -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" +#include "clang/DependencyScanning/DependencyScanningFilesystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Threading.h" #include using namespace clang; -using namespace tooling; using namespace dependencies; llvm::ErrorOr diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/DependencyScanning/DependencyScanningService.cpp similarity index 82% rename from clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp rename to clang/lib/DependencyScanning/DependencyScanningService.cpp index 7f40c99f07287..72f359e56d116 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningService.cpp @@ -1,4 +1,4 @@ -//===- DependencyScanningService.cpp - clang-scan-deps service ------------===// +//===- DependencyScanningService.cpp - Scanning Service -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" +#include "clang/DependencyScanning/DependencyScanningService.h" using namespace clang; -using namespace tooling; using namespace dependencies; DependencyScanningService::DependencyScanningService( diff --git a/clang/lib/DependencyScanning/DependencyScanningUtils.cpp b/clang/lib/DependencyScanning/DependencyScanningUtils.cpp new file mode 100644 index 0000000000000..e27c597a14fcc --- /dev/null +++ b/clang/lib/DependencyScanning/DependencyScanningUtils.cpp @@ -0,0 +1,38 @@ +//===- DependencyScanningUtils.cpp - Common Scanning Utilities ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/DependencyScanning/DependencyScanningUtils.h" + +using namespace clang; +using namespace dependencies; + +TranslationUnitDeps FullDependencyConsumer::takeTranslationUnitDeps() { + TranslationUnitDeps TU; + + TU.ID.ContextHash = std::move(ContextHash); + TU.ID.ModuleName = std::move(ModuleName); + TU.NamedModuleDeps = std::move(NamedModuleDeps); + TU.FileDeps = std::move(Dependencies); + TU.PrebuiltModuleDeps = std::move(PrebuiltModuleDeps); + TU.VisibleModules = std::move(VisibleModules); + TU.Commands = std::move(Commands); + + for (auto &&M : ClangModuleDeps) { + auto &MD = M.second; + // TODO: Avoid handleModuleDependency even being called for modules + // we've already seen. 
+ if (AlreadySeen.count(M.first)) + continue; + TU.ModuleGraph.push_back(std::move(MD)); + } + TU.ClangModuleDeps = std::move(DirectModuleDeps); + + return TU; +} + +CallbackActionController::~CallbackActionController() {} diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp similarity index 90% rename from clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp rename to clang/lib/DependencyScanning/DependencyScanningWorker.cpp index 421a94307a9e5..333edd4862336 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningWorker.cpp @@ -1,4 +1,4 @@ -//===- DependencyScanningWorker.cpp - clang-scan-deps worker --------------===// +//===- DependencyScanningWorker.cpp - Thread-Safe Scanning Worker ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" -#include "DependencyScannerImpl.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" #include "clang/Basic/DiagnosticFrontend.h" +#include "clang/DependencyScanning/DependencyScannerImpl.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Tool.h" using namespace clang; -using namespace tooling; using namespace dependencies; DependencyScanningWorker::DependencyScanningWorker( @@ -76,18 +75,14 @@ static bool createAndRunToolInvocation( DependencyScanningAction &Action, IntrusiveRefCntPtr FS, std::shared_ptr &PCHContainerOps, - DiagnosticsEngine &Diags, DependencyConsumer &Consumer) { + DiagnosticsEngine &Diags) { auto Invocation = createCompilerInvocation(CommandLine, Diags); if (!Invocation) return false; - if (!Action.runInvocation(std::move(Invocation), std::move(FS), - PCHContainerOps, Diags.getClient())) - return false; - - std::vector Args = Action.takeLastCC1Arguments(); - Consumer.handleBuildCommand({CommandLine[0], std::move(Args)}); - return true; + return Action.runInvocation(CommandLine[0], std::move(Invocation), + std::move(FS), PCHContainerOps, + Diags.getClient()); } bool DependencyScanningWorker::scanDependencies( @@ -112,9 +107,9 @@ bool DependencyScanningWorker::scanDependencies( bool Success = false; if (CommandLine[1] == "-cc1") { - Success = createAndRunToolInvocation( - CommandLine, Action, FS, PCHContainerOps, - *DiagEngineWithCmdAndOpts.DiagEngine, Consumer); + Success = + createAndRunToolInvocation(CommandLine, Action, FS, PCHContainerOps, + *DiagEngineWithCmdAndOpts.DiagEngine); } else { Success = forEachDriverJob( CommandLine, *DiagEngineWithCmdAndOpts.DiagEngine, FS, @@ -128,7 +123,7 @@ bool DependencyScanningWorker::scanDependencies( return true; } - // Insert -cc1 comand line options into Argv + // Insert -cc1 command line options into Argv 
std::vector Argv; Argv.push_back(Cmd.getExecutable()); llvm::append_range(Argv, Cmd.getArguments()); @@ -139,7 +134,7 @@ bool DependencyScanningWorker::scanDependencies( // dependency scanning filesystem. return createAndRunToolInvocation( std::move(Argv), Action, FS, PCHContainerOps, - *DiagEngineWithCmdAndOpts.DiagEngine, Consumer); + *DiagEngineWithCmdAndOpts.DiagEngine); }); } diff --git a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/DependencyScanning/InProcessModuleCache.cpp similarity index 95% rename from clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp rename to clang/lib/DependencyScanning/InProcessModuleCache.cpp index d1e543b438225..1dd2d34032a96 100644 --- a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp +++ b/clang/lib/DependencyScanning/InProcessModuleCache.cpp @@ -1,4 +1,4 @@ -//===----------------------------------------------------------------------===// +//===- InProcessModuleCache.cpp - Implicit Module Cache ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h" +#include "clang/DependencyScanning/InProcessModuleCache.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "llvm/Support/AdvisoryLock.h" @@ -15,7 +15,6 @@ #include using namespace clang; -using namespace tooling; using namespace dependencies; namespace { diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/DependencyScanning/ModuleDepCollector.cpp similarity index 99% rename from clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp rename to clang/lib/DependencyScanning/ModuleDepCollector.cpp index 3a99f8c882b8f..39bd2e2ab0032 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/DependencyScanning/ModuleDepCollector.cpp @@ -6,18 +6,17 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" +#include "clang/DependencyScanning/ModuleDepCollector.h" #include "clang/Basic/MakeSupport.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/BLAKE3.h" #include using namespace clang; -using namespace tooling; using namespace dependencies; void ModuleDeps::forEachFileDep(llvm::function_ref Cb) const { diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 87ccd40372681..69ada73342127 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -436,15 +436,21 @@ void RocmInstallationDetector::detectDeviceLibrary() { if (HasDeviceLibrary) return; - // Find device libraries in a legacy ROCm directory structure - // 
${ROCM_ROOT}/amdgcn/bitcode/* + // Find device libraries in a ROCm directory structure auto &ROCmDirs = getInstallationPathCandidates(); for (const auto &Candidate : ROCmDirs) { + // Legacy: ${ROCM_PATH}/amdgcn/bitcode/* LibDevicePath = Candidate.Path; llvm::sys::path::append(LibDevicePath, "amdgcn", "bitcode"); HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); if (HasDeviceLibrary) return; + // TheRock: ${ROCM_PATH}/lib/llvm/amdgcn/bitcode/* + LibDevicePath = Candidate.Path; + llvm::sys::path::append(LibDevicePath, "lib", "llvm", "amdgcn", "bitcode"); + HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); + if (HasDeviceLibrary) + return; } } diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 33fff7645df65..038859a513eb8 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -119,6 +119,7 @@ set(opencl_files set(ppc_files altivec.h + amo.h ) set(ppc_htm_files diff --git a/clang/lib/Headers/amo.h b/clang/lib/Headers/amo.h new file mode 100644 index 0000000000000..62ee0e6c191cd --- /dev/null +++ b/clang/lib/Headers/amo.h @@ -0,0 +1,97 @@ +/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\ + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * +\*===----------------------------------------------------------------------===*/ + +/* This header provides compatibility for GCC's AMO functions. + * The functions here call Clang's underlying AMO builtins. 
+ */ + +#ifndef _AMO_H +#define _AMO_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* AMO Load Operation Codes (FC values) */ +enum { + _AMO_LD_ADD = 0x00, /* Fetch and Add */ + _AMO_LD_XOR = 0x01, /* Fetch and XOR */ + _AMO_LD_IOR = 0x02, /* Fetch and OR */ + _AMO_LD_AND = 0x03, /* Fetch and AND */ + _AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */ + _AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */ + _AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */ + _AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */ + _AMO_LD_SWAP = 0x08 /* Swap */ +}; + +/* 32-bit unsigned AMO load operations */ +static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD); +} + +static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR); +} + +static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR); +} + +static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_AND); +} + +static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX); +} + +static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN); +} + +static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP); +} + +/* 64-bit unsigned AMO load operations */ +static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD); +} + +static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR); +} + +static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR); +} + +static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) { + return 
__builtin_amo_ldat(ptr, val, _AMO_LD_AND); +} + +static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX); +} + +static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN); +} + +static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _AMO_H */ diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index bfa458d207b46..3d6615b6b5395 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -87,6 +87,8 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: + case PPC::BI__builtin_amo_lwat: + case PPC::BI__builtin_amo_ldat: return true; } return false; @@ -253,6 +255,19 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, case PPC::BI__builtin_##Name: \ return BuiltinPPCMMACall(TheCall, BuiltinID, Types); #include "clang/Basic/BuiltinsPPC.def" + case PPC::BI__builtin_amo_lwat: + case PPC::BI__builtin_amo_ldat: { + llvm::APSInt Result; + if (SemaRef.BuiltinConstantArg(TheCall, 2, Result)) + return true; + unsigned Val = Result.getZExtValue(); + static constexpr unsigned ValidFC[] = {0, 1, 2, 3, 4, 6, 8}; + if (llvm::is_contained(ValidFC, Val)) + return false; + Expr *Arg = TheCall->getArg(2); + return SemaRef.Diag(Arg->getBeginLoc(), diag::err_argument_invalid_range) + << toString(Result, 10) << "0-4, 6" << "8" << Arg->getSourceRange(); + } } llvm_unreachable("must return from switch"); } diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt index faaa53276d0e6..0972ecb08437f 100644 --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -10,7 +10,6 @@ add_subdirectory(Inclusions) add_subdirectory(Refactoring) 
add_subdirectory(ASTDiff) add_subdirectory(Syntax) -add_subdirectory(DependencyScanning) add_subdirectory(Transformer) add_clang_library(clangTooling @@ -18,6 +17,7 @@ add_clang_library(clangTooling ArgumentsAdjusters.cpp CommonOptionsParser.cpp CompilationDatabase.cpp + DependencyScanningTool.cpp Execution.cpp ExpandResponseFilesCompilationDatabase.cpp FileMatchTrie.cpp @@ -39,6 +39,7 @@ add_clang_library(clangTooling clangAST clangASTMatchers clangBasic + clangDependencyScanning clangDriver clangOptions clangFormat diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp similarity index 88% rename from clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp rename to clang/lib/Tooling/DependencyScanningTool.cpp index a1f2db7a471be..e037420f4fcf2 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanningTool.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" +#include "clang/Tooling/DependencyScanningTool.h" #include "clang/Frontend/Utils.h" #include using namespace clang; using namespace tooling; -using namespace dependencies; +using namespace clang::dependencies; +using namespace clang::tooling::dependencies; DependencyScanningTool::DependencyScanningTool( DependencyScanningService &Service, @@ -200,29 +201,3 @@ DependencyScanningTool::computeDependenciesByNameWithContext( llvm::Error DependencyScanningTool::finalizeCompilerInstanceWithContext() { return Worker.finalizeCompilerInstanceWithContextOrError(); } - -TranslationUnitDeps FullDependencyConsumer::takeTranslationUnitDeps() { - TranslationUnitDeps TU; - - TU.ID.ContextHash = std::move(ContextHash); - TU.ID.ModuleName = std::move(ModuleName); - TU.NamedModuleDeps = std::move(NamedModuleDeps); - TU.FileDeps = std::move(Dependencies); - 
TU.PrebuiltModuleDeps = std::move(PrebuiltModuleDeps); - TU.VisibleModules = std::move(VisibleModules); - TU.Commands = std::move(Commands); - - for (auto &&M : ClangModuleDeps) { - auto &MD = M.second; - // TODO: Avoid handleModuleDependency even being called for modules - // we've already seen. - if (AlreadySeen.count(M.first)) - continue; - TU.ModuleGraph.push_back(std::move(MD)); - } - TU.ClangModuleDeps = std::move(DirectModuleDeps); - - return TU; -} - -CallbackActionController::~CallbackActionController() {} diff --git a/clang/test/CodeGen/PowerPC/builtins-amo-err.c b/clang/test/CodeGen/PowerPC/builtins-amo-err.c new file mode 100644 index 0000000000000..cdc14ef7f7e04 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-amo-err.c @@ -0,0 +1,18 @@ +// RUN: not %clang_cc1 -triple powerpc-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=AIX32-ERROR +// RUN: not %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=FC-ERROR + +void test_amo() { + unsigned int *ptr1, value1; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_lwat(ptr1, value1, 0); + // FC-ERROR: argument value 9 is outside the valid range [0-4, 6, 8] + __builtin_amo_lwat(ptr1, value1, 9); + + unsigned long int *ptr2, value2; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_ldat(ptr2, value2, 3); + // FC-ERROR: error: argument value 26 is outside the valid range [0-4, 6, 8] + __builtin_amo_ldat(ptr2, value2, 26); +} diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c new file mode 100644 index 0000000000000..2975b99786869 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c @@ -0,0 +1,58 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -O3 -triple 
powerpc64le-unknown-unknown -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX + +// CHECK-LABEL: define dso_local void @test_unsigned_lwat( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0) +// CHECK-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_lwat( +// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0) +// AIX-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// AIX-NEXT: ret void +// +void test_unsigned_lwat(unsigned int *ptr, unsigned int value, unsigned int * resp) { + unsigned int res = __builtin_amo_lwat(ptr, value, 0); + *resp = res; +} + +// CHECK-LABEL: define dso_local void @test_unsigned_ldat( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3) +// CHECK-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_ldat( +// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) 
[[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3) +// AIX-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]] +// AIX-NEXT: ret void +// +void test_unsigned_ldat(unsigned long int *ptr, unsigned long int value, unsigned long int * resp) { + unsigned long int res = __builtin_amo_ldat(ptr, value, 3); + *resp = res; +} +//. +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"long", [[META4]], i64 0} +//. +// AIX: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// AIX: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// AIX: [[META5]] = !{!"Simple C/C++ TBAA"} +// AIX: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// AIX: [[META7]] = !{!"long", [[META4]], i64 0} +//. 
diff --git a/clang/test/CodeGen/PowerPC/ppc-amo-header.c b/clang/test/CodeGen/PowerPC/ppc-amo-header.c new file mode 100644 index 0000000000000..f544cdef1e7d0 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/ppc-amo-header.c @@ -0,0 +1,91 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +#include + +uint32_t test_lwat_add(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_add + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 0) + return amo_lwat_add(ptr, val); +} + +uint32_t test_lwat_xor(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_xor + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 1) + return amo_lwat_xor(ptr, val); +} + +uint32_t test_lwat_ior(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_ior + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 2) + return amo_lwat_ior(ptr, val); +} + +uint32_t test_lwat_and(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_and + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 3) + return amo_lwat_and(ptr, val); +} + +uint32_t test_lwat_umax(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_umax + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 4) + return amo_lwat_umax(ptr, val); +} + +uint32_t test_lwat_umin(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_umin + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 6) + return amo_lwat_umin(ptr, val); +} + +uint32_t test_lwat_swap(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_swap + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 8) + return amo_lwat_swap(ptr, val); +} + +uint64_t test_ldat_add(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: 
@test_ldat_add + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 0) + return amo_ldat_add(ptr, val); +} + +uint64_t test_ldat_xor(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_xor + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 1) + return amo_ldat_xor(ptr, val); +} + +uint64_t test_ldat_ior(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_ior + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 2) + return amo_ldat_ior(ptr, val); +} + +uint64_t test_ldat_and(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_and + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 3) + return amo_ldat_and(ptr, val); +} + +uint64_t test_ldat_umax(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_umax + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 4) + return amo_ldat_umax(ptr, val); +} + +uint64_t test_ldat_umin(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_umin + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 6) + return amo_ldat_umin(ptr, val); +} + +uint64_t test_ldat_swap(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_swap + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 8) + return amo_ldat_swap(ptr, val); +} diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index d6facfea8962e..c9474e94476fc 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) { // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> return _mm256_permute4x64_epi64(a, 35); } +// Control value 0x00: [0,0,0,0] -> broadcast element 0 +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x00), 40LL, 40LL, 40LL, 40LL)); +// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] 
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x1B), 10LL, 20LL, 30LL, 40LL)); +// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x39), 30LL, 20LL, 10LL, 40LL)); +// Control value 0x12: [2,0,1,0] -> [C,A,B,A] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x12), 20LL, 40LL, 30LL, 40LL)); +// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0xE4), 40LL, 30LL, 20LL, 10LL)); +// Test with negative values +TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){-40LL, -30LL, -20LL, -10LL}), 0x1B), -10LL, -20LL, -30LL, -40LL)); __m256d test_mm256_permute4x64_pd(__m256d a) { // CHECK-LABEL: test_mm256_permute4x64_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> return _mm256_permute4x64_pd(a, 25); } +// Control value 0x00: [0,0,0,0] -> broadcast element 0 +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x00), 4.0, 4.0, 4.0, 4.0)); +// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x1B), 1.0, 2.0, 3.0, 4.0)); +// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x39), 3.0, 2.0, 1.0, 4.0)); +// Control value 0x12: [2,0,1,0] -> [C,A,B,A] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x12), 2.0, 4.0, 3.0, 4.0)); +// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D] +TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0xE4), 4.0, 3.0, 2.0, 1.0)); __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) { // CHECK-LABEL: 
test_mm256_permutevar8x32_epi32 diff --git a/clang/test/CodeGen/cfi-salt.c b/clang/test/CodeGen/cfi-salt.c index 7ba1e2fc14daa..8363236869013 100644 --- a/clang/test/CodeGen/cfi-salt.c +++ b/clang/test/CodeGen/cfi-salt.c @@ -27,9 +27,9 @@ typedef unsigned int (* __cfi_salt ufn_salt_t)(void); /// Must emit __kcfi_typeid symbols for address-taken function declarations // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]" -// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,LOW_SODIUM_HASH:]]" +// CHECK: module asm ".set __kcfi_typeid_[[F4]], {{[0-9]+}} # [[#%d,LOW_SODIUM_HASH:]]" // CHECK: module asm ".weak __kcfi_typeid_[[F4_SALT:[a-zA-Z0-9_]+]]" -// CHECK: module asm ".set __kcfi_typeid_[[F4_SALT]], [[#%d,ASM_SALTY_HASH:]]" +// CHECK: module asm ".set __kcfi_typeid_[[F4_SALT]], {{[0-9]+}} # [[#%d,ASM_SALTY_HASH:]]" /// Must not __kcfi_typeid symbols for non-address-taken declarations // CHECK-NOT: module asm ".weak __kcfi_typeid_f6" diff --git a/clang/test/CodeGen/kcfi-hash.c b/clang/test/CodeGen/kcfi-hash.c new file mode 100644 index 0000000000000..636d265feb9b4 --- /dev/null +++ b/clang/test/CodeGen/kcfi-hash.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -o - %s | FileCheck --check-prefix=DEFAULT %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-kcfi-hash=xxHash64 -o - %s | FileCheck --check-prefix=XXHASH %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-kcfi-hash=FNV-1a -o - %s | FileCheck --check-prefix=FNV %s + +void foo(void) {} + +// DEFAULT: ![[#]] = !{i32 4, !"kcfi-hash", !"xxHash64"} +// XXHASH: ![[#]] = !{i32 4, !"kcfi-hash", !"xxHash64"} +// FNV: ![[#]] = !{i32 4, !"kcfi-hash", !"FNV-1a"} diff --git a/clang/test/CodeGen/kcfi.c b/clang/test/CodeGen/kcfi.c index 622843cedba50..b2856b5149be9 100644 --- a/clang/test/CodeGen/kcfi.c +++ b/clang/test/CodeGen/kcfi.c @@ -7,7 +7,7 @@ /// Must emit __kcfi_typeid 
symbols for address-taken function declarations // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]" -// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]" +// CHECK: module asm ".set __kcfi_typeid_[[F4]], {{[0-9]+}} # [[#%d,HASH:]]" /// Must not __kcfi_typeid symbols for non-address-taken declarations // CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}" diff --git a/clang/test/Driver/Inputs/rocm-therock/include b/clang/test/Driver/Inputs/rocm-therock/include new file mode 120000 index 0000000000000..13265e5ed3db8 --- /dev/null +++ b/clang/test/Driver/Inputs/rocm-therock/include @@ -0,0 +1 @@ +../rocm/include \ No newline at end of file diff --git a/clang/test/Driver/Inputs/rocm-therock/lib/llvm/amdgcn b/clang/test/Driver/Inputs/rocm-therock/lib/llvm/amdgcn new file mode 120000 index 0000000000000..79d18ba840474 --- /dev/null +++ b/clang/test/Driver/Inputs/rocm-therock/lib/llvm/amdgcn @@ -0,0 +1 @@ +../../../rocm/amdgcn \ No newline at end of file diff --git a/clang/test/Driver/Inputs/rocm-therock/share/hip/version b/clang/test/Driver/Inputs/rocm-therock/share/hip/version new file mode 120000 index 0000000000000..62ff49a023cb9 --- /dev/null +++ b/clang/test/Driver/Inputs/rocm-therock/share/hip/version @@ -0,0 +1 @@ +../../../rocm/bin/.hipVersion \ No newline at end of file diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index effce40d67ebd..f5813c06ae600 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -9,7 +9,7 @@ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR -// Test subtarget with flushing off by ddefault. +// Test subtarget with flushing off by default. 
// RUN: %clang -### --target=x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm \ @@ -85,6 +85,13 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR +// Test TheRock toolchain layout +// RUN: %clang -### --target=x86_64-linux-gnu \ +// RUN: --offload-arch=gfx803 -nogpuinc \ +// RUN: --rocm-path=%S/Inputs/rocm-therock \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR-THEROCK + // Test finding device lib in resource dir // RUN: %clang -### --target=x86_64-linux-gnu \ // RUN: --offload-arch=gfx803 -nogpuinc \ @@ -210,6 +217,7 @@ // RESDIR-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR:[^"]+(/|\\\\)rocm_resource_dir(/|\\\\)lib(64)?(/|\\\\)amdgcn(/|\\\\).*]]ocml.bc" // ROCMDIR-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR:[^"]+(/|\\\\)rocm(/|\\\\)amdgcn(/|\\\\).*]]ocml.bc" +// ROCMDIR-THEROCK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR:[^"]+(/|\\\\)rocm-therock(/|\\\\)lib(/|\\\\)llvm(/|\\\\)amdgcn(/|\\\\).*]]ocml.bc" // ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ockl.bc" diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 3efa28b0469c1..6a2acb0d4f20e 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "clang/DependencyScanning/DependencyScanningService.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" 
-#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "clang/Tooling/DependencyScanningTool.h" #include "clang/Tooling/JSONCompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/STLExtras.h" @@ -40,7 +40,9 @@ #include "Opts.inc" using namespace clang; -using namespace tooling::dependencies; +using namespace tooling; +using namespace clang::dependencies; +using namespace clang::tooling::dependencies; namespace { diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index 54c781a35c20c..438a5c4c2e711 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -79,6 +79,7 @@ add_subdirectory(Basic) add_subdirectory(Lex) add_subdirectory(Parse) add_subdirectory(Driver) +add_subdirectory(DependencyScanning) if(CLANG_ENABLE_STATIC_ANALYZER) add_subdirectory(Analysis) add_subdirectory(StaticAnalyzer) diff --git a/clang/unittests/DependencyScanning/CMakeLists.txt b/clang/unittests/DependencyScanning/CMakeLists.txt new file mode 100644 index 0000000000000..40425820d4d08 --- /dev/null +++ b/clang/unittests/DependencyScanning/CMakeLists.txt @@ -0,0 +1,11 @@ +add_clang_unittest(ClangDependencyScanningTests + DependencyScanningFilesystemTest.cpp + DependencyScanningWorkerTest.cpp + CLANG_LIBS + clangDependencyScanning + clangFrontend # For TextDiagnosticPrinter. 
+ LLVM_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Option + Support + ) diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp similarity index 98% rename from clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp rename to clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp index cdb0ce2100d60..0e195411915aa 100644 --- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp +++ b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" +#include "clang/DependencyScanning/DependencyScanningFilesystem.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/VirtualFileSystem.h" #include "gtest/gtest.h" -using namespace clang::tooling::dependencies; +using namespace clang::dependencies; TEST(DependencyScanningFilesystem, OpenFileAndGetBufferRepeatedly) { auto InMemoryFS = llvm::makeIntrusiveRefCnt(); diff --git a/clang/unittests/DependencyScanning/DependencyScanningWorkerTest.cpp b/clang/unittests/DependencyScanning/DependencyScanningWorkerTest.cpp new file mode 100644 index 0000000000000..e6a5684b10cc9 --- /dev/null +++ b/clang/unittests/DependencyScanning/DependencyScanningWorkerTest.cpp @@ -0,0 +1,97 @@ +//===- DependencyScanningWorkerTest.cpp -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/DependencyScanning/DependencyScanningWorker.h" +#include "clang/DependencyScanning/DependencyScanningUtils.h" +#include "llvm/Support/FormatVariadic.h" +#include "gtest/gtest.h" +#include + +using namespace clang; +using namespace dependencies; + +TEST(DependencyScanner, ScanDepsWithDiagConsumer) { + StringRef CWD = "/root"; + + auto VFS = llvm::makeIntrusiveRefCnt(); + VFS->setCurrentWorkingDirectory(CWD); + auto Sept = llvm::sys::path::get_separator(); + std::string HeaderPath = + std::string(llvm::formatv("{0}root{0}header.h", Sept)); + std::string TestPath = std::string(llvm::formatv("{0}root{0}test.cpp", Sept)); + std::string AsmPath = std::string(llvm::formatv("{0}root{0}test.s", Sept)); + + VFS->addFile(HeaderPath, 0, llvm::MemoryBuffer::getMemBuffer("\n")); + VFS->addFile(TestPath, 0, + llvm::MemoryBuffer::getMemBuffer("#include \"header.h\"\n")); + VFS->addFile(AsmPath, 0, llvm::MemoryBuffer::getMemBuffer("")); + + DependencyScanningService Service(ScanningMode::DependencyDirectivesScan, + ScanningOutputFormat::Make); + DependencyScanningWorker Worker(Service, VFS); + + llvm::DenseSet AlreadySeen; + FullDependencyConsumer DC(AlreadySeen); + CallbackActionController AC(nullptr); + + struct EnsureFinishedConsumer : public DiagnosticConsumer { + bool Finished = false; + void finish() override { Finished = true; } + }; + + { + // Check that a successful scan calls DiagConsumer.finish(). 
+ std::vector Args = {"clang", + "-target", + "x86_64-apple-macosx10.7", + "-c", + "test.cpp", + "-o" + "test.cpp.o"}; + + EnsureFinishedConsumer DiagConsumer; + bool Success = Worker.computeDependencies(CWD, Args, DC, AC, DiagConsumer); + + EXPECT_TRUE(Success); + EXPECT_EQ(DiagConsumer.getNumErrors(), 0u); + EXPECT_TRUE(DiagConsumer.Finished); + } + + { + // Check that an invalid command-line, which never enters the scanning + // action calls DiagConsumer.finish(). + std::vector Args = {"clang", "-invalid-arg"}; + EnsureFinishedConsumer DiagConsumer; + bool Success = Worker.computeDependencies(CWD, Args, DC, AC, DiagConsumer); + + EXPECT_FALSE(Success); + EXPECT_GE(DiagConsumer.getNumErrors(), 1u); + EXPECT_TRUE(DiagConsumer.Finished); + } + + { + // Check that a valid command line that produces no scanning jobs calls + // DiagConsumer.finish(). + std::vector Args = {"clang", + "-target", + "x86_64-apple-macosx10.7", + "-c", + "-x", + "assembler", + "test.s", + "-o" + "test.cpp.o"}; + + EnsureFinishedConsumer DiagConsumer; + bool Success = Worker.computeDependencies(CWD, Args, DC, AC, DiagConsumer); + + EXPECT_FALSE(Success); + EXPECT_EQ(DiagConsumer.getNumErrors(), 1u); + EXPECT_TRUE(DiagConsumer.Finished); + } +} diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt index 106c6b9dc38bd..8c8b22250cd83 100644 --- a/clang/unittests/Tooling/CMakeLists.txt +++ b/clang/unittests/Tooling/CMakeLists.txt @@ -13,8 +13,7 @@ add_clang_unittest(ToolingTests LookupTest.cpp QualTypeNamesTest.cpp RangeSelectorTest.cpp - DependencyScanning/DependencyScannerTest.cpp - DependencyScanning/DependencyScanningFilesystemTest.cpp + DependencyScannerTest.cpp RecursiveASTVisitorTests/Attr.cpp RecursiveASTVisitorTests/BitfieldInitializer.cpp RecursiveASTVisitorTests/CallbacksLeaf.cpp diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp similarity index 78% rename from 
clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp rename to clang/unittests/Tooling/DependencyScannerTest.cpp index 4523af33e3c28..9fcd0545b17fa 100644 --- a/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -9,13 +9,13 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" +#include "clang/DependencyScanning/DependencyScanningWorker.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Tooling/CompilationDatabase.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" -#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "clang/Tooling/DependencyScanningTool.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/TargetRegistry.h" @@ -29,7 +29,8 @@ using namespace clang; using namespace tooling; -using namespace dependencies; +using namespace clang::dependencies; +using namespace tooling::dependencies; namespace { @@ -304,84 +305,3 @@ TEST(DependencyScanner, ScanDepsWithModuleLookup) { EXPECT_TRUE(!llvm::is_contained(InterceptFS->StatPaths, OtherPath)); EXPECT_EQ(InterceptFS->ReadFiles, std::vector{"test.m"}); } - -TEST(DependencyScanner, ScanDepsWithDiagConsumer) { - StringRef CWD = "/root"; - - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->setCurrentWorkingDirectory(CWD); - auto Sept = llvm::sys::path::get_separator(); - std::string HeaderPath = - std::string(llvm::formatv("{0}root{0}header.h", Sept)); - std::string TestPath = std::string(llvm::formatv("{0}root{0}test.cpp", Sept)); - std::string AsmPath = std::string(llvm::formatv("{0}root{0}test.s", Sept)); - - VFS->addFile(HeaderPath, 0, llvm::MemoryBuffer::getMemBuffer("\n")); - VFS->addFile(TestPath, 0, - llvm::MemoryBuffer::getMemBuffer("#include 
\"header.h\"\n")); - VFS->addFile(AsmPath, 0, llvm::MemoryBuffer::getMemBuffer("")); - - DependencyScanningService Service(ScanningMode::DependencyDirectivesScan, - ScanningOutputFormat::Make); - DependencyScanningWorker Worker(Service, VFS); - - llvm::DenseSet AlreadySeen; - FullDependencyConsumer DC(AlreadySeen); - CallbackActionController AC(nullptr); - - struct EnsureFinishedConsumer : public DiagnosticConsumer { - bool Finished = false; - void finish() override { Finished = true; } - }; - - { - // Check that a successful scan calls DiagConsumer.finish(). - std::vector Args = {"clang", - "-target", - "x86_64-apple-macosx10.7", - "-c", - "test.cpp", - "-o" - "test.cpp.o"}; - - EnsureFinishedConsumer DiagConsumer; - bool Success = Worker.computeDependencies(CWD, Args, DC, AC, DiagConsumer); - - EXPECT_TRUE(Success); - EXPECT_EQ(DiagConsumer.getNumErrors(), 0u); - EXPECT_TRUE(DiagConsumer.Finished); - } - - { - // Check that an invalid command-line, which never enters the scanning - // action calls DiagConsumer.finish(). - std::vector Args = {"clang", "-invalid-arg"}; - EnsureFinishedConsumer DiagConsumer; - bool Success = Worker.computeDependencies(CWD, Args, DC, AC, DiagConsumer); - - EXPECT_FALSE(Success); - EXPECT_GE(DiagConsumer.getNumErrors(), 1u); - EXPECT_TRUE(DiagConsumer.Finished); - } - - { - // Check that a valid command line that produces no scanning jobs calls - // DiagConsumer.finish(). 
- std::vector Args = {"clang", - "-target", - "x86_64-apple-macosx10.7", - "-c", - "-x", - "assembler", - "test.s", - "-o" - "test.cpp.o"}; - - EnsureFinishedConsumer DiagConsumer; - bool Success = Worker.computeDependencies(CWD, Args, DC, AC, DiagConsumer); - - EXPECT_FALSE(Success); - EXPECT_EQ(DiagConsumer.getNumErrors(), 1u); - EXPECT_TRUE(DiagConsumer.Finished); - } -} diff --git a/flang/include/flang/Optimizer/Builder/CUFCommon.h b/flang/include/flang/Optimizer/Builder/CUFCommon.h index 98d01958846f7..736f90123969c 100644 --- a/flang/include/flang/Optimizer/Builder/CUFCommon.h +++ b/flang/include/flang/Optimizer/Builder/CUFCommon.h @@ -14,7 +14,7 @@ #include "mlir/IR/BuiltinOps.h" static constexpr llvm::StringRef cudaDeviceModuleName = "cuda_device_mod"; -static constexpr llvm::StringRef cudaSharedMemSuffix = "__shared_mem"; +static constexpr llvm::StringRef cudaSharedMemSuffix = "__shared_mem__"; namespace fir { class FirOpBuilder; diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 3fda523acb382..920bef99dc996 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -351,7 +351,8 @@ def cuf_SharedMemoryOp OptionalAttr:$bindc_name, Variadic:$typeparams, Variadic:$shape, // offset in bytes from the shared memory base address. 
- Optional:$offset, OptionalAttr:$alignment); + Optional:$offset, OptionalAttr:$alignment, + UnitAttr:$isStatic); let results = (outs fir_ReferenceType:$ptr); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 5355ca60181b0..69c3300ba4390 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -3024,11 +3024,9 @@ static Op createComputeOp( } addOperand(operands, operandSegments, ifCond); addOperand(operands, operandSegments, selfCond); - if constexpr (!std::is_same_v) { - addOperands(operands, operandSegments, reductionOperands); - addOperands(operands, operandSegments, privateOperands); - addOperands(operands, operandSegments, firstprivateOperands); - } + addOperands(operands, operandSegments, reductionOperands); + addOperands(operands, operandSegments, privateOperands); + addOperands(operands, operandSegments, firstprivateOperands); addOperands(operands, operandSegments, dataClauseOperands); Op computeOp; diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp index ae6120826f8d2..3c86a9d7451f0 100644 --- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp @@ -53,6 +53,8 @@ static const char __ldlu_r2x2[] = "__ldlu_r2x2_"; static const char __ldlu_r4x4[] = "__ldlu_r4x4_"; static const char __ldlu_r8x2[] = "__ldlu_r8x2_"; +static constexpr unsigned kTMAAlignment = 16; + // CUDA specific intrinsic handlers. 
static constexpr IntrinsicHandler cudaHandlers[]{ {"__ldca_i4x4", @@ -1505,11 +1507,9 @@ static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize); auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); barrier = builder.createConvert(loc, llvmPtrTy, barrier); - setAlignment(dst, 16); - dst = convertPtrToNVVMSpace(builder, loc, dst, - mlir::NVVM::NVVMMemorySpace::Shared); - src = convertPtrToNVVMSpace(builder, loc, src, - mlir::NVVM::NVVMMemorySpace::Shared); + setAlignment(dst, kTMAAlignment); + dst = builder.createConvert(loc, llvmPtrTy, dst); + src = builder.createConvert(loc, llvmPtrTy, src); mlir::NVVM::InlinePtxOp::create( builder, loc, mlir::TypeRange{}, {dst, src, size, barrier}, {}, "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], " @@ -1611,6 +1611,7 @@ static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value src, mlir::Value dst, mlir::Value count, mlir::Value eleSize) { mlir::Value size = mlir::arith::MulIOp::create(builder, loc, eleSize, count); + setAlignment(src, kTMAAlignment); src = convertPtrToNVVMSpace(builder, loc, src, mlir::NVVM::NVVMMemorySpace::Shared); dst = convertPtrToNVVMSpace(builder, loc, dst, diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index 671e5f9455c22..97f7f76a8fbe7 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -333,7 +333,8 @@ void cuf::SharedMemoryOp::build( bindcName.empty() ? 
mlir::StringAttr{} : builder.getStringAttr(bindcName); build(builder, result, wrapAllocaResultType(inType), mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, - /*offset=*/mlir::Value{}, /*alignment=*/mlir::IntegerAttr{}); + /*offset=*/mlir::Value{}, /*alignment=*/mlir::IntegerAttr{}, + /*isStatic=*/nullptr); result.addAttributes(attributes); } diff --git a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp index a64494510d847..7bae0602fe5ca 100644 --- a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp +++ b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp @@ -46,6 +46,43 @@ static bool isAssumedSize(mlir::ValueRange shape) { return false; } +static void createSharedMemoryGlobal(fir::FirOpBuilder &builder, + mlir::Location loc, llvm::StringRef prefix, + llvm::StringRef suffix, + mlir::gpu::GPUModuleOp gpuMod, + mlir::Type sharedMemType, unsigned size, + unsigned align, bool isDynamic) { + std::string sharedMemGlobalName = + isDynamic ? (prefix + llvm::Twine(cudaSharedMemSuffix)).str() + : (prefix + llvm::Twine(cudaSharedMemSuffix) + suffix).str(); + + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(gpuMod.getBody()); + + mlir::StringAttr linkage = isDynamic ? 
builder.createExternalLinkage() + : builder.createInternalLinkage(); + llvm::SmallVector attrs; + auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(), + gpuMod.getContext()); + attrs.push_back(mlir::NamedAttribute( + fir::GlobalOp::getDataAttrAttrName(globalOpName), + cuf::DataAttributeAttr::get(gpuMod.getContext(), + cuf::DataAttribute::Shared))); + + mlir::DenseElementsAttr init = {}; + mlir::Type i8Ty = builder.getI8Type(); + if (size > 0) { + auto vecTy = mlir::VectorType::get( + static_cast(size), i8Ty); + mlir::Attribute zero = mlir::IntegerAttr::get(i8Ty, 0); + init = mlir::DenseElementsAttr::get(vecTy, llvm::ArrayRef(zero)); + } + auto sharedMem = + fir::GlobalOp::create(builder, loc, sharedMemGlobalName, false, false, + sharedMemType, init, linkage, attrs); + sharedMem.setAlignment(align); +} + struct CUFComputeSharedMemoryOffsetsAndSize : public fir::impl::CUFComputeSharedMemoryOffsetsAndSizeBase< CUFComputeSharedMemoryOffsetsAndSize> { @@ -108,18 +145,23 @@ struct CUFComputeSharedMemoryOffsetsAndSize crtDynOffset, dynSize); else crtDynOffset = dynSize; - - continue; + } else { + // Static shared memory. + auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash( + loc, sharedOp.getInType(), *dl, kindMap); + createSharedMemoryGlobal( + builder, sharedOp.getLoc(), funcOp.getName(), + *sharedOp.getBindcName(), gpuMod, + fir::SequenceType::get(size, i8Ty), size, + sharedOp.getAlignment() ? 
*sharedOp.getAlignment() : align, + /*isDynamic=*/false); + mlir::Value zero = builder.createIntegerConstant(loc, i32Ty, 0); + sharedOp.getOffsetMutable().assign(zero); + if (!sharedOp.getAlignment()) + sharedOp.setAlignment(align); + sharedOp.setIsStatic(true); + ++nbStaticSharedVariables; } - auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash( - sharedOp.getLoc(), sharedOp.getInType(), *dl, kindMap); - ++nbStaticSharedVariables; - mlir::Value offset = builder.createIntegerConstant( - loc, i32Ty, llvm::alignTo(sharedMemSize, align)); - sharedOp.getOffsetMutable().assign(offset); - sharedMemSize = - llvm::alignTo(sharedMemSize, align) + llvm::alignTo(size, align); - alignment = std::max(alignment, align); } if (nbDynamicSharedVariables == 0 && nbStaticSharedVariables == 0) @@ -130,35 +172,13 @@ struct CUFComputeSharedMemoryOffsetsAndSize funcOp.getLoc(), "static and dynamic shared variables in a single kernel"); - mlir::DenseElementsAttr init = {}; - if (sharedMemSize > 0) { - auto vecTy = mlir::VectorType::get(sharedMemSize, i8Ty); - mlir::Attribute zero = mlir::IntegerAttr::get(i8Ty, 0); - init = mlir::DenseElementsAttr::get(vecTy, llvm::ArrayRef(zero)); - } + if (nbStaticSharedVariables > 0) + continue; - // Create the shared memory global where each shared variable will point - // to. auto sharedMemType = fir::SequenceType::get(sharedMemSize, i8Ty); - std::string sharedMemGlobalName = - (funcOp.getName() + llvm::Twine(cudaSharedMemSuffix)).str(); - // Dynamic shared memory needs an external linkage while static shared - // memory needs an internal linkage. - mlir::StringAttr linkage = nbDynamicSharedVariables > 0 - ? 
builder.createExternalLinkage() - : builder.createInternalLinkage(); - builder.setInsertionPointToEnd(gpuMod.getBody()); - llvm::SmallVector attrs; - auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(), - gpuMod.getContext()); - attrs.push_back(mlir::NamedAttribute( - fir::GlobalOp::getDataAttrAttrName(globalOpName), - cuf::DataAttributeAttr::get(gpuMod.getContext(), - cuf::DataAttribute::Shared))); - auto sharedMem = fir::GlobalOp::create( - builder, funcOp.getLoc(), sharedMemGlobalName, false, false, - sharedMemType, init, linkage, attrs); - sharedMem.setAlignment(alignment); + createSharedMemoryGlobal(builder, funcOp.getLoc(), funcOp.getName(), "", + gpuMod, sharedMemType, sharedMemSize, alignment, + /*isDynamic=*/true); } } }; diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp index 40f180a8c1657..d5a8212eb5472 100644 --- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp @@ -249,8 +249,13 @@ struct CUFSharedMemoryOpConversion "cuf.shared_memory must have an offset for code gen"); auto gpuMod = op->getParentOfType(); + std::string sharedGlobalName = - (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix)).str(); + op.getIsStatic() + ? 
(getFuncName(op) + llvm::Twine(cudaSharedMemSuffix) + + *op.getBindcName()) + .str() + : (getFuncName(op) + llvm::Twine(cudaSharedMemSuffix)).str(); mlir::Value sharedGlobalAddr = createAddressOfOp(rewriter, loc, gpuMod, sharedGlobalName); diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir index 60cda9e98c7d8..e83648f21bdf1 100644 --- a/flang/test/Fir/CUDA/cuda-code-gen.mlir +++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir @@ -201,9 +201,9 @@ func.func @_QMm1Psub1(%arg0: !fir.box> {cuf.data_attr = #cuf.c // ----- -fir.global common @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<28xi8> +fir.global common @_QPshared_static__shared_mem__(dense<0> : vector<28xi8>) {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<28xi8> -// CHECK: llvm.mlir.global common @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<28 x i8> +// CHECK: llvm.mlir.global common @_QPshared_static__shared_mem__(dense<0> : vector<28xi8>) {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<28 x i8> // ----- diff --git a/flang/test/Fir/CUDA/cuda-shared-offset.mlir b/flang/test/Fir/CUDA/cuda-shared-offset.mlir index 37b36b2bd050e..1a39fefe85cda 100644 --- a/flang/test/Fir/CUDA/cuda-shared-offset.mlir +++ b/flang/test/Fir/CUDA/cuda-shared-offset.mlir @@ -17,7 +17,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, %{{.*}} : index {bindc_name = "r", uniq_name = "_QFdynsharedEr"} -> !fir.ref> // CHECK: gpu.return // CHECK: } -// CHECK: fir.global external @_QPdynshared__shared_mem {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8> +// CHECK: fir.global external @_QPdynshared__shared_mem__ {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8> // ----- @@ -43,15 +43,20 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry !fir.ref -// CHECK: cuf.shared_memory[%c4{{.*}} : i32] i32 
{bindc_name = "b", uniq_name = "_QFshared_staticEb"} -> !fir.ref -// CHECK: cuf.shared_memory[%c8{{.*}} : i32] i32 {bindc_name = "c", uniq_name = "_QFshared_staticEc"} -> !fir.ref -// CHECK: cuf.shared_memory[%c12{{.*}} : i32] i32 {bindc_name = "d", uniq_name = "_QFshared_staticEd"} -> !fir.ref -// CHECK: cuf.shared_memory[%c16{{.*}} : i32] i64 {bindc_name = "e", uniq_name = "_QFshared_staticEe"} -> !fir.ref -// CHECK: cuf.shared_memory[%c24{{.*}} : i32] f32 {bindc_name = "r", uniq_name = "_QFshared_staticEr"} -> !fir.ref +// CHECK: cuf.shared_memory[%c0{{.*}} : i32] i32 align 4 {bindc_name = "a", isStatic, uniq_name = "_QFshared_staticEa"} -> !fir.ref +// CHECK: cuf.shared_memory[%c0{{.*}} : i32] i32 align 4 {bindc_name = "b", isStatic, uniq_name = "_QFshared_staticEb"} -> !fir.ref +// CHECK: cuf.shared_memory[%c0{{.*}} : i32] i32 align 4 {bindc_name = "c", isStatic, uniq_name = "_QFshared_staticEc"} -> !fir.ref +// CHECK: cuf.shared_memory[%c0{{.*}} : i32] i32 align 4 {bindc_name = "d", isStatic, uniq_name = "_QFshared_staticEd"} -> !fir.ref +// CHECK: cuf.shared_memory[%c0{{.*}} : i32] i64 align 8 {bindc_name = "e", isStatic, uniq_name = "_QFshared_staticEe"} -> !fir.ref +// CHECK: cuf.shared_memory[%c0{{.*}} : i32] f32 align 4 {bindc_name = "r", isStatic, uniq_name = "_QFshared_staticEr"} -> !fir.ref // CHECK: gpu.return // CHECK: } -// CHECK: fir.global internal @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<28xi8> +// CHECK: fir.global internal @_QPshared_static__shared_mem__a(dense<0> : vector<4xi8>) {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<4xi8> +// CHECK: fir.global internal @_QPshared_static__shared_mem__b(dense<0> : vector<4xi8>) {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<4xi8> +// CHECK: fir.global internal @_QPshared_static__shared_mem__c(dense<0> : vector<4xi8>) {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<4xi8> +// CHECK: fir.global 
internal @_QPshared_static__shared_mem__d(dense<0> : vector<4xi8>) {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<4xi8> +// CHECK: fir.global internal @_QPshared_static__shared_mem__e(dense<0> : vector<8xi8>) {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<8xi8> +// CHECK: fir.global internal @_QPshared_static__shared_mem__r(dense<0> : vector<4xi8>) {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<4xi8> // CHECK: } // CHECK: } @@ -159,4 +164,4 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, %{{.*}} : index {bindc_name = "dmasks", uniq_name = "_QMmtestsFtestanyEdmasks"} -> !fir.ref> // CHECK: %{{.*}} = cuf.shared_memory[%c0{{.*}} : i32] !fir.array, %{{.*}} : index {bindc_name = "smasks", uniq_name = "_QMmtestsFtestanyEsmasks"} -> !fir.ref> -// CHECK: fir.global external @_QMmtestsPtestany__shared_mem {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8> +// CHECK: fir.global external @_QMmtestsPtestany__shared_mem__ {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8> diff --git a/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir b/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir index 26479d1cdd94f..69370613cd348 100644 --- a/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir +++ b/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir @@ -9,14 +9,14 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry !fir.ref llvm.return } - llvm.mlir.global common @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<28 x i8> + llvm.mlir.global common @_QPshared_static__shared_mem__(dense<0> : vector<28xi8>) {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<28 x i8> } } // CHECK-LABEL: llvm.func @_QPshared_static() -// CHECK: %[[ADDR0:.*]] = llvm.mlir.addressof @_QPshared_static__shared_mem : !llvm.ptr<3> +// CHECK: %[[ADDR0:.*]] = llvm.mlir.addressof @_QPshared_static__shared_mem__ : !llvm.ptr<3> // CHECK: %[[ADDRCAST0:.*]] = llvm.addrspacecast 
%[[ADDR0]] : !llvm.ptr<3> to !llvm.ptr // CHECK: %[[A:.*]] = llvm.getelementptr %[[ADDRCAST0]][%c0{{.*}}] : (!llvm.ptr, i32) -> !llvm.ptr, i8 -// CHECK: %[[ADDR1:.*]] = llvm.mlir.addressof @_QPshared_static__shared_mem : !llvm.ptr<3> +// CHECK: %[[ADDR1:.*]] = llvm.mlir.addressof @_QPshared_static__shared_mem__ : !llvm.ptr<3> // CHECK: %[[ADDRCAST1:.*]] = llvm.addrspacecast %[[ADDR1]] : !llvm.ptr<3> to !llvm.ptr // CHECK: %[[B:.*]] = llvm.getelementptr %[[ADDRCAST1]][%c4{{.*}}] : (!llvm.ptr, i32) -> !llvm.ptr, i8 diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 1e3c66307c334..27ef8e0889627 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -543,7 +543,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_c8(a, n) @@ -563,7 +563,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 16 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! 
CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_i4(a, n) @@ -583,7 +583,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_i8(a, n) @@ -603,7 +603,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! 
CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r2(a, n) @@ -623,7 +623,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 2 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r4(a, n) @@ -643,7 +643,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! 
CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r8(a, n) @@ -663,7 +663,7 @@ end subroutine ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_store_c4(c, n) @@ -675,6 +675,7 @@ attributes(global) subroutine test_tma_bulk_store_c4(c, n) end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c4 +! CHECK: cuf.shared_memory !fir.array<1024xcomplex> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_c4Etmpa"} -> !fir.ref>> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -688,6 +689,7 @@ attributes(global) subroutine test_tma_bulk_store_c8(c, n) end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c8 +! CHECK: cuf.shared_memory !fir.array<1024xcomplex> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_c8Etmpa"} -> !fir.ref>> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! 
CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -701,6 +703,7 @@ attributes(global) subroutine test_tma_bulk_store_i4(c, n) end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i4 +! CHECK: cuf.shared_memory !fir.array<1024xi32> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_i4Etmpa"} -> !fir.ref> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -714,6 +717,7 @@ attributes(global) subroutine test_tma_bulk_store_i8(c, n) end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i8 +! CHECK: cuf.shared_memory !fir.array<1024xi64> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_i8Etmpa"} -> !fir.ref> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -728,6 +732,7 @@ attributes(global) subroutine test_tma_bulk_store_r2(c, n) end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r2 +! CHECK: cuf.shared_memory !fir.array<1024xf16> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_r2Etmpa"} -> !fir.ref> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -741,6 +746,7 @@ attributes(global) subroutine test_tma_bulk_store_r4(c, n) end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r4 +! CHECK: cuf.shared_memory !fir.array<1024xf32> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_r4Etmpa"} -> !fir.ref> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 @@ -754,6 +760,7 @@ attributes(global) subroutine test_tma_bulk_store_r8(c, n) end subroutine ! 
CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r8 +! CHECK: cuf.shared_memory !fir.array<1024xf64> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_r8Etmpa"} -> !fir.ref> ! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> ! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;" ! CHECK: nvvm.cp.async.bulk.wait_group 0 diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90 index b8bfc37d1758f..adc8de00a9b7a 100644 --- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 +++ b/flang/test/Lower/OpenMP/Todo/reduction-task.f90 @@ -8,5 +8,5 @@ subroutine reduction_task() !$omp parallel reduction(task, +:i) i = i + 1 - !$omp end parallel + !$omp end parallel end subroutine reduction_task diff --git a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 index 0688f0ddc180d..ade197bc51590 100644 --- a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 +++ b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 @@ -47,7 +47,7 @@ subroutine read_write_section() integer, allocatable :: sp_read(:) integer, allocatable :: sp_write(:) - allocate(sp_read(10)) + allocate(sp_read(10)) allocate(sp_write(10)) sp_write = (/0,0,0,0,0,0,0,0,0,0/) sp_read = (/1,2,3,4,5,6,7,8,9,10/) diff --git a/flang/test/Lower/OpenMP/allocatable-map.f90 b/flang/test/Lower/OpenMP/allocatable-map.f90 index 114967526b8d5..e1c4694d7d6b7 100644 --- a/flang/test/Lower/OpenMP/allocatable-map.f90 +++ b/flang/test/Lower/OpenMP/allocatable-map.f90 @@ -6,7 +6,7 @@ !HLFIRDIALECT: %[[POINTER_MAP:.*]] = omp.map.info var_ptr(%[[POINTER]]#1 : !fir.ref>>, !fir.box>) map_clauses(always, to) capture(ByRef) members(%[[POINTER_MAP_MEMBER]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> {name = "point"} !HLFIRDIALECT: omp.target map_entries(%[[POINTER_MAP]] -> {{.*}}, %[[POINTER_MAP_MEMBER]] -> {{.*}} : !fir.ref>>, !fir.llvm_ptr>) { subroutine pointer_routine() - integer, pointer 
:: point + integer, pointer :: point !$omp target map(tofrom:point) point = 1 !$omp end target diff --git a/flang/test/Lower/OpenMP/atomic-capture.f90 b/flang/test/Lower/OpenMP/atomic-capture.f90 index 14fd0c942a9b4..f561deac91298 100644 --- a/flang/test/Lower/OpenMP/atomic-capture.f90 +++ b/flang/test/Lower/OpenMP/atomic-capture.f90 @@ -7,7 +7,7 @@ program OmpAtomicCapture - use omp_lib + use omp_lib !CHECK: %[[VAL_X_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} !CHECK: %[[VAL_X_DECLARE:.*]]:2 = hlfir.declare %[[VAL_X_ALLOCA]] {{.*}} @@ -25,7 +25,7 @@ program OmpAtomicCapture !CHECK: omp.atomic.read %[[VAL_X_DECLARE]]#0 = %[[VAL_Y_DECLARE]]#0 : !fir.ref, !fir.ref, i32 !CHECK: } !$omp atomic hint(omp_sync_hint_uncontended) capture - y = x * y + y = x * y x = y !$omp end atomic @@ -43,7 +43,7 @@ program OmpAtomicCapture !CHECK: } !$omp atomic hint(omp_lock_hint_nonspeculative) capture acquire x = y - y = 2 * 10 + (8 - x) + y = 2 * 10 + (8 - x) !$omp end atomic end program diff --git a/flang/test/Lower/OpenMP/atomic-read-complex.f90 b/flang/test/Lower/OpenMP/atomic-read-complex.f90 index 2f51f03820926..cd20c5d0316b7 100644 --- a/flang/test/Lower/OpenMP/atomic-read-complex.f90 +++ b/flang/test/Lower/OpenMP/atomic-read-complex.f90 @@ -15,7 +15,7 @@ program atomic_read_complex complex(4) :: c41, c42 ! Test complex(8) - double precision (16 bytes) complex(8) :: c81, c82 - + c42 = (1.0_4, 1.0_4) c82 = (1.0_8, 1.0_8) @@ -25,7 +25,7 @@ program atomic_read_complex ! CHECK: call void @__atomic_load(i64 8, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) !$omp atomic read c41 = c42 - + ! Double precision complex: 16 bytes (this was broken before the fix) ! 
CHECK: call void @__atomic_load(i64 16, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) !$omp atomic read diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 index f88bbea6fca85..05adee312dd6a 100644 --- a/flang/test/Lower/OpenMP/atomic-update.f90 +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -73,7 +73,7 @@ program OmpAtomicUpdate !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } !$omp atomic update - a = a + b + a = a + b !CHECK: %[[VAL_c1:.*]] = arith.constant 1 : i32 !CHECK: omp.atomic.update %[[VAL_Y_DECLARE]]#0 : !fir.ref { @@ -81,7 +81,7 @@ program OmpAtomicUpdate !CHECK: %[[TEMP:.*]] = arith.addi %[[ARG]], %[[VAL_c1]] : i32 !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } - !$omp atomic + !$omp atomic y = y + 1 !CHECK: %[[VAL_X_LOADED:.*]] = fir.load %[[VAL_X_DECLARE]]#0 : !fir.ref @@ -91,7 +91,7 @@ program OmpAtomicUpdate !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } !$omp atomic update - z = x * z + z = x * z !CHECK: %[[VAL_c1:.*]] = arith.constant 1 : i32 !CHECK: omp.atomic.update hint(uncontended) memory_order(relaxed) %[[VAL_X_DECLARE]]#0 : !fir.ref { @@ -110,7 +110,7 @@ program OmpAtomicUpdate !CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32 !CHECK: omp.yield(%[[TEMP]] : i32) !CHECK: } - !$omp atomic update relaxed + !$omp atomic update relaxed y = max(y, c, d) !CHECK: %[[VAL_X_LOADED:.*]] = fir.load %[[VAL_X_DECLARE]]#0 : !fir.ref @@ -211,7 +211,7 @@ program OmpAtomicUpdate !CHECK: %[[RESULT:.*]] = fir.convert %[[EXT]] : (f32) -> i32 !CHECK: omp.yield(%[[RESULT]] : i32) !$omp atomic update - w = w + g + w = w + g end program OmpAtomicUpdate ! Check that the clean-ups associated with the function call diff --git a/flang/test/Lower/OpenMP/atomic-write-complex.f90 b/flang/test/Lower/OpenMP/atomic-write-complex.f90 index 48cfe26ca5a49..4e975bf2c9c6a 100644 --- a/flang/test/Lower/OpenMP/atomic-write-complex.f90 +++ b/flang/test/Lower/OpenMP/atomic-write-complex.f90 @@ -13,19 +13,19 @@ program atomic_write_complex ! 
Test complex(4) - single precision (8 bytes) complex(4) :: c41, c42 - ! Test complex(8) - double precision (16 bytes) + ! Test complex(8) - double precision (16 bytes) complex(8) :: c81, c82 - + c42 = (1.0_4, 1.0_4) c82 = (1.0_8, 1.0_8) ! CHECK-LABEL: define {{.*}} @_QQmain - + ! Single precision complex: 8 bytes ! CHECK: call void @__atomic_store(i64 8, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) !$omp atomic write c41 = c42 - + ! Double precision complex: 16 bytes (this was broken before the fix) ! CHECK: call void @__atomic_store(i64 16, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) !$omp atomic write diff --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90 index 129d8bde4dd7b..6cdbbd2c12e6e 100644 --- a/flang/test/Lower/OpenMP/copyin.f90 +++ b/flang/test/Lower/OpenMP/copyin.f90 @@ -335,7 +335,7 @@ subroutine common_2() integer :: y common /d/ x, y !$omp threadprivate(/d/) - + !$omp parallel do copyin(/d/) do i = 1, x y = y + i diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 index 4abf750cf735a..d6175dd8730c2 100644 --- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 @@ -1,8 +1,8 @@ !RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s --check-prefixes ALL,HOST !RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes ALL,DEVICE -! Check specification valid forms of declare target with functions -! utilising device_type and to clauses as well as the default +! Check specification valid forms of declare target with functions +! utilising device_type and to clauses as well as the default ! zero clause declare target ! DEVICE-LABEL: func.func @_QPfunc_t_device() @@ -94,8 +94,8 @@ END FUNCTION FUNC_NAME_AS_RESULT !! ----- -! Check specification valid forms of declare target with subroutines -! 
utilising device_type and to clauses as well as the default +! Check specification valid forms of declare target with subroutines +! utilising device_type and to clauses as well as the default ! zero clause declare target ! DEVICE-LABEL: func.func @_QPsubr_t_device() diff --git a/flang/test/Lower/OpenMP/default-clause-byref.f90 b/flang/test/Lower/OpenMP/default-clause-byref.f90 index af51c4cc3e814..0d473af7f8b88 100644 --- a/flang/test/Lower/OpenMP/default-clause-byref.f90 +++ b/flang/test/Lower/OpenMP/default-clause-byref.f90 @@ -197,21 +197,21 @@ subroutine nested_default_clause_tests !CHECK: } !CHECK: omp.terminator !CHECK: } - !$omp parallel firstprivate(x) private(y) shared(w) default(private) + !$omp parallel firstprivate(x) private(y) shared(w) default(private) !$omp parallel default(private) y = 20 - x = 10 - !$omp end parallel + x = 10 + !$omp end parallel - !$omp parallel default(firstprivate) shared(y) private(w) + !$omp parallel default(firstprivate) shared(y) private(w) y = 30 - w = 40 + w = 40 z = 50 k = 40 !$omp end parallel !$omp end parallel - - + + !CHECK: omp.parallel private({{.*}} {{.*}}#0 -> %[[PRIVATE_X:.*]], {{.*}} {{.*}}#0 -> %[[PRIVATE_Y:.*]], {{.*}} {{.*}}#0 -> %[[PRIVATE_Z:.*]] : {{.*}}) { !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -240,8 +240,8 @@ subroutine nested_default_clause_tests !$omp parallel default(private) shared(z) w = x + z !$omp end parallel - !$omp end parallel - + !$omp end parallel + !CHECK: omp.parallel private({{.*}} {{.*}}#0 -> %[[PRIVATE_X:.*]], {{.*}} {{.*}}#0 -> %[[PRIVATE_Y:.*]], {{.*}} {{.*}}#0 -> %[[PRIVATE_W:.*]], {{.*}} {{.*}}#0 -> %[[PRIVATE_Z:.*]] : {{.*}}) { !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = 
"_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFnested_default_clause_testsEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -283,7 +283,7 @@ subroutine nested_default_clause_tests !CHECK: omp.terminator !CHECK: } !CHECK: return -!CHECK: } +!CHECK: } !$omp parallel default(firstprivate) !$omp single x = y diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90 index 77725836a0a04..c16d19c129b8f 100644 --- a/flang/test/Lower/OpenMP/default-clause.f90 +++ b/flang/test/Lower/OpenMP/default-clause.f90 @@ -432,7 +432,7 @@ subroutine skipped_default_clause_checks() !CHECK: %[[VAR_X_DECLARE_INNER:.*]] = hlfir.declare %[[CONVERT_INNER]] storage(%[[BLK_THREADPRIVATE_INNER]][0]) {uniq_name = "_QFthreadprivate_with_defaultEx"} : (!fir.ref, !fir.ref>) -> (!fir.ref, !fir.ref) subroutine threadprivate_with_default integer :: x - common /blk/ x + common /blk/ x !$omp threadprivate (/blk/) !$omp parallel do default(private) diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 index ad53703d3122e..d79f97d9bc9e9 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 @@ -31,6 +31,6 @@ subroutine delayed_privatization_private_firstprivate ! CHECK: %[[VAR2_DECL:.*]]:2 = hlfir.declare %[[VAR2_ALLOC]] ! CHECK: omp.parallel private( -! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM]] %[[VAR1_DECL]]#0 -> %{{[^,]+}}, +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM]] %[[VAR1_DECL]]#0 -> %{{[^,]+}}, ! CHECK-SAME: @[[VAR2_PRIVATIZER_SYM]] %[[VAR2_DECL]]#0 -> %{{.*}} : ! 
CHECK-SAME: !fir.ref, !fir.ref) { diff --git a/flang/test/Lower/OpenMP/derived-type-map.f90 b/flang/test/Lower/OpenMP/derived-type-map.f90 index 3b39b694c37b2..fb4b88a00d0af 100644 --- a/flang/test/Lower/OpenMP/derived-type-map.f90 +++ b/flang/test/Lower/OpenMP/derived-type-map.f90 @@ -12,8 +12,8 @@ subroutine mapType_derived_implicit integer(4) :: array(10) integer(4) :: int end type scalar_and_array - type(scalar_and_array) :: scalar_arr - + type(scalar_and_array) :: scalar_arr + !$omp target scalar_arr%int = 1 !$omp end target @@ -49,8 +49,8 @@ subroutine mapType_derived_explicit integer(4) :: array(10) integer(4) :: int end type scalar_and_array - type(scalar_and_array) :: scalar_arr - + type(scalar_and_array) :: scalar_arr + !$omp target map(tofrom: scalar_arr) scalar_arr%int = 1 !$omp end target @@ -69,8 +69,8 @@ subroutine mapType_derived_explicit_single_member integer(4) :: array(10) integer(4) :: int end type scalar_and_array - type(scalar_and_array) :: scalar_arr - + type(scalar_and_array) :: scalar_arr + !$omp target map(tofrom: scalar_arr%array) scalar_arr%array(1) = 1 !$omp end target @@ -90,13 +90,13 @@ subroutine mapType_derived_explicit_multiple_members integer(4) :: array(10) integer(4) :: int end type scalar_and_array - type(scalar_and_array) :: scalar_arr - + type(scalar_and_array) :: scalar_arr + !$omp target map(tofrom: scalar_arr%int, scalar_arr%real) scalar_arr%int = 1 !$omp end target end subroutine mapType_derived_explicit_multiple_members - + !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<_QFmaptype_derived_explicit_member_with_boundsTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> {bindc_name = "scalar_arr", uniq_name = "_QFmaptype_derived_explicit_member_with_boundsEscalar_arr"} !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFmaptype_derived_explicit_member_with_boundsEscalar_arr"} : (!fir.ref,int:i32}>>) -> (!fir.ref,int:i32}>>, !fir.ref,int:i32}>>) !CHECK: %[[MEMBER:.*]] = hlfir.designate 
%[[DECLARE]]#0{"array"} shape %{{.*}} : (!fir.ref,int:i32}>>, !fir.shape<1>) -> !fir.ref> @@ -113,8 +113,8 @@ subroutine mapType_derived_explicit_member_with_bounds integer(4) :: array(10) integer(4) :: int end type scalar_and_array - type(scalar_and_array) :: scalar_arr - + type(scalar_and_array) :: scalar_arr + !$omp target map(tofrom: scalar_arr%array(2:5)) scalar_arr%array(3) = 3 !$omp end target @@ -141,8 +141,8 @@ subroutine mapType_derived_nested_explicit_single_member type(nested) :: nest integer(4) :: int end type scalar_and_array - - type(scalar_and_array) :: scalar_arr + + type(scalar_and_array) :: scalar_arr !$omp target map(tofrom: scalar_arr%nest%array) scalar_arr%nest%array(1) = 1 @@ -173,7 +173,7 @@ subroutine mapType_derived_nested_explicit_multiple_members integer(4) :: int end type scalar_and_array - type(scalar_and_array) :: scalar_arr + type(scalar_and_array) :: scalar_arr !$omp target map(tofrom: scalar_arr%nest%int, scalar_arr%nest%real) scalar_arr%nest%int = 1 @@ -205,9 +205,9 @@ subroutine mapType_derived_nested_explicit_member_with_bounds type(nested) :: nest integer(4) :: int end type scalar_and_array - - type(scalar_and_array) :: scalar_arr - + + type(scalar_and_array) :: scalar_arr + !$omp target map(tofrom: scalar_arr%nest%array(2:5)) scalar_arr%nest%array(3) = 3 !$omp end target @@ -239,7 +239,7 @@ subroutine mapType_multilpe_derived_nested_explicit_member type(nested) :: nest integer(4) :: int end type scalar_and_array - + type(scalar_and_array) :: scalar_arr1 type(scalar_and_array) :: scalar_arr2 diff --git a/flang/test/Lower/OpenMP/distribute.f90 b/flang/test/Lower/OpenMP/distribute.f90 index ea57d35b964b4..bd0e220c1989c 100644 --- a/flang/test/Lower/OpenMP/distribute.f90 +++ b/flang/test/Lower/OpenMP/distribute.f90 @@ -104,7 +104,7 @@ subroutine distribute_allocate() ! CHECK-NEXT: omp.loop_nest do i = 1, 10 x = i - ! CHECK: omp.yield + ! 
CHECK: omp.yield end do !$omp end distribute diff --git a/flang/test/Lower/OpenMP/generic-loop-rewriting.f90 b/flang/test/Lower/OpenMP/generic-loop-rewriting.f90 index eaf31e3ffb779..75731a6ec6ff2 100644 --- a/flang/test/Lower/OpenMP/generic-loop-rewriting.f90 +++ b/flang/test/Lower/OpenMP/generic-loop-rewriting.f90 @@ -38,12 +38,12 @@ end subroutine target_teams_loop !CHECK: %[[UB:.*]] = arith.constant 10 : i32 !CHECK: %[[STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.parallel private(@{{.*}} %[[I_DECL]]#0 +!CHECK: omp.parallel private(@{{.*}} %[[I_DECL]]#0 !CHECK-SAME: -> %[[I_PRIV_ARG:[^[:space:]]+]] : !fir.ref) { !CHECK: omp.distribute { !CHECK: omp.wsloop { -!CHECK: omp.loop_nest (%{{.*}}) : i32 = +!CHECK: omp.loop_nest (%{{.*}}) : i32 = !CHECK-SAME: (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_PRIV_ARG]] !CHECK: hlfir.assign %{{.*}} to %[[I_PRIV_DECL]]#0 : i32, !fir.ref diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90 index fe5b9597f8620..cd759a988e4f5 100644 --- a/flang/test/Lower/OpenMP/host-eval.f90 +++ b/flang/test/Lower/OpenMP/host-eval.f90 @@ -7,7 +7,7 @@ subroutine teams() ! BOTH: omp.target ! HOST-SAME: host_eval(%{{.*}} -> %[[NUM_TEAMS:.*]], %{{.*}} -> %[[THREAD_LIMIT:.*]] : i32, i32) - + ! DEVICE-NOT: host_eval({{.*}}) ! DEVICE-SAME: { !$omp target @@ -32,9 +32,9 @@ end subroutine teams ! BOTH-LABEL: func.func @_QPdistribute_parallel_do subroutine distribute_parallel_do() ! BOTH: omp.target - + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32) - + ! DEVICE-NOT: host_eval({{.*}}) ! DEVICE-SAME: { @@ -94,9 +94,9 @@ end subroutine distribute_parallel_do ! BOTH-LABEL: func.func @_QPdistribute_parallel_do_simd subroutine distribute_parallel_do_simd() ! BOTH: omp.target - + ! 
HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]], %{{.*}} -> %[[NUM_THREADS:.*]] : i32, i32, i32, i32) - + ! DEVICE-NOT: host_eval({{.*}}) ! DEVICE-SAME: { @@ -159,9 +159,9 @@ end subroutine distribute_parallel_do_simd ! BOTH-LABEL: func.func @_QPdistribute subroutine distribute() ! BOTH: omp.target - + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) - + ! DEVICE-NOT: host_eval({{.*}}) ! DEVICE-SAME: { @@ -209,9 +209,9 @@ end subroutine distribute ! BOTH-LABEL: func.func @_QPdistribute_simd subroutine distribute_simd() ! BOTH: omp.target - + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) - + ! DEVICE-NOT: host_eval({{.*}}) ! DEVICE-SAME: { @@ -262,9 +262,9 @@ end subroutine distribute_simd ! BOTH-LABEL: func.func @_QPloop subroutine loop() ! BOTH: omp.target - + ! HOST-SAME: host_eval(%{{.*}} -> %[[LB:.*]], %{{.*}} -> %[[UB:.*]], %{{.*}} -> %[[STEP:.*]] : i32, i32, i32) - + ! DEVICE-NOT: host_eval({{.*}}) ! 
DEVICE-SAME: { diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90 index 114ea6c0f200c..795838d814ac9 100644 --- a/flang/test/Lower/OpenMP/lastprivate-iv.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90 @@ -96,7 +96,7 @@ subroutine lastprivate_iv_i1 end subroutine !CHECK: omp.wsloop private(@_QFlastprivate_iv_pointerEi_private_box_ptr_i32 %{{.*}}#0 -> %[[PRIVATE_IV:.*]] : !fir.ref>>) { -!CHECK: omp.loop_nest (%[[LOOP_INDEX:.*]]) : i64 +!CHECK: omp.loop_nest (%[[LOOP_INDEX:.*]]) : i64 !CHECK: %[[PRIVATE_IV_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFlastprivate_iv_pointerEi"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !CHECK: %[[LOOP_INDEX_INCR:.*]] = arith.addi %[[LOOP_INDEX]], %{{.*}} : i64 !CHECK: fir.if %{{.*}} { diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90 index a974f264cc040..05770c029286d 100644 --- a/flang/test/Lower/OpenMP/loop-directive.f90 +++ b/flang/test/Lower/OpenMP/loop-directive.f90 @@ -112,7 +112,7 @@ subroutine test_nested_directives ! CHECK: omp.teams { - ! Verify the first `loop` directive was combined with `target teams` into + ! Verify the first `loop` directive was combined with `target teams` into ! `target teams distribute parallel do`. ! CHECK: omp.parallel {{.*}} { ! CHECK: omp.distribute { @@ -371,7 +371,7 @@ subroutine teams_loop_cannot_be_parallel_for_with_reductions ! CHECK: %[[ADD_RED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QF{{.*}}Ex"} ! CHECK: %[[MUL_RED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QF{{.*}}Ey"} ! CHECK: omp.teams reduction( - ! CHECK-SAME: @add_reduction_i32 %[[ADD_RED]]#0 -> %[[ADD_RED_ARG:[^[:space:]]*]], + ! CHECK-SAME: @add_reduction_i32 %[[ADD_RED]]#0 -> %[[ADD_RED_ARG:[^[:space:]]*]], ! CHECK-SAME: @multiply_reduction_i32 %[[MUL_RED]]#0 -> %[[MUL_RED_ARG:.*]] : {{.*}}) { ! 
CHECK: omp.distribute private(@{{.*}} %{{.*}} -> %{{.*}}, @{{.*}} %{{.*}} -> %{{.*}} : {{.*}}) { diff --git a/flang/test/Lower/OpenMP/map-no-modifier-v60.f90 b/flang/test/Lower/OpenMP/map-no-modifier-v60.f90 index bcc37e48f8c11..d84ea735d3157 100644 --- a/flang/test/Lower/OpenMP/map-no-modifier-v60.f90 +++ b/flang/test/Lower/OpenMP/map-no-modifier-v60.f90 @@ -9,4 +9,3 @@ subroutine f00 !$omp target map(x) !$omp end target end - diff --git a/flang/test/Lower/OpenMP/masked_taskloop.f90 b/flang/test/Lower/OpenMP/masked_taskloop.f90 index abe20ec1fd87c..4ace6fe40016f 100644 --- a/flang/test/Lower/OpenMP/masked_taskloop.f90 +++ b/flang/test/Lower/OpenMP/masked_taskloop.f90 @@ -3,18 +3,18 @@ ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s -! CHECK-LABEL: omp.private {type = private} +! CHECK-LABEL: omp.private {type = private} ! CHECK-SAME: @[[I_PRIVATE:.*]] : i32 -! CHECK-LABEL: omp.private -! CHECK-SAME: {type = firstprivate} @[[J_FIRSTPRIVATE:.*]] : i32 +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = firstprivate} @[[J_FIRSTPRIVATE:.*]] : i32 ! CHECK-SAME: copy { -! CHECK: hlfir.assign +! CHECK: hlfir.assign ! CHECK-LABEL: func.func @_QPtest_masked_taskloop() { ! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope ! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_masked_taskloopEi"} -! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] +! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] ! CHECK-SAME: {uniq_name = "_QFtest_masked_taskloopEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ALLOCA_J:.*]] = fir.address_of(@_QFtest_masked_taskloopEj) : !fir.ref ! 
CHECK: %[[DECL_J:.*]]:2 = hlfir.declare %[[ALLOCA_J]] {uniq_name = "_QFtest_masked_taskloopEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -51,5 +51,5 @@ subroutine test_masked_taskloop do i=1,10 j = j + 1 end do - !$omp end masked taskloop + !$omp end masked taskloop end subroutine diff --git a/flang/test/Lower/OpenMP/master_taskloop_simd.f90 b/flang/test/Lower/OpenMP/master_taskloop_simd.f90 index e928afd65244a..a5f5b9f377af9 100644 --- a/flang/test/Lower/OpenMP/master_taskloop_simd.f90 +++ b/flang/test/Lower/OpenMP/master_taskloop_simd.f90 @@ -6,7 +6,7 @@ subroutine test_master_taskloop_simd() integer :: i, j = 1 !CHECK: not yet implemented: Composite TASKLOOP SIMD - !$omp master taskloop simd + !$omp master taskloop simd do i=1,10 j = j + 1 end do diff --git a/flang/test/Lower/OpenMP/multiple-entry-points.f90 b/flang/test/Lower/OpenMP/multiple-entry-points.f90 index 2b8caa79eaa15..604b9cda4af3d 100644 --- a/flang/test/Lower/OpenMP/multiple-entry-points.f90 +++ b/flang/test/Lower/OpenMP/multiple-entry-points.f90 @@ -36,7 +36,7 @@ subroutine process_a(n, a) return entry process_b(n, b) - + !$omp parallel do i = 1, n a(i) = i * i diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 index efab520e96a5d..a6de35786cf64 100644 --- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 @@ -18,10 +18,10 @@ !CHECK: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) !CHECK: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], 
%[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#0 +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#0 !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -45,7 +45,7 @@ subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -82,7 +82,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -123,7 +123,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -165,7 +165,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -207,7 +207,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -250,7 +250,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90 b/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90 index 497cc396a5a02..e686d080090e1 100644 --- a/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90 +++ b/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90 @@ -3,7 +3,7 @@ ! 
RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s -! CHECK-LABEL: omp.private {type = private} +! CHECK-LABEL: omp.private {type = private} ! CHECK-SAME: @[[I_PRIVATE:.*]] : i32 ! CHECK-LABEL: func.func @_QPtest_parallel_master_taskloop() { ! CHECK: %[[VAL0:.*]] = fir.dummy_scope : !fir.dscope @@ -44,5 +44,5 @@ subroutine test_parallel_master_taskloop do i=1,10 j = j + 1 end do - !$omp end parallel masked taskloop + !$omp end parallel masked taskloop end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 index 086ed01d16d36..38545562f6bbd 100644 --- a/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 +++ b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 @@ -6,7 +6,7 @@ subroutine test_parallel_master_taskloop_simd integer :: i, j = 1 !CHECK: not yet implemented: Composite TASKLOOP SIMD - !$omp parallel master taskloop simd + !$omp parallel master taskloop simd do i=1,10 j = j + 1 end do diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 index f769fd3a278ba..17ee6d0cf6610 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -8,7 +8,7 @@ subroutine proc implicit none real(8),allocatable :: F(:) real(8),allocatable :: A(:) - + integer :: I !$omp parallel private(A) reduction(+:F,I) @@ -20,7 +20,7 @@ end subroutine proc !CHECK: call void (ptr, i32, ptr, ...) 
!CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) -!CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias +!CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias !CHECK: %[[TID_LOCAL:.*]] = alloca i32 !CHECK: %[[TID:.*]] = load i32, ptr %[[TID_ADDR]] !CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]] diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 index 5ff2947c6ac95..6532858da8a93 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) integer::a @@ -31,7 +31,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/private-commonblock.f90 b/flang/test/Lower/OpenMP/private-commonblock.f90 index 241e9fa2e1b5a..df2702d93d3b8 100644 --- a/flang/test/Lower/OpenMP/private-commonblock.f90 +++ b/flang/test/Lower/OpenMP/private-commonblock.f90 @@ -75,7 +75,7 @@ subroutine private_clause_commonblock() real::b(10) character(5):: c, d(5) common /blk/ a, b, c, d - + call sub1(a, b, c, d) !$omp parallel private(/blk/) call sub2(a, b, c, d) diff --git a/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90 b/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90 index 7671073c2598a..5e9e622a45ef6 100644 --- a/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90 +++ b/flang/test/Lower/OpenMP/privatize_predetermined_only_when_defined_by_eval.f90 @@ -15,7 +15,7 @@ subroutine privatize_predetermined_when_defined_by_eval enddo enddo - !$omp do + !$omp do do j=1,ii enddo !$omp end parallel diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90 index 59827713b6240..599e570597f3f 100644 --- a/flang/test/Lower/OpenMP/sections.f90 +++ b/flang/test/Lower/OpenMP/sections.f90 @@ -12,7 +12,7 @@ !CHECK: %[[CONST_1:.*]] = arith.constant 4 : i64 !CHECK: %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"} !CHECK: %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} +!CHECK: %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"} !CHECK: %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare 
%[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: omp.sections allocate(%[[CONST_1]] : i64 -> %[[COUNT_DECL]]#0 : !fir.ref) { !CHECK: omp.section { @@ -79,7 +79,7 @@ program sample end program sample !CHECK: func @_QPfirstprivate(%[[ARG:.*]]: !fir.ref {fir.bindc_name = "alpha"}) { -!CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +!CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"} !CHECK: %[[PRIVATE_ALPHA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ALPHA]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[TEMP:.*]] = fir.load %[[ARG_DECL]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/shared-loop.f90 b/flang/test/Lower/OpenMP/shared-loop.f90 index 48ad553752e4a..eb277efb2d3de 100644 --- a/flang/test/Lower/OpenMP/shared-loop.f90 +++ b/flang/test/Lower/OpenMP/shared-loop.f90 @@ -2,14 +2,14 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s ! --- Check that with shared(i) the variable outside the parallel section -! --- is updated. +! --- is updated. ! CHECK-LABEL: func.func @_QPomploop() ! CHECK: %[[ALLOC_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomploopEi"} ! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOC_I]] {uniq_name = "_QFomploopEi"} : ! CHECK: omp.parallel { ! CHECK: omp.sections { ! CHECK: omp.section { -! CHECK: %[[RES:.*]] = fir.do_loop %[[ARG0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG1:.*]] = +! CHECK: %[[RES:.*]] = fir.do_loop %[[ARG0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG1:.*]] = ! CHECK: fir.store %[[ARG1]] to %[[DECL_I]]#0 ! 
CHECK: hlfir.assign ! CHECK: %[[LOAD_I:.*]] = fir.load %[[DECL_I]]#0 @@ -47,7 +47,7 @@ subroutine omploop ! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[ALLOC_PRIV_I]] ! CHECK: omp.sections { ! CHECK: omp.section { -! CHECK: %[[RES:.*]] = fir.do_loop %[[ARG0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG1:.*]] = +! CHECK: %[[RES:.*]] = fir.do_loop %[[ARG0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG1:.*]] = ! CHECK-NOT: fir.store %[[ARG1]] to %[[DECL_I]]#1 ! CHECK: fir.store %[[ARG1]] to %[[DECL_PRIV_I]]#0 ! CHECK: hlfir.assign @@ -87,7 +87,7 @@ subroutine omploop2 ! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[ALLOC_PRIV_I]] ! CHECK: omp.sections { ! CHECK: omp.section { -! CHECK: %[[RES:.*]] = fir.do_loop %[[ARG0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG1:.*]] = +! CHECK: %[[RES:.*]] = fir.do_loop %[[ARG0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG1:.*]] = ! CHECK-NOT: fir.store %[[ARG1]] to %[[DECL_I]]#1 ! CHECK: fir.store %[[ARG1]] to %[[DECL_PRIV_I]]#0 ! 
CHECK: hlfir.assign @@ -115,6 +115,3 @@ subroutine omploop3 !$omp end sections !$omp end parallel end subroutine - - - diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 99654d6f1f45e..a3af7628c29f5 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -270,7 +270,7 @@ subroutine lastprivate_with_simd integer :: i real :: sum - + !CHECK: omp.simd private(@_QFlastprivate_with_simdEsum_private_f32 %[[VAR_SUM_DECLARE]]#0 -> %[[VAR_SUM_PINNED:.*]], @{{.*}}) { !CHECK: omp.loop_nest (%[[ARG:.*]]) : i32 = ({{.*}} to ({{.*}}) inclusive step ({{.*}}) { !CHECK: %[[VAR_SUM_PINNED_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM_PINNED]] {{.*}} diff --git a/flang/test/Lower/OpenMP/single.f90 b/flang/test/Lower/OpenMP/single.f90 index 45a0318d2892a..69c45630d6bc7 100644 --- a/flang/test/Lower/OpenMP/single.f90 +++ b/flang/test/Lower/OpenMP/single.f90 @@ -74,7 +74,7 @@ end subroutine single_allocate !=============================================================================== ! CHECK-LABEL: func.func @_QPsingle_privatization( -! CHECK-SAME: %[[X:.*]]: !fir.ref {fir.bindc_name = "x"}, +! CHECK-SAME: %[[X:.*]]: !fir.ref {fir.bindc_name = "x"}, ! CHECK-SAME: %[[Y:.*]]: !fir.ref {fir.bindc_name = "y"}) { ! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsingle_privatizationEx"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] dummy_scope %{{[0-9]+}} arg {{[0-9]+}} {uniq_name = "_QFsingle_privatizationEy"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 5f7c31bb931f6..a717b38a76593 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -22,6 +22,6 @@ subroutine finalize() !CHECK-52: omp.map.info var_ptr(%2 : !fir.ref>>, !fir.box>) map_clauses(from) capture(ByRef) members(%4 : [0] : !fir.llvm_ptr>) -> !fir.ref>> {name = "a"} !CHECK-51: from, release and delete map types are permitted deallocate(A) - + end subroutine finalize end module test diff --git a/flang/test/Lower/OpenMP/task.f90 b/flang/test/Lower/OpenMP/task.f90 index 67194fa5b19a3..75c7a3f95a683 100644 --- a/flang/test/Lower/OpenMP/task.f90 +++ b/flang/test/Lower/OpenMP/task.f90 @@ -212,7 +212,7 @@ subroutine task_firstprivate type mytype integer :: x end type mytype - + !CHECK: %[[INT_ALLOCA:.+]] = fir.alloca i32 {bindc_name = "int_var", uniq_name = "_QFtask_firstprivateEint_var"} !CHECK: %[[INT_VAR:.+]]:2 = hlfir.declare %[[INT_ALLOCA]] {uniq_name = "_QFtask_firstprivateEint_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[MYTYPE_ALLOCA:.+]] = fir.alloca !fir.type<_QFtask_firstprivateTmytype{x:i32}> {bindc_name = "mytype_var", uniq_name = "_QFtask_firstprivateEmytype_var"} diff --git a/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 b/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 index 3a63bb09c59de..cdc9182ef5210 100644 --- a/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 +++ b/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 @@ -24,7 +24,7 @@ ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_2]]#0 to %[[VAL_3]] : !fir.ref>> ! 
CHECK: omp.taskgroup task_reduction(byref @add_reduction_byref_box_Uxf32 %[[VAL_3]] -> %[[VAL_4:.*]]: !fir.ref>>) { -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] ! CHECK-SAME: {uniq_name = "_QFtask_reductionEx"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: omp.task in_reduction(byref @add_reduction_byref_box_Uxf32 %[[VAL_5]]#0 -> %[[VAL_6:.*]] : !fir.ref>>) { ! [...] diff --git a/flang/test/Lower/OpenMP/taskgroup-task_reduction01.f90 b/flang/test/Lower/OpenMP/taskgroup-task_reduction01.f90 index be4d3193e99f7..e9f222570471d 100644 --- a/flang/test/Lower/OpenMP/taskgroup-task_reduction01.f90 +++ b/flang/test/Lower/OpenMP/taskgroup-task_reduction01.f90 @@ -16,7 +16,7 @@ !CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_taskgroup_task_reductionEres"} !CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFomp_taskgroup_task_reductionEres"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: omp.taskgroup task_reduction(@[[RED_I32_NAME]] %[[VAL_1]]#0 -> %[[VAL_2:.*]] : !fir.ref) { -!CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] +!CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] !CHECK-SAME: {uniq_name = "_QFomp_taskgroup_task_reductionEres"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 diff --git a/flang/test/Lower/OpenMP/taskloop-cancel.f90 b/flang/test/Lower/OpenMP/taskloop-cancel.f90 index 2bc0f17428c36..710617793c3e7 100644 --- a/flang/test/Lower/OpenMP/taskloop-cancel.f90 +++ b/flang/test/Lower/OpenMP/taskloop-cancel.f90 @@ -1,7 +1,7 @@ ! RUN: bbc -emit-hlfir -fopenmp -o - %s -fopenmp-version=50 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s -fopenmp-version=50 2>&1 | FileCheck %s -! CHECK-LABEL: omp.private {type = private} +! CHECK-LABEL: omp.private {type = private} ! CHECK-SAME: @[[I_PRIVATE:.*]] : i32 ! 
CHECK-LABEL: func.func @_QPomp_taskloop() { diff --git a/flang/test/Lower/OpenMP/taskloop-grainsize.f90 b/flang/test/Lower/OpenMP/taskloop-grainsize.f90 index 43db8acdeceac..8aee5f69c849f 100644 --- a/flang/test/Lower/OpenMP/taskloop-grainsize.f90 +++ b/flang/test/Lower/OpenMP/taskloop-grainsize.f90 @@ -27,7 +27,7 @@ ! CHECK: %[[GRAINSIZE:.*]] = arith.constant 10 : i32 subroutine test_grainsize integer :: i, x - ! CHECK: omp.taskloop grainsize(%[[GRAINSIZE]]: i32) + ! CHECK: omp.taskloop grainsize(%[[GRAINSIZE]]: i32) ! CHECK-SAME: private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref, !fir.ref) { ! CHECK: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { !$omp taskloop grainsize(10) diff --git a/flang/test/Lower/OpenMP/taskloop-numtasks.f90 b/flang/test/Lower/OpenMP/taskloop-numtasks.f90 index f68f3a2d6ad26..e5b7a49748c51 100644 --- a/flang/test/Lower/OpenMP/taskloop-numtasks.f90 +++ b/flang/test/Lower/OpenMP/taskloop-numtasks.f90 @@ -27,7 +27,7 @@ ! CHECK: %[[VAL_NUMTASKS:.*]] = arith.constant 10 : i32 subroutine test_num_tasks integer :: i, x - ! CHECK: omp.taskloop num_tasks(%[[VAL_NUMTASKS]]: i32) + ! CHECK: omp.taskloop num_tasks(%[[VAL_NUMTASKS]]: i32) ! CHECK-SAME: private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref, !fir.ref) { ! CHECK: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { !$omp taskloop num_tasks(10) diff --git a/flang/test/Lower/OpenMP/taskloop.f90 b/flang/test/Lower/OpenMP/taskloop.f90 index d23eef2d4ac2d..bfe4fe7002811 100644 --- a/flang/test/Lower/OpenMP/taskloop.f90 +++ b/flang/test/Lower/OpenMP/taskloop.f90 @@ -32,19 +32,19 @@ ! CHECK-LABEL: omp.private ! CHECK-SAME: {type = private} @[[X_PRIVATE_TEST_ALLOCATE:.*]] : i32 -! CHECK-LABEL: omp.private +! CHECK-LABEL: omp.private ! CHECK-SAME: {type = private} @[[I_PRIVATE_TEST2:.*]] : i32 -! 
CHECK-LABEL: omp.private +! CHECK-LABEL: omp.private ! CHECK-SAME: {type = private} @[[RES_PRIVATE_TEST2:.*]] : i32 -! CHECK-LABEL: omp.private +! CHECK-LABEL: omp.private ! CHECK-SAME: {type = private} @[[I_PRIVATE:.*]] : i32 -! CHECK-LABEL: omp.private -! CHECK-SAME: {type = firstprivate} @[[RES_FIRSTPRIVATE:.*]] : i32 +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = firstprivate} @[[RES_FIRSTPRIVATE:.*]] : i32 ! CHECK-SAME: copy { -! CHECK: hlfir.assign +! CHECK: hlfir.assign ! CHECK-LABEL: func.func @_QPomp_taskloop ! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_taskloopEi"} diff --git a/flang/test/Lower/OpenMP/workdistribute-saxpy-two-2d.f90 b/flang/test/Lower/OpenMP/workdistribute-saxpy-two-2d.f90 index 4aeb2e89140cc..a9b3656bc52f3 100644 --- a/flang/test/Lower/OpenMP/workdistribute-saxpy-two-2d.f90 +++ b/flang/test/Lower/OpenMP/workdistribute-saxpy-two-2d.f90 @@ -21,7 +21,7 @@ subroutine target_teams_workdistribute(a, x, y, rows, cols) ! CHECK: fir.do_loop y = a * x + y - + ! CHECK: omp.target ! CHECK: omp.teams ! CHECK: omp.parallel @@ -29,7 +29,7 @@ subroutine target_teams_workdistribute(a, x, y, rows, cols) ! CHECK: omp.wsloop ! CHECK: omp.loop_nest ! CHECK: fir.do_loop - + y = a * y + x !$omp end target teams workdistribute @@ -54,14 +54,14 @@ subroutine teams_workdistribute(a, x, y, rows, cols) ! CHECK: fir.do_loop y = a * x + y - + ! CHECK: omp.teams ! CHECK: omp.parallel ! CHECK: omp.distribute ! CHECK: omp.wsloop ! CHECK: omp.loop_nest ! CHECK: fir.do_loop - + y = a * y + x !$omp end teams workdistribute diff --git a/flang/test/Lower/OpenMP/workdistribute-scalar-assign.f90 b/flang/test/Lower/OpenMP/workdistribute-scalar-assign.f90 index 3062b3598b8ae..e0f773380d10a 100644 --- a/flang/test/Lower/OpenMP/workdistribute-scalar-assign.f90 +++ b/flang/test/Lower/OpenMP/workdistribute-scalar-assign.f90 @@ -11,7 +11,7 @@ subroutine target_teams_workdistribute_scalar_assign() ! CHECK: omp.distribute ! CHECK: omp.wsloop ! 
CHECK: omp.loop_nest - + !$omp target teams workdistribute aa = 20 !$omp end target teams workdistribute diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90 index f3f11d8c4a6c2..68e2a386ef1f3 100644 --- a/flang/test/Lower/OpenMP/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90 @@ -50,7 +50,7 @@ program wsloop ! CHECK: omp.yield ! CHECK: } ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp index 870a0511b7926..4cc39f928ee1e 100644 --- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp @@ -82,7 +82,7 @@ ProcessSP ProcessWindows::CreateInstance(lldb::TargetSP target_sp, const FileSpec *crash_file_path, bool can_connect) { if (crash_file_path) - return nullptr; // Cannot create a Windows process from a crash_file + return nullptr; // Cannot create a Windows process from a crash_file. return ProcessSP(new ProcessWindows(target_sp, listener_sp)); } diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 83e358677467b..1ba99d78aea32 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -211,7 +211,7 @@ lldb::ProcessSP ProcessGDBRemote::CreateInstance( lldb::TargetSP target_sp, ListenerSP listener_sp, const FileSpec *crash_file_path, bool can_connect) { if (crash_file_path) - return nullptr; // Cannot create a GDBRemote process from a crash_file + return nullptr; // Cannot create a GDBRemote process from a crash_file. 
return lldb::ProcessSP(new ProcessGDBRemote(target_sp, listener_sp)); } diff --git a/lldb/test/API/python_api/sbframe_extensions/Makefile b/lldb/test/API/python_api/sbframe_extensions/Makefile deleted file mode 100644 index 10495940055b6..0000000000000 --- a/lldb/test/API/python_api/sbframe_extensions/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := main.c - -include Makefile.rules diff --git a/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py b/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py deleted file mode 100644 index d3eabfdd979c5..0000000000000 --- a/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py +++ /dev/null @@ -1,534 +0,0 @@ -""" -Test SBFrameExtensions API. -""" - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class TestSBFrameExtensions(TestBase): - NO_DEBUG_INFO_TESTCASE = True - - def setUp(self): - TestBase.setUp(self) - self.source = "main.c" - - def _get_frame(self): - """Helper method to get a valid frame for testing.""" - self.build() - target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( - self, "Set breakpoint here", lldb.SBFileSpec(self.source) - ) - frame = thread.GetFrameAtIndex(0) - self.assertTrue(frame.IsValid(), "Frame should be valid") - return frame, thread - - def test_property_pc(self): - """Test SBFrame extension property: pc""" - frame, _ = self._get_frame() - - pc = frame.pc - self.assertIsInstance(pc, int, "pc should be an integer") - self.assertGreater(pc, 0, "pc should be greater than 0") - self.assertEqual(pc, frame.GetPC(), "pc property should match GetPC()") - - def test_property_addr(self): - """Test SBFrame extension property: addr""" - frame, _ = self._get_frame() - - addr = frame.addr - self.assertTrue(addr.IsValid(), "addr should be valid") - self.assertEqual(addr, frame.GetPCAddress(), "addr should match GetPCAddress()") - - def test_property_fp(self): - 
"""Test SBFrame extension property: fp""" - frame, _ = self._get_frame() - - fp = frame.fp - self.assertIsInstance(fp, int, "fp should be an integer") - self.assertEqual(fp, frame.GetFP(), "fp property should match GetFP()") - - def test_property_sp(self): - """Test SBFrame extension property: sp""" - frame, _ = self._get_frame() - - sp = frame.sp - self.assertIsInstance(sp, int, "sp should be an integer") - self.assertEqual(sp, frame.GetSP(), "sp property should match GetSP()") - - def test_property_module(self): - """Test SBFrame extension property: module""" - frame, _ = self._get_frame() - - module = frame.module - self.assertTrue(module.IsValid(), "module should be valid") - self.assertEqual(module, frame.GetModule(), "module should match GetModule()") - - def test_property_compile_unit(self): - """Test SBFrame extension property: compile_unit""" - frame, _ = self._get_frame() - - compile_unit = frame.compile_unit - self.assertTrue(compile_unit.IsValid(), "compile_unit should be valid") - self.assertEqual( - compile_unit, - frame.GetCompileUnit(), - "compile_unit should match GetCompileUnit()", - ) - - def test_property_function(self): - """Test SBFrame extension property: function""" - frame, _ = self._get_frame() - - function = frame.function - self.assertTrue(function.IsValid(), "function should be valid") - self.assertEqual( - function, frame.GetFunction(), "function should match GetFunction()" - ) - - def test_property_symbol(self): - """Test SBFrame extension property: symbol""" - frame, _ = self._get_frame() - - symbol = frame.symbol - self.assertTrue(symbol.IsValid(), "symbol should be valid") - self.assertEqual(symbol, frame.GetSymbol(), "symbol should match GetSymbol()") - - def test_property_block(self): - """Test SBFrame extension property: block""" - frame, _ = self._get_frame() - - block = frame.block - self.assertTrue(block.IsValid(), "block should be valid") - block_direct = frame.GetBlock() - self.assertTrue(block_direct.IsValid(), "GetBlock() 
should return valid block") - # Verify both blocks are valid and have the same ranges - # by comparing their first range start address. - block_ranges = block.GetRanges() - block_direct_ranges = block_direct.GetRanges() - if block_ranges.GetSize() > 0 and block_direct_ranges.GetSize() > 0: - self.assertEqual( - block.GetRangeStartAddress(0), - block_direct.GetRangeStartAddress(0), - "block should match GetBlock() start address", - ) - - def test_property_is_inlined(self): - """Test SBFrame extension property: is_inlined""" - frame, _ = self._get_frame() - - is_inlined = frame.is_inlined - self.assertIsInstance(is_inlined, bool, "is_inlined should be a boolean") - self.assertEqual( - is_inlined, frame.IsInlined(), "is_inlined should match IsInlined()" - ) - - def test_property_name(self): - """Test SBFrame extension property: name""" - frame, _ = self._get_frame() - - name = frame.name - self.assertIsInstance(name, str, "name should be a string") - self.assertEqual( - name, frame.GetFunctionName(), "name should match GetFunctionName()" - ) - # Should be one of our functions. 
- self.assertIn( - name, ["func1", "func2", "main"], "name should be a known function" - ) - - def test_property_line_entry(self): - """Test SBFrame extension property: line_entry""" - frame, _ = self._get_frame() - - line_entry = frame.line_entry - self.assertTrue(line_entry.IsValid(), "line_entry should be valid") - self.assertEqual( - line_entry, frame.GetLineEntry(), "line_entry should match GetLineEntry()" - ) - - def test_property_thread(self): - """Test SBFrame extension property: thread""" - frame, thread = self._get_frame() - - thread_prop = frame.thread - self.assertTrue(thread_prop.IsValid(), "thread should be valid") - self.assertEqual( - thread_prop, frame.GetThread(), "thread should match GetThread()" - ) - self.assertEqual( - thread_prop.GetThreadID(), - thread.GetThreadID(), - "thread should be the same thread", - ) - - def test_property_disassembly(self): - """Test SBFrame extension property: disassembly""" - frame, _ = self._get_frame() - - disassembly = frame.disassembly - self.assertIsInstance(disassembly, str, "disassembly should be a string") - self.assertGreater(len(disassembly), 0, "disassembly should not be empty") - self.assertEqual( - disassembly, frame.Disassemble(), "disassembly should match Disassemble()" - ) - - def test_property_idx(self): - """Test SBFrame extension property: idx""" - frame, _ = self._get_frame() - - idx = frame.idx - self.assertIsInstance(idx, int, "idx should be an integer") - self.assertEqual(idx, frame.GetFrameID(), "idx should match GetFrameID()") - self.assertEqual(idx, 0, "First frame should have idx 0") - - def test_property_variables(self): - """Test SBFrame extension property: variables""" - frame, _ = self._get_frame() - - variables = frame.variables - self.assertIsInstance( - variables, lldb.SBValueList, "variables should be SBValueList" - ) - all_vars = frame.GetVariables(True, True, True, True) - self.assertEqual( - variables.GetSize(), - all_vars.GetSize(), - "variables should match GetVariables(True, 
True, True, True)", - ) - - def test_property_vars(self): - """Test SBFrame extension property: vars (alias for variables)""" - frame, _ = self._get_frame() - - vars_prop = frame.vars - self.assertIsInstance(vars_prop, lldb.SBValueList, "vars should be SBValueList") - variables = frame.variables - self.assertEqual( - vars_prop.GetSize(), - variables.GetSize(), - "vars should match variables", - ) - - def test_property_locals(self): - """Test SBFrame extension property: locals""" - frame, _ = self._get_frame() - - locals_prop = frame.locals - self.assertIsInstance( - locals_prop, lldb.SBValueList, "locals should be SBValueList" - ) - locals_direct = frame.GetVariables(False, True, False, False) - self.assertEqual( - locals_prop.GetSize(), - locals_direct.GetSize(), - "locals should match GetVariables(False, True, False, False)", - ) - - def test_property_args(self): - """Test SBFrame extension property: args""" - frame, _ = self._get_frame() - - args_prop = frame.args - self.assertIsInstance(args_prop, lldb.SBValueList, "args should be SBValueList") - args_direct = frame.GetVariables(True, False, False, False) - self.assertEqual( - args_prop.GetSize(), - args_direct.GetSize(), - "args should match GetVariables(True, False, False, False)", - ) - - def test_property_arguments(self): - """Test SBFrame extension property: arguments (alias for args)""" - frame, _ = self._get_frame() - - arguments_prop = frame.arguments - self.assertIsInstance( - arguments_prop, lldb.SBValueList, "arguments should be SBValueList" - ) - args_prop = frame.args - self.assertEqual( - arguments_prop.GetSize(), - args_prop.GetSize(), - "arguments should match args", - ) - - def test_property_statics(self): - """Test SBFrame extension property: statics""" - frame, _ = self._get_frame() - - statics_prop = frame.statics - self.assertIsInstance( - statics_prop, lldb.SBValueList, "statics should be SBValueList" - ) - statics_direct = frame.GetVariables(False, False, True, False) - self.assertEqual( 
- statics_prop.GetSize(), - statics_direct.GetSize(), - "statics should match GetVariables(False, False, True, False)", - ) - - def test_property_registers(self): - """Test SBFrame extension property: registers""" - frame, _ = self._get_frame() - - registers = frame.registers - # registers returns an SBValueList that can be iterated. - self.assertTrue(hasattr(registers, "__iter__"), "registers should be iterable") - registers_direct = frame.GetRegisters() - # Compare by iterating and counting. - registers_count = sum(1 for _ in registers) - registers_direct_count = sum(1 for _ in registers_direct) - self.assertEqual( - registers_count, - registers_direct_count, - "registers should match GetRegisters()", - ) - - def test_property_regs(self): - """Test SBFrame extension property: regs (alias for registers)""" - frame, _ = self._get_frame() - - regs = frame.regs - self.assertTrue(hasattr(regs, "__iter__"), "regs should be iterable") - registers = frame.registers - regs_count = sum(1 for _ in regs) - registers_count = sum(1 for _ in registers) - self.assertEqual(regs_count, registers_count, "regs should match registers") - - def test_property_register(self): - """Test SBFrame extension property: register (flattened view)""" - frame, _ = self._get_frame() - - register = frame.register - self.assertIsNotNone(register, "register should not be None") - # register is a helper object with __iter__ and __getitem__. - reg_names = set() - for reg in register: - self.assertTrue(reg.IsValid(), "Register should be valid") - reg_names.add(reg.name) - - # Test register indexing by name. 
- if len(reg_names) > 0: - first_reg_name = list(reg_names)[0] - reg_by_name = register[first_reg_name] - self.assertTrue(reg_by_name.IsValid(), "Register by name should be valid") - self.assertEqual( - reg_by_name.name, first_reg_name, "Register name should match" - ) - - def test_property_reg(self): - """Test SBFrame extension property: reg (alias for register)""" - frame, _ = self._get_frame() - - reg = frame.reg - self.assertIsNotNone(reg, "reg should not be None") - register = frame.register - reg_names = set() - for r in reg: - reg_names.add(r.name) - reg_names2 = set() - for r in register: - reg_names2.add(r.name) - self.assertEqual(reg_names, reg_names2, "reg should match register") - - def test_property_parent(self): - """Test SBFrame extension property: parent""" - frame0, thread = self._get_frame() - - # If there's a parent frame (frame 1), test parent property. - if thread.GetNumFrames() > 1: - frame1 = thread.GetFrameAtIndex(1) - parent = frame0.parent - self.assertTrue(parent.IsValid(), "parent should be valid") - self.assertEqual( - parent.GetFrameID(), - frame1.GetFrameID(), - "parent should be the next frame", - ) - self.assertEqual( - parent.pc, frame1.GetPC(), "parent PC should match frame 1" - ) - - def test_property_child(self): - """Test SBFrame extension property: child""" - frame0, thread = self._get_frame() - - # Test child property (should be frame -1, which doesn't exist, so should return invalid). - child = frame0.child - # Child of frame 0 would be frame -1, which doesn't exist. - # So it should return an invalid frame. 
- if thread.GetNumFrames() == 1: - self.assertFalse(child.IsValid(), "child of only frame should be invalid") - - def test_method_get_all_variables(self): - """Test SBFrame extension method: get_all_variables()""" - frame, _ = self._get_frame() - - all_vars = frame.get_all_variables() - self.assertIsInstance( - all_vars, lldb.SBValueList, "get_all_variables should return SBValueList" - ) - all_vars_direct = frame.GetVariables(True, True, True, True) - self.assertEqual( - all_vars.GetSize(), - all_vars_direct.GetSize(), - "get_all_variables should match GetVariables(True, True, True, True)", - ) - - def test_method_get_arguments(self): - """Test SBFrame extension method: get_arguments()""" - frame, _ = self._get_frame() - - args = frame.get_arguments() - self.assertIsInstance( - args, lldb.SBValueList, "get_arguments should return SBValueList" - ) - args_direct = frame.GetVariables(True, False, False, False) - self.assertEqual( - args.GetSize(), - args_direct.GetSize(), - "get_arguments should match GetVariables(True, False, False, False)", - ) - - def test_method_get_locals(self): - """Test SBFrame extension method: get_locals()""" - frame, _ = self._get_frame() - - locals = frame.get_locals() - self.assertIsInstance( - locals, lldb.SBValueList, "get_locals should return SBValueList" - ) - locals_direct = frame.GetVariables(False, True, False, False) - self.assertEqual( - locals.GetSize(), - locals_direct.GetSize(), - "get_locals should match GetVariables(False, True, False, False)", - ) - - def test_method_get_statics(self): - """Test SBFrame extension method: get_statics()""" - frame, _ = self._get_frame() - - statics = frame.get_statics() - self.assertIsInstance( - statics, lldb.SBValueList, "get_statics should return SBValueList" - ) - statics_direct = frame.GetVariables(False, False, True, False) - self.assertEqual( - statics.GetSize(), - statics_direct.GetSize(), - "get_statics should match GetVariables(False, False, True, False)", - ) - - def 
test_method_var(self): - """Test SBFrame extension method: var()""" - frame, _ = self._get_frame() - - # Test var() method with a variable that should exist. - # First, let's see what variables are available. - all_vars = frame.GetVariables(True, True, True, True) - if all_vars.GetSize() > 0: - var_name = all_vars.GetValueAtIndex(0).GetName() - var_value = frame.var(var_name) - self.assertTrue(var_value.IsValid(), f"var('{var_name}') should be valid") - self.assertEqual( - var_value.GetName(), - var_name, - f"var('{var_name}') should return the correct variable", - ) - # Compare with GetValueForVariablePath. - var_direct = frame.GetValueForVariablePath(var_name) - self.assertEqual( - var_value.GetName(), - var_direct.GetName(), - "var() should match GetValueForVariablePath()", - ) - - # Test var() with non-existent variable. - invalid_var = frame.var("NonExistentVariable12345") - self.assertFalse( - invalid_var.IsValid(), "var() with non-existent variable should be invalid" - ) - - def test_method_get_parent_frame(self): - """Test SBFrame extension method: get_parent_frame()""" - frame0, thread = self._get_frame() - - # Test get_parent_frame. - if thread.GetNumFrames() > 1: - parent = frame0.get_parent_frame() - self.assertTrue( - parent.IsValid(), "get_parent_frame should return valid frame" - ) - frame1 = thread.GetFrameAtIndex(1) - self.assertEqual( - parent.GetFrameID(), - frame1.GetFrameID(), - "get_parent_frame should return frame 1", - ) - else: - # If there's only one frame, parent should be invalid. - parent = frame0.get_parent_frame() - # Note: get_parent_frame might return an invalid frame if idx+1 is out of bounds. - - def test_method_get_child_frame(self): - """Test SBFrame extension method: get_child_frame()""" - frame0, thread = self._get_frame() - - # Test get_child_frame (frame -1 doesn't exist, so should be invalid). 
- child = frame0.get_child_frame() - if thread.GetNumFrames() == 1: - self.assertFalse( - child.IsValid(), "get_child_frame of only frame should be invalid" - ) - - def test_special_method_int(self): - """Test SBFrame extension special method: __int__""" - frame0, _ = self._get_frame() - - # Test __int__ (converts frame to its frame ID). - frame_id = int(frame0) - self.assertIsInstance(frame_id, int, "__int__ should return an integer") - self.assertEqual( - frame_id, frame0.GetFrameID(), "__int__ should return frame ID" - ) - - def test_special_method_hex(self): - """Test SBFrame extension special method: __hex__""" - frame0, _ = self._get_frame() - - # Test __hex__ (converts frame to its PC). - # Note: __hex__ returns the PC as an integer, not a hex string. - # In Python 3, hex() builtin calls __index__ if __hex__ doesn't exist, - # but since __hex__ is defined, it will be called. - pc_hex = frame0.__hex__() - self.assertIsInstance(pc_hex, int, "__hex__ should return an integer (PC)") - self.assertEqual(pc_hex, frame0.GetPC(), "__hex__ should return PC") - - def test_special_method_eq(self): - """Test SBFrame extension special method: __eq__ and __ne__""" - frame0, thread = self._get_frame() - - # Test __eq__ and __ne__. - frame0_copy = thread.GetFrameAtIndex(0) - self.assertTrue(frame0 == frame0_copy, "Same frame should be equal") - self.assertFalse(frame0 != frame0_copy, "Same frame should not be not-equal") - - if thread.GetNumFrames() > 1: - frame1 = thread.GetFrameAtIndex(1) - self.assertFalse(frame0 == frame1, "Different frames should not be equal") - self.assertTrue(frame0 != frame1, "Different frames should be not-equal") - - def test_pc_property_settable(self): - """Test that pc property is settable""" - frame, _ = self._get_frame() - - original_pc = frame.GetPC() - # Test that we can set pc (though this might not work on all platforms). - # We'll just verify the property exists and can be read. 
- pc = frame.pc - self.assertIsInstance(pc, int, "pc should be readable") - # Note: Setting pc might not be supported on all platforms, so we just test reading. diff --git a/lldb/test/API/python_api/sbframe_extensions/main.c b/lldb/test/API/python_api/sbframe_extensions/main.c deleted file mode 100644 index 8e2d3ed8e5a5f..0000000000000 --- a/lldb/test/API/python_api/sbframe_extensions/main.c +++ /dev/null @@ -1,33 +0,0 @@ -#include - -// Global and static variables for testing -int g_global_var = 42; -static int g_static_var = 100; - -// Function declarations -int func1(int arg1, char arg2); -int func2(int arg1, int arg2); - -int func1(int arg1, char arg2) { - static int static_var = 200; - int local1 = arg1 * 2; - char local2 = arg2; - // Set breakpoint here - return local1 + local2 + static_var; -} - -int func2(int arg1, int arg2) { - int local1 = arg1 + arg2; - int local2 = arg1 * arg2; - // Set breakpoint here - return func1(local1, 'X'); -} - -int main(int argc, char const *argv[]) { - int main_local = 10; - static int main_static = 50; - // Set breakpoint here - int result = func2(5, 7); - printf("Result: %d\n", result); - return 0; -} diff --git a/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp index 24c0ca2111f40..f0996eb3ff0f4 100644 --- a/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp @@ -87,8 +87,10 @@ Error AttachRequestHandler::Run(const AttachRequestArguments &args) const { // Use the unique target ID to get the target. 
target = dap.debugger.FindTargetByGloballyUniqueID(*target_id); if (!target.IsValid()) { - error.SetErrorStringWithFormat("invalid target_id %lu in attach config", - *target_id); + error.SetErrorString( + llvm::formatv("invalid target_id {0} in attach config", *target_id) + .str() + .c_str()); } } else { target = dap.CreateTarget(error); diff --git a/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h index 28a2ec47f81ad..2d31c8aa6301b 100644 --- a/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h +++ b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h @@ -22,7 +22,12 @@ class LLVM_ABI RuntimeLibraryAnalysis RuntimeLibraryAnalysis() = default; RuntimeLibraryAnalysis(RTLIB::RuntimeLibcallsInfo &&BaselineInfoImpl) : LibcallsInfo(std::move(BaselineInfoImpl)) {} - explicit RuntimeLibraryAnalysis(const Triple &T) : LibcallsInfo(T) {} + RuntimeLibraryAnalysis( + const Triple &TT, + ExceptionHandling ExceptionModel = ExceptionHandling::None, + FloatABI::ABIType FloatABI = FloatABI::Default, + EABI EABIVersion = EABI::Default, StringRef ABIName = "", + VectorLibrary VecLib = VectorLibrary::NoLibrary); LLVM_ABI RTLIB::RuntimeLibcallsInfo run(const Module &M, ModuleAnalysisManager &); @@ -41,12 +46,19 @@ class LLVM_ABI RuntimeLibraryInfoWrapper : public ImmutablePass { public: static char ID; RuntimeLibraryInfoWrapper(); - explicit RuntimeLibraryInfoWrapper(const Triple &T); - explicit RuntimeLibraryInfoWrapper(const RTLIB::RuntimeLibcallsInfo &RTLCI); + RuntimeLibraryInfoWrapper( + const Triple &TT, + ExceptionHandling ExceptionModel = ExceptionHandling::None, + FloatABI::ABIType FloatABI = FloatABI::Default, + EABI EABIVersion = EABI::Default, StringRef ABIName = "", + VectorLibrary VecLib = VectorLibrary::NoLibrary); const RTLIB::RuntimeLibcallsInfo &getRTLCI(const Module &M) { - ModuleAnalysisManager DummyMAM; - RTLCI = RTLA.run(M, DummyMAM); + if (!RTLCI) { + ModuleAnalysisManager DummyMAM; + RTLCI = RTLA.run(M, DummyMAM); + } + 
return *RTLCI; } diff --git a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h index 8624fd2403a12..3e0137710e8eb 100644 --- a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h @@ -9,12 +9,16 @@ #ifndef LLVM_CODEGEN_LIBCALLLOWERINGINFO_H #define LLVM_CODEGEN_LIBCALLLOWERINGINFO_H +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/RuntimeLibcalls.h" +#include "llvm/Pass.h" namespace llvm { class TargetSubtargetInfo; +class TargetMachine; +/// Tracks which library functions to use for a particular subtarget. class LibcallLoweringInfo { private: const RTLIB::RuntimeLibcallsInfo &RTLCI; @@ -73,6 +77,70 @@ class LibcallLoweringInfo { } }; +/// Record a mapping from subtarget to LibcallLoweringInfo. +class LibcallLoweringModuleAnalysisResult { +private: + using LibcallLoweringMap = + DenseMap; + mutable LibcallLoweringMap LoweringMap; + const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr; + +public: + LibcallLoweringModuleAnalysisResult() = default; + LibcallLoweringModuleAnalysisResult(RTLIB::RuntimeLibcallsInfo &RTLCI) + : RTLCI(&RTLCI) {} + + void init(const RTLIB::RuntimeLibcallsInfo *RT) { RTLCI = RT; } + + void clear() { + RTLCI = nullptr; + LoweringMap.clear(); + } + + LLVM_ABI bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &); + + const LibcallLoweringInfo & + getLibcallLowering(const TargetSubtargetInfo &Subtarget) const { + return LoweringMap.try_emplace(&Subtarget, *RTLCI, Subtarget).first->second; + } +}; + +class LibcallLoweringModuleAnalysis + : public AnalysisInfoMixin { +private: + friend AnalysisInfoMixin; + static AnalysisKey Key; + + LibcallLoweringModuleAnalysisResult LibcallLoweringMap; + +public: + using Result = LibcallLoweringModuleAnalysisResult; + + LLVM_ABI Result run(Module &M, ModuleAnalysisManager &); +}; + +class LLVM_ABI LibcallLoweringInfoWrapper : public ImmutablePass { + 
LibcallLoweringModuleAnalysisResult Result; + +public: + static char ID; + LibcallLoweringInfoWrapper(); + + const LibcallLoweringInfo & + getLibcallLowering(const TargetSubtargetInfo &Subtarget) const { + return Result.getLibcallLowering(Subtarget); + } + + const LibcallLoweringModuleAnalysisResult &getResult() const { + return Result; + } + + bool doInitialization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; +}; + } // end namespace llvm #endif // LLVM_CODEGEN_LIBCALLLOWERINGINFO_H diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td index 20ba51ade35a7..2c945d2399b25 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagon.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td @@ -14,7 +14,7 @@ // // All Hexagon intrinsics start with "llvm.hexagon.". let TargetPrefix = "hexagon" in { - /// Hexagon_Intrinsic - Base class for the majority of Hexagon intrinsics. + /// Hexagon_Intrinsic - Base class for majority of Hexagon intrinsics. class Hexagon_Intrinsic ret_types, list param_types, list properties> @@ -435,6 +435,84 @@ def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B; def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B; def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B; +// Carryo +// The script can't autogenerate clang builtins for vaddcarryo/vsubcarryo, +// and they are marked in HexagonIset.py as not having intrinsics at all. +// The script could generate intrinsics, but instead of doing intrinsics +// without builtins, just put the intrinsics here.
+ +// tag : V6_vaddcarryo +class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic< + list intr_properties = [IntrNoMem]> + : Hexagon_NonGCC_Intrinsic< + [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty], + intr_properties>; + +// tag : V6_vaddcarryo +class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B< + list intr_properties = [IntrNoMem]> + : Hexagon_NonGCC_Intrinsic< + [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty], + intr_properties>; + +// Pseudo intrinsics for widening vector instructions that +// get replaced with the real Hexagon instructions during +// instruction lowering. +class Hexagon_widenvec_Intrinsic + : Hexagon_NonGCC_Intrinsic< + [llvm_anyvector_ty], + [LLVMTruncatedType<0>, LLVMTruncatedType<0>], + [IntrNoMem]>; + +class Hexagon_non_widenvec_Intrinsic + : Hexagon_NonGCC_Intrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + +// Widening vector add +def int_hexagon_vadd_su: Hexagon_widenvec_Intrinsic; +def int_hexagon_vadd_uu: Hexagon_widenvec_Intrinsic; +def int_hexagon_vadd_ss: Hexagon_widenvec_Intrinsic; +def int_hexagon_vadd_us: Hexagon_widenvec_Intrinsic; + + +// Widening vector subtract +def int_hexagon_vsub_su: Hexagon_widenvec_Intrinsic; +def int_hexagon_vsub_uu: Hexagon_widenvec_Intrinsic; +def int_hexagon_vsub_ss: Hexagon_widenvec_Intrinsic; +def int_hexagon_vsub_us: Hexagon_widenvec_Intrinsic; + +// Widening vector multiply +def int_hexagon_vmpy_su: Hexagon_widenvec_Intrinsic; +def int_hexagon_vmpy_uu: Hexagon_widenvec_Intrinsic; +def int_hexagon_vmpy_ss: Hexagon_widenvec_Intrinsic; +def int_hexagon_vmpy_us: Hexagon_widenvec_Intrinsic; + +def int_hexagon_vavgu: Hexagon_non_widenvec_Intrinsic; +def int_hexagon_vavgs: Hexagon_non_widenvec_Intrinsic; + +class Hexagon_vasr_Intrinsic + : Hexagon_NonGCC_Intrinsic< + [LLVMSubdivide2VectorType<0>], + [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem]>; + +def int_hexagon_vasrsat_su:
Hexagon_vasr_Intrinsic; +def int_hexagon_vasrsat_uu: Hexagon_vasr_Intrinsic; +def int_hexagon_vasrsat_ss: Hexagon_vasr_Intrinsic; + +class Hexagon_widen_vec_scalar_Intrinsic + : Hexagon_NonGCC_Intrinsic< + [llvm_anyvector_ty], + [LLVMTruncatedType<0>, llvm_i32_ty], + [IntrNoMem]>; + +// Widening vector scalar multiply +def int_hexagon_vmpy_ub_b: Hexagon_widen_vec_scalar_Intrinsic; +def int_hexagon_vmpy_ub_ub: Hexagon_widen_vec_scalar_Intrinsic; +def int_hexagon_vmpy_uh_uh: Hexagon_widen_vec_scalar_Intrinsic; +def int_hexagon_vmpy_h_h: Hexagon_widen_vec_scalar_Intrinsic; // Intrinsic for instrumentation based profiling using a custom handler. The // name of the handler is passed as the first operand to the intrinsic. The diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td index dde4132791f06..2a673603e4e03 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td @@ -491,20 +491,6 @@ class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B< [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty], intr_properties>; -// tag : V6_vaddcarryo -class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty], - intr_properties>; - -// tag : V6_vaddcarryo -class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty], - intr_properties>; - // tag : V6_vaddcarrysat class Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic intr_properties = [IntrNoMem]> diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 3907e864bed1e..446113c4670dd 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ 
-2139,3 +2139,15 @@ let TargetPrefix = "ppc" in { Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrWriteMem, NoCapture>]>; } + +// AMO intrinsics +let TargetPrefix = "ppc" in { + def int_ppc_amo_lwat : ClangBuiltin<"__builtin_amo_lwat">, + DefaultAttrsIntrinsic<[llvm_i32_ty],[llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrArgMemOnly, ImmArg>]>; + def int_ppc_amo_ldat : ClangBuiltin<"__builtin_amo_ldat">, + DefaultAttrsIntrinsic<[llvm_i64_ty],[llvm_ptr_ty, + llvm_i64_ty, llvm_i32_ty], + [IntrArgMemOnly, ImmArg>]>; +} diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 10a4d8525a9e8..c718e29b99ff4 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -133,6 +133,7 @@ LLVM_ABI void initializeGlobalMergeFuncPassWrapperPass(PassRegistry &); LLVM_ABI void initializeGlobalMergePass(PassRegistry &); LLVM_ABI void initializeGlobalsAAWrapperPassPass(PassRegistry &); LLVM_ABI void initializeHardwareLoopsLegacyPass(PassRegistry &); +LLVM_ABI void initializeLibcallLoweringInfoWrapperPass(PassRegistry &); LLVM_ABI void initializeMIRProfileLoaderPassPass(PassRegistry &); LLVM_ABI void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &); LLVM_ABI void initializeIRTranslatorPass(PassRegistry &); diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index ac28e45891df2..fc722378b586a 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -328,6 +328,7 @@ class MCDecodedPseudoProbeInlineTree // Return false if it's a dummy inline site bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } + bool isTopLevelFunc() const { return !isRoot() && Parent->isRoot(); } InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); } void setProbes(MutableArrayRef ProbesRef) { Probes = ProbesRef.data(); diff --git a/llvm/include/llvm/ProfileData/SampleProf.h
b/llvm/include/llvm/ProfileData/SampleProf.h index d63714780afef..594065206ebd8 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1214,13 +1214,19 @@ class FunctionSamples { // Note the sequence of the suffixes in the knownSuffixes array matters. // If suffix "A" is appended after the suffix "B", "A" should be in front // of "B" in knownSuffixes. - const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; + const SmallVector KnownSuffixes{LLVMSuffix, PartSuffix, + UniqSuffix}; + return getCanonicalFnName(FnName, KnownSuffixes, Attr); + } + + static StringRef getCanonicalFnName(StringRef FnName, + ArrayRef Suffixes, + StringRef Attr = "selected") { if (Attr == "" || Attr == "all") return FnName.split('.').first; if (Attr == "selected") { StringRef Cand(FnName); - for (const auto &Suf : KnownSuffixes) { - StringRef Suffix(Suf); + for (const auto Suffix : Suffixes) { // If the profile contains ".__uniq." suffix, don't strip the // suffix for names in the IR. if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) @@ -1229,7 +1235,7 @@ class FunctionSamples { if (It == StringRef::npos) continue; auto Dit = Cand.rfind('.'); - if (Dit == It + Suffix.size() - 1) + if (Dit == It || Dit == It + Suffix.size() - 1) Cand = Cand.substr(0, It); } return Cand; diff --git a/llvm/include/llvm/Support/Hash.h b/llvm/include/llvm/Support/Hash.h new file mode 100644 index 0000000000000..bf98f0dcef836 --- /dev/null +++ b/llvm/include/llvm/Support/Hash.h @@ -0,0 +1,36 @@ +//===- llvm/Support/Hash.h - Hash functions --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides hash functions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HASH_H +#define LLVM_SUPPORT_HASH_H + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { + +enum class KCFIHashAlgorithm { xxHash64, FNV1a }; + +/// Parse a KCFI hash algorithm name. +/// Returns xxHash64 if the name is not recognized. +KCFIHashAlgorithm parseKCFIHashAlgorithm(StringRef Name); + +/// Convert a KCFI hash algorithm enum to its string representation. +StringRef stringifyKCFIHashAlgorithm(KCFIHashAlgorithm Algorithm); + +/// Compute KCFI type ID from mangled type name. +/// The algorithm can be xxHash64 or FNV-1a. +uint32_t getKCFITypeID(StringRef MangledTypeName, KCFIHashAlgorithm Algorithm); + +} // end namespace llvm + +#endif // LLVM_SUPPORT_HASH_H diff --git a/llvm/lib/Analysis/RuntimeLibcallInfo.cpp b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp index 9ea789a4ee45a..1c5a1cc75b7bd 100644 --- a/llvm/lib/Analysis/RuntimeLibcallInfo.cpp +++ b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp @@ -13,6 +13,15 @@ using namespace llvm; AnalysisKey RuntimeLibraryAnalysis::Key; +RuntimeLibraryAnalysis::RuntimeLibraryAnalysis(const Triple &TT, + ExceptionHandling ExceptionModel, + FloatABI::ABIType FloatABI, + EABI EABIVersion, + StringRef ABIName, + VectorLibrary VecLib) + : LibcallsInfo(std::in_place, TT, ExceptionModel, FloatABI, EABIVersion, + ABIName, VecLib) {} + RTLIB::RuntimeLibcallsInfo RuntimeLibraryAnalysis::run(const Module &M, ModuleAnalysisManager &) { if (!LibcallsInfo) @@ -26,6 +35,13 @@ INITIALIZE_PASS(RuntimeLibraryInfoWrapper, "runtime-library-info", RuntimeLibraryInfoWrapper::RuntimeLibraryInfoWrapper() : ImmutablePass(ID), RTLA(RTLIB::RuntimeLibcallsInfo(Triple())) {} +RuntimeLibraryInfoWrapper::RuntimeLibraryInfoWrapper( + const Triple &TT, ExceptionHandling ExceptionModel, + FloatABI::ABIType FloatABI, EABI EABIVersion, StringRef ABIName, + VectorLibrary VecLib) + : ImmutablePass(ID), RTLCI(std::in_place, TT, ExceptionModel, 
FloatABI, + EABIVersion, ABIName, VecLib) {} + char RuntimeLibraryInfoWrapper::ID = 0; ModulePass *llvm::createRuntimeLibraryInfoWrapperPass() { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 0ac0ca7463131..1d7a8b981b5ee 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3490,11 +3490,9 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. - // We can currently only fold X%N if X is constant. - const SCEVConstant *StartC = dyn_cast(AR->getStart()); - if (StartC && !DivInt.urem(StepInt)) { - const APInt &StartInt = StartC->getAPInt(); - const APInt &StartRem = StartInt.urem(StepInt); + const APInt *StartRem; + if (!DivInt.urem(StepInt) && match(getURemExpr(AR->getStart(), Step), + m_scev_APInt(StartRem))) { bool NoWrap = getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), @@ -3507,10 +3505,15 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // all offsets in [[(X - X%N), X). bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C StepInt.isPowerOf2() && DivInt.isPowerOf2(); - if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) { - const SCEV *NewLHS = getAddRecExpr( - getConstant(StartInt - StartRem), Step, AR->getLoop(), - NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap); + // Only fold if the subtraction can be folded in the start + // expression. + const SCEV *NewStart = + getMinusSCEV(AR->getStart(), getConstant(*StartRem)); + if (*StartRem != 0 && (NoWrap || CanFoldWithWrap) && + !isa(NewStart)) { + const SCEV *NewLHS = + getAddRecExpr(NewStart, Step, AR->getLoop(), + NoWrap ? 
SCEV::FlagNW : SCEV::FlagAnyWrap); if (LHS != NewLHS) { LHS = NewLHS; diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 9795a0b707fd3..fe293c63fa762 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -57,6 +57,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); initializeJMCInstrumenterPass(Registry); + initializeLibcallLoweringInfoWrapperPass(Registry); initializeLiveDebugValuesLegacyPass(Registry); initializeLiveDebugVariablesWrapperLegacyPass(Registry); initializeLiveIntervalsWrapperPassPass(Registry); diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index f44eb227133ae..13ed4846d2bf7 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -975,11 +975,12 @@ static RTLIB::Libcall fremToLibcall(Type *Ty) { /* Return true if, according to \p LibInfo, the target either directly supports the frem instruction for the \p Ty, has a custom lowering, or uses a libcall. 
*/ -static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) { +static bool targetSupportsFrem(const TargetLowering &TLI, + const LibcallLoweringInfo &Libcalls, Type *Ty) { if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty))) return true; - return TLI.getLibcallName(fremToLibcall(Ty->getScalarType())); + return Libcalls.getLibcallName(fremToLibcall(Ty->getScalarType())); } static void addToWorklist(Instruction &I, @@ -991,7 +992,7 @@ static void addToWorklist(Instruction &I, } static bool runImpl(Function &F, const TargetLowering &TLI, - AssumptionCache *AC) { + const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) { SmallVector Worklist; unsigned MaxLegalFpConvertBitWidth = @@ -1010,7 +1011,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI, switch (I.getOpcode()) { case Instruction::FRem: - return !targetSupportsFrem(TLI, Ty) && + return !targetSupportsFrem(TLI, Libcalls, Ty) && FRemExpander::canExpandType(Ty->getScalarType()); case Instruction::FPToUI: @@ -1090,20 +1091,27 @@ class ExpandFpLegacyPass : public FunctionPass { bool runOnFunction(Function &F) override { auto *TM = &getAnalysis().getTM(); - auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F); + auto *TLI = Subtarget->getTargetLowering(); AssumptionCache *AC = nullptr; + const LibcallLoweringInfo &Libcalls = + getAnalysis().getLibcallLowering( + *Subtarget); + if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone()) AC = &getAnalysis().getAssumptionCache(F); - return runImpl(F, *TLI, AC); + return runImpl(F, *TLI, Libcalls, AC); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); if (OptLevel != CodeGenOptLevel::None) AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); } }; } // namespace @@ -1126,13 +1134,29 @@ PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) { AssumptionCache *AC = nullptr; if 
(OptLevel != CodeGenOptLevel::None) AC = &FAM.getResult(F); - return runImpl(F, TLI, AC) ? PreservedAnalyses::none() - : PreservedAnalyses::all(); + + auto &MAMProxy = FAM.getResult(F); + + const LibcallLoweringModuleAnalysisResult *LibcallLowering = + MAMProxy.getCachedResult(*F.getParent()); + + if (!LibcallLowering) { + F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() + + "' analysis required"); + return PreservedAnalyses::all(); + } + + const LibcallLoweringInfo &Libcalls = + LibcallLowering->getLibcallLowering(*STI); + + return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); } char ExpandFpLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp", "Expand certain fp instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper) INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false) FunctionPass *llvm::createExpandFpPass(CodeGenOptLevel OptLevel) { diff --git a/llvm/lib/CodeGen/LibcallLoweringInfo.cpp b/llvm/lib/CodeGen/LibcallLoweringInfo.cpp index 6f3607e8db824..0d54fac2422e2 100644 --- a/llvm/lib/CodeGen/LibcallLoweringInfo.cpp +++ b/llvm/lib/CodeGen/LibcallLoweringInfo.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LibcallLoweringInfo.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -28,3 +31,42 @@ LibcallLoweringInfo::LibcallLoweringInfo( Subtarget.initLibcallLoweringInfo(*this); } + +AnalysisKey LibcallLoweringModuleAnalysis::Key; + +bool LibcallLoweringModuleAnalysisResult::invalidate( + Module &, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &) { + // Passes that change the runtime libcall set must explicitly invalidate this + // pass. 
+ auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless(); +} + +LibcallLoweringModuleAnalysisResult +LibcallLoweringModuleAnalysis::run(Module &M, ModuleAnalysisManager &MAM) { + LibcallLoweringMap.init(&MAM.getResult(M)); + return LibcallLoweringMap; +} + +INITIALIZE_PASS_BEGIN(LibcallLoweringInfoWrapper, "libcall-lowering-info", + "Library Function Lowering Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(RuntimeLibraryInfoWrapper) +INITIALIZE_PASS_END(LibcallLoweringInfoWrapper, "libcall-lowering-info", + "Library Function Lowering Analysis", false, true) + +char LibcallLoweringInfoWrapper::ID = 0; + +LibcallLoweringInfoWrapper::LibcallLoweringInfoWrapper() : ImmutablePass(ID) {} + +bool LibcallLoweringInfoWrapper::doInitialization(Module &M) { + Result.init(&getAnalysis().getRTLCI(M)); + return false; +} + +void LibcallLoweringInfoWrapper::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesAll(); +} + +void LibcallLoweringInfoWrapper::releaseMemory() { Result.clear(); } diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index d738dc4eea36d..72c3c566163e2 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExpandVectorPredication.h" +#include "llvm/CodeGen/LibcallLoweringInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -51,6 +52,7 @@ namespace { struct PreISelIntrinsicLowering { const TargetMachine *TM; + const LibcallLoweringModuleAnalysisResult &ModuleLibcalls; const function_ref LookupTTI; const function_ref LookupTLI; @@ -61,11 +63,13 @@ struct PreISelIntrinsicLowering { explicit PreISelIntrinsicLowering( const TargetMachine *TM_, + const LibcallLoweringModuleAnalysisResult &ModuleLibcalls_, 
function_ref LookupTTI_, function_ref LookupTLI_, bool UseMemIntrinsicLibFunc_ = true) - : TM(TM_), LookupTTI(LookupTTI_), LookupTLI(LookupTLI_), - UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {} + : TM(TM_), ModuleLibcalls(ModuleLibcalls_), LookupTTI(LookupTTI_), + LookupTLI(LookupTLI_), UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) { + } static bool shouldExpandMemIntrinsicWithSize(Value *Size, const TargetTransformInfo &TTI); @@ -230,21 +234,26 @@ bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize( return SizeVal > Threshold || Threshold == 0; } -static bool canEmitLibcall(const TargetMachine *TM, Function *F, - RTLIB::Libcall LC) { +static bool +canEmitLibcall(const LibcallLoweringModuleAnalysisResult &ModuleLowering, + const TargetMachine *TM, Function *F, RTLIB::Libcall LC) { // TODO: Should this consider the address space of the memcpy? if (!TM) return true; - const TargetLowering *TLI = TM->getSubtargetImpl(*F)->getTargetLowering(); - return TLI->getLibcallName(LC) != nullptr; + const LibcallLoweringInfo &Lowering = + ModuleLowering.getLibcallLowering(*TM->getSubtargetImpl(*F)); + return Lowering.getLibcallImpl(LC) != RTLIB::Unsupported; } -static bool canEmitMemcpy(const TargetMachine *TM, Function *F) { +static bool +canEmitMemcpy(const LibcallLoweringModuleAnalysisResult &ModuleLowering, + const TargetMachine *TM, Function *F) { // TODO: Should this consider the address space of the memcpy? 
if (!TM) return true; - const TargetLowering *TLI = TM->getSubtargetImpl(*F)->getTargetLowering(); - return TLI->getMemcpyImpl() != RTLIB::Unsupported; + const LibcallLoweringInfo &Lowering = + ModuleLowering.getLibcallLowering(*TM->getSubtargetImpl(*F)); + return Lowering.getMemcpyImpl() != RTLIB::Unsupported; } // Return a value appropriate for use with the memset_pattern16 libcall, if @@ -317,7 +326,8 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses( Function *ParentFunc = Memcpy->getFunction(); const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) { - if (UseMemIntrinsicLibFunc && canEmitMemcpy(TM, ParentFunc)) + if (UseMemIntrinsicLibFunc && + canEmitMemcpy(ModuleLibcalls, TM, ParentFunc)) break; // TODO: For optsize, emit the loop into a separate function @@ -349,7 +359,7 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses( const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) { if (UseMemIntrinsicLibFunc && - canEmitLibcall(TM, ParentFunc, RTLIB::MEMMOVE)) + canEmitLibcall(ModuleLibcalls, TM, ParentFunc, RTLIB::MEMMOVE)) break; if (expandMemMoveAsLoop(Memmove, TTI)) { @@ -366,7 +376,7 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses( const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) { if (UseMemIntrinsicLibFunc && - canEmitLibcall(TM, ParentFunc, RTLIB::MEMSET)) + canEmitLibcall(ModuleLibcalls, TM, ParentFunc, RTLIB::MEMSET)) break; expandMemSetAsLoop(Memset); @@ -619,10 +629,14 @@ class PreISelIntrinsicLoweringLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); } bool runOnModule(Module &M) override { + const LibcallLoweringModuleAnalysisResult &ModuleLibcalls = + getAnalysis().getResult(); + auto LookupTTI = 
[this](Function &F) -> TargetTransformInfo & { return this->getAnalysis().getTTI(F); }; @@ -631,7 +645,7 @@ class PreISelIntrinsicLoweringLegacyPass : public ModulePass { }; const auto *TM = &getAnalysis().getTM(); - PreISelIntrinsicLowering Lowering(TM, LookupTTI, LookupTLI); + PreISelIntrinsicLowering Lowering(TM, ModuleLibcalls, LookupTTI, LookupTLI); return Lowering.lowerIntrinsics(M); } }; @@ -643,6 +657,8 @@ char PreISelIntrinsicLoweringLegacyPass::ID; INITIALIZE_PASS_BEGIN(PreISelIntrinsicLoweringLegacyPass, "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering", false, false) +INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper) +INITIALIZE_PASS_DEPENDENCY(RuntimeLibraryInfoWrapper) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) @@ -654,9 +670,12 @@ ModulePass *llvm::createPreISelIntrinsicLoweringPass() { return new PreISelIntrinsicLoweringLegacyPass(); } -PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M, - ModuleAnalysisManager &AM) { - auto &FAM = AM.getResult(M).getManager(); +PreservedAnalyses +PreISelIntrinsicLoweringPass::run(Module &M, ModuleAnalysisManager &MAM) { + const LibcallLoweringModuleAnalysisResult &LibcallLowering = + MAM.getResult(M); + + auto &FAM = MAM.getResult(M).getManager(); auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); @@ -665,7 +684,7 @@ PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M, return FAM.getResult(F); }; - PreISelIntrinsicLowering Lowering(TM, LookupTTI, LookupTLI); + PreISelIntrinsicLowering Lowering(TM, LibcallLowering, LookupTTI, LookupTLI); if (!Lowering.lowerIntrinsics(M)) return PreservedAnalyses::all(); else diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8336e1d1f4134..e739659d68561 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2130,7 +2130,7 @@ SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, if (const char *LibcallName = TLI.getLibcallName(LC)) Callee = DAG.getExternalSymbol(LibcallName, CodePtrTy); else { - Callee = DAG.getUNDEF(CodePtrTy); + Callee = DAG.getPOISON(CodePtrTy); DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); } @@ -4992,7 +4992,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { // If the exponent does not match with sizeof(int) a libcall to // RTLIB::POWI would use the wrong type for the argument. DAG.getContext()->emitError("POWI exponent does not match sizeof(int)"); - Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(DAG.getPOISON(Node->getValueType(0))); break; } ExpandFPLibCall(Node, LC, Results); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 93118becedbac..f9cde383ce32d 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -446,6 +447,11 @@ static void codegen(const Config &Conf, TargetMachine *TM, legacy::PassManager CodeGenPasses; TargetLibraryInfoImpl TLII(Mod.getTargetTriple()); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); + CodeGenPasses.add(new RuntimeLibraryInfoWrapper( + Mod.getTargetTriple(), TM->Options.ExceptionModel, + TM->Options.FloatABIType, TM->Options.EABIVersion, + TM->Options.MCOptions.ABIName, TM->Options.VecLib)); + // No need to make index available if the module is empty. 
// In theory these passes should not use the index for an empty // module, however, this guards against doing any unnecessary summary-based diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 074c328ef0931..e9874ecd553ee 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -30,6 +30,7 @@ MODULE_ANALYSIS("ir2vec-vocab", IR2VecVocabAnalysis()) MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) MODULE_ANALYSIS("last-run-tracking", LastRunTrackingAnalysis()) MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis()) +MODULE_ANALYSIS("libcall-lowering-info", LibcallLoweringModuleAnalysis()) MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis()) MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index a0980bda2a212..1c397e8c0b766 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -202,6 +202,7 @@ add_llvm_component_library(LLVMSupport FormatVariadic.cpp GlobPattern.cpp GraphWriter.cpp + Hash.cpp HexagonAttributeParser.cpp HexagonAttributes.cpp InitLLVM.cpp diff --git a/llvm/lib/Support/Hash.cpp b/llvm/lib/Support/Hash.cpp new file mode 100644 index 0000000000000..38befcca86b15 --- /dev/null +++ b/llvm/lib/Support/Hash.cpp @@ -0,0 +1,51 @@ +//===- Hash.cpp - Hash functions ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements hash functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Hash.h" +#include "llvm/Support/xxhash.h" + +using namespace llvm; + +KCFIHashAlgorithm llvm::parseKCFIHashAlgorithm(StringRef Name) { + if (Name == "FNV-1a") + return KCFIHashAlgorithm::FNV1a; + // Default to xxHash64 for backward compatibility + return KCFIHashAlgorithm::xxHash64; +} + +StringRef llvm::stringifyKCFIHashAlgorithm(KCFIHashAlgorithm Algorithm) { + switch (Algorithm) { + case KCFIHashAlgorithm::xxHash64: + return "xxHash64"; + case KCFIHashAlgorithm::FNV1a: + return "FNV-1a"; + } + llvm_unreachable("Unknown KCFI hash algorithm"); +} + +uint32_t llvm::getKCFITypeID(StringRef MangledTypeName, + KCFIHashAlgorithm Algorithm) { + switch (Algorithm) { + case KCFIHashAlgorithm::xxHash64: + // Use lower 32 bits of xxHash64 + return static_cast(xxHash64(MangledTypeName)); + case KCFIHashAlgorithm::FNV1a: + // FNV-1a hash (32-bit) + uint32_t Hash = 2166136261u; // FNV offset basis + for (unsigned char C : MangledTypeName) { + Hash ^= C; + Hash *= 16777619u; // FNV prime + } + return Hash; + } + llvm_unreachable("Unknown KCFI hash algorithm"); +} diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 90f0b49ab9a78..bcfdb2ca5a3da 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3450,6 +3450,12 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) { static const AMDGPU::OpName VOP2MADMKOps[4] = { AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::src1, AMDGPU::OpName::vdst}; + static const AMDGPU::OpName VOPDFMAMKOpsX[4] = { + AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES, + AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX}; + static const AMDGPU::OpName VOPDFMAMKOpsY[4] = { + AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES, + AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY}; unsigned 
TSFlags = Desc.TSFlags; @@ -3491,8 +3497,11 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) { if (TSFlags & SIInstrFlags::VIMAGE) return {VIMGOps, nullptr}; - if (AMDGPU::isVOPD(Desc.getOpcode())) - return {VOPDOpsX, VOPDOpsY}; + if (AMDGPU::isVOPD(Desc.getOpcode())) { + auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode()); + return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX, + (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY}; + } assert(!(TSFlags & SIInstrFlags::MIMG)); diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index 1a5f09642ea66..eddab5a235dab 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -37,6 +37,8 @@ add_llvm_target(HexagonCodeGen HexagonGenMemAbsolute.cpp HexagonGenMux.cpp HexagonGenPredicate.cpp + HexagonGenWideningVecFloatInstr.cpp + HexagonGenWideningVecInstr.cpp HexagonHardwareLoops.cpp HexagonHazardRecognizer.cpp HexagonInstrInfo.cpp @@ -53,6 +55,7 @@ add_llvm_target(HexagonCodeGen HexagonNewValueJump.cpp HexagonOptAddrMode.cpp HexagonOptimizeSZextends.cpp + HexagonOptShuffleVector.cpp HexagonPeephole.cpp HexagonQFPOptimizer.cpp HexagonRDFOpt.cpp diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h index 422ab20891b94..b98369d1b3e30 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.h +++ b/llvm/lib/Target/Hexagon/Hexagon.h @@ -92,6 +92,9 @@ FunctionPass *createHexagonGenInsert(); FunctionPass *createHexagonGenMemAbsolute(); FunctionPass *createHexagonGenMux(); FunctionPass *createHexagonGenPredicate(); +FunctionPass * +createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &); +FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOptLevel OptLevel); @@ -102,6 +105,7 @@ FunctionPass *createHexagonMergeActivateWeight(); FunctionPass 
*createHexagonNewValueJump(); FunctionPass *createHexagonOptAddrMode(); FunctionPass *createHexagonOptimizeSZextends(); +FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &); FunctionPass *createHexagonPacketizer(bool Minimal); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonRDFOpt(); diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index 5344ed8446efc..412d58743df94 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -51,8 +51,7 @@ struct PrintRegister { }; [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, - const PrintRegister &PR); -raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) { + const PrintRegister &PR) { return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); } diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp new file mode 100644 index 0000000000000..7271f1f839d69 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp @@ -0,0 +1,565 @@ +//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replace widening vector float operations with hexagon intrinsics. +// +//===----------------------------------------------------------------------===// +// +// Brief overview of working of GenWideningVecFloatInstr pass. +// This version of pass is replica of already existing pass(which will replace +// widen vector integer operations with it's respective intrinsics). 
In this +// pass we will generate hexagon intrinsics for widen vector float instructions. +// +// Example1(64 vector-width widening): +// %wide.load = load <64 x half>, <64 x half>* %0, align 2 +// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2 +// %1 = fpext <64 x half> %wide.load to <64 x float> +// %3 = fpext <64 x half> %wide.load53 to <64 x float> +// %4 = fmul <64 x float> %1, %3 +// +// If we run this pass on the above example, it will first find fmul +// instruction, and then it will check whether the operands of fmul instruction +// (%1 and %3) belongs to either of these categories [%1 ->fpext, %3 ->fpext] +// or [%1 ->fpext, %3 ->constant_vector] or [%1 ->constant_vector, %3 ->fpext]. +// If it sees such pattern, then this pass will replace such pattern with +// appropriate hexagon intrinsics. +// +// After replacement: +// %wide.load = load <64 x half>, <64 x half>* %0, align 2 +// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2 +// %3 = bitcast <64 x half> %wide.load to <32 x i32> +// %4 = bitcast <64 x half> %wide.load53 to <32 x i32> +// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4) +// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1 +// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6) +// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6) +// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7) +// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8) +// %11 = bitcast <32 x i32> %9 to <32 x float> +// %12 = bitcast <32 x i32> %10 to <32 x float> +// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2 +// +// +// +// Example2(128 vector-width widening): +// %0 = bitcast half* %a to <128 x half>* +// %wide.load = load <128 x half>, <128 x half>* %0, align 2 +// %1 = fpext <128 x half> %wide.load to <128 x float> +// %2 = bitcast half* %b to <128 x half>* +// %wide.load2 = load <128 x half>, <128 x 
half>* %2, align 2 +// %3 = fpext <128 x half> %wide.load2 to <128 x float> +// %4 = fmul <128 x float> %1, %3 +// +// After replacement: +// %0 = bitcast half* %a to <128 x half>* +// %wide.load = load <128 x half>, <128 x half>* %0, align 2 +// %1 = bitcast half* %b to <128 x half>* +// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2 +// %2 = bitcast <128 x half> %wide.load to <64 x i32> +// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2) +// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2) +// %5 = bitcast <128 x half> %wide.load2 to <64 x i32> +// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5) +// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5) +// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6) +// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1 +// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9) +// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9) +// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10) +// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11) +// %14 = bitcast <32 x i32> %12 to <32 x float> +// %15 = bitcast <32 x i32> %13 to <32 x float> +// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2 +// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7) +// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1 +// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18) +// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18) +// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19) +// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20) +// %23 = bitcast <32 x i32> %21 to <32 x float> +// %24 = bitcast <32 x i32> %22 to <32 x float> +// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2 +// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 
x i32> Mask3 +// +// +//===----------------------------------------------------------------------===// +#include "HexagonTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include +#include + +using namespace llvm; + +namespace llvm { +void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &); +FunctionPass * +createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &); +} // end namespace llvm + +namespace { + +class HexagonGenWideningVecFloatInstr : public FunctionPass { +public: + static char ID; + + HexagonGenWideningVecFloatInstr() : FunctionPass(ID) { + initializeHexagonGenWideningVecFloatInstrPass( + *PassRegistry::getPassRegistry()); + } + + HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM) + : FunctionPass(ID), TM(TM) { + initializeHexagonGenWideningVecFloatInstrPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Hexagon generate widening vector float instructions"; + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + } + +private: + Module *M = nullptr; + const HexagonTargetMachine *TM = nullptr; + const HexagonSubtarget *HST = nullptr; + unsigned HwVLen; + unsigned NumHalfEltsInFullVec; + + struct OPInfo { + Value *OP; + Value *ExtInOP; + unsigned ExtInSize; + }; + + bool visitBlock(BasicBlock *B); + bool processInstruction(Instruction *Inst); + bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info, + OPInfo &OP2Info); + + bool getOperandInfo(Value *V, OPInfo &OPI); + bool 
isExtendedConstant(Constant *C); + unsigned getElementSizeInBits(Value *V); + Type *getElementTy(unsigned size, IRBuilder<> &IRB); + + Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB, + unsigned NewEltsize, unsigned NumElts); + + std::pair opSplit(Value *OP, Instruction *Inst); + + Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, + Value *NewOP2, FixedVectorType *ResType, + unsigned NumElts, bool BitCastOp); +}; + +} // end anonymous namespace + +char HexagonGenWideningVecFloatInstr::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonGenWideningVecFloatInstr, "widening-vec-float", + "Hexagon generate " + "widening vector float instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenWideningVecFloatInstr, "widening-vec-float", + "Hexagon generate " + "widening vector float instructions", + false, false) + +bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) { + if (Value *SplatV = C->getSplatValue()) { + if (auto *CFP = dyn_cast(SplatV)) { + bool Ignored; + APFloat APF = CFP->getValueAPF(); + APFloat::opStatus sts = APF.convert( + APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); + if (sts == APFloat::opStatus::opOK || sts == APFloat::opStatus::opInexact) + return true; + } + return false; + } + unsigned NumElts = cast(C->getType())->getNumElements(); + for (unsigned i = 0, e = NumElts; i != e; ++i) { + if (auto *CFP = dyn_cast(C->getAggregateElement(i))) { + bool Ignored; + APFloat APF = CFP->getValueAPF(); + APFloat::opStatus sts = APF.convert( + APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); + if (sts != APFloat::opStatus::opOK && sts != APFloat::opStatus::opInexact) + return false; + continue; + } + return false; + } + return true; +} + +unsigned HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) { + Type *ValTy = V->getType(); + Type *EltTy = ValTy; + if (dyn_cast(V)) { + unsigned EltSize = + 
cast(EltTy)->getElementType()->getPrimitiveSizeInBits(); + unsigned ReducedSize = EltSize / 2; + + return ReducedSize; + } + + if (ValTy->isVectorTy()) + EltTy = cast(ValTy)->getElementType(); + return EltTy->getPrimitiveSizeInBits(); +} + +bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) { + using namespace PatternMatch; + OPI.OP = V; + Value *ExtV = nullptr; + Constant *C = nullptr; + + if (match(V, (m_FPExt(m_Value(ExtV)))) || + match(V, + m_Shuffle(m_InsertElt(m_Poison(), m_FPExt(m_Value(ExtV)), m_Zero()), + m_Poison(), m_ZeroMask()))) { + + if (auto *ExtVType = dyn_cast(ExtV->getType())) { + // Matches the first branch. + if (ExtVType->getElementType()->isBFloatTy()) + // do not confuse bf16 with ieee-fp16. + return false; + } else { + // Matches the second branch (insert element branch) + if (ExtV->getType()->isBFloatTy()) + return false; + } + + OPI.ExtInOP = ExtV; + OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP); + return true; + } + + if (match(V, m_Constant(C))) { + if (!isExtendedConstant(C)) + return false; + OPI.ExtInOP = C; + OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP); + return true; + } + + return false; +} + +Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size, + IRBuilder<> &IRB) { + switch (size) { + case 16: + return IRB.getHalfTy(); + case 32: + return IRB.getFloatTy(); + default: + llvm_unreachable("Unhandled Element size"); + } +} + +Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp( + OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) { + Value *V = OPI.ExtInOP; + unsigned EltSize = getElementSizeInBits(OPI.ExtInOP); + assert(NewExtSize >= EltSize); + Type *EltType = getElementTy(NewExtSize, IRB); + auto *NewOpTy = FixedVectorType::get(EltType, NumElts); + + if (auto *C = dyn_cast(V)) + return IRB.CreateFPTrunc(C, NewOpTy); + + if (V->getType()->isVectorTy()) + if (NewExtSize == EltSize) + return V; + + return nullptr; +} + +std::pair 
+HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) { + Type *InstTy = Inst->getType(); + unsigned NumElts = cast(InstTy)->getNumElements(); + IRBuilder<> IRB(Inst); + Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B; + Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B; + Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi); + Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo); + if (NumElts == 128) { + auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64); + OP = IRB.CreateBitCast(OP, InType); + } + Value *OP1Hi = IRB.CreateCall(ExtFHi, {OP}); + Value *OP1Lo = IRB.CreateCall(ExtFLo, {OP}); + return std::pair(OP1Hi, OP1Lo); +} + +Value *HexagonGenWideningVecFloatInstr::createIntrinsic( + Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2, + FixedVectorType *ResType, unsigned NumElts, bool BitCastOp) { + + IRBuilder<> IRB(Inst); + Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId); + Function *ConvF = Intrinsic::getOrInsertDeclaration( + M, Intrinsic::hexagon_V6_vconv_sf_qf32_128B); + auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 32); + auto *RType = FixedVectorType::get(IRB.getFloatTy(), 32); + + // Make sure inputs to vmpy instrinsic are full vectors + if (NumElts == NumHalfEltsInFullVec / 2) { + SmallVector ConcatMask1; + for (unsigned i = 0; i < NumHalfEltsInFullVec; ++i) + ConcatMask1.push_back(IRB.getInt32(i)); + NewOP1 = + IRB.CreateShuffleVector(NewOP1, PoisonValue::get(NewOP1->getType()), + ConstantVector::get(ConcatMask1)); + NewOP2 = + IRB.CreateShuffleVector(NewOP2, PoisonValue::get(NewOP2->getType()), + ConstantVector::get(ConcatMask1)); + } + + if (BitCastOp) { + NewOP1 = IRB.CreateBitCast(NewOP1, InType); + NewOP2 = IRB.CreateBitCast(NewOP2, InType); + } + + Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2}); + // Interleave the output elements to ensure correct order in Hi and Lo vectors + // Shuffled Mask: [0, 32, 1, 33, ..., 31, 63] + // Hi: [0, 1, ..., 31] 
and Lo: [32, 33, ..., 63] + SmallVector Mask; + unsigned HalfVecPoint = NumHalfEltsInFullVec / 2; + for (unsigned i = 0; i < HalfVecPoint; ++i) { + Mask.push_back(IRB.getInt32(i)); + Mask.push_back(IRB.getInt32(HalfVecPoint + i)); + } + NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(NewIn->getType()), + ConstantVector::get(Mask)); + + std::pair SplitOP = opSplit(NewIn, Inst); + Value *ConvHi = IRB.CreateCall(ConvF, {SplitOP.first}); + ConvHi = IRB.CreateBitCast(ConvHi, RType); + + if (ResType->getNumElements() == NumHalfEltsInFullVec / 2) { + return ConvHi; + } + + Value *ConvLo = IRB.CreateCall(ConvF, {SplitOP.second}); + ConvLo = IRB.CreateBitCast(ConvLo, RType); + + SmallVector ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i)); + // Concat Hi and Lo. + NewIn = + IRB.CreateShuffleVector(ConvLo, ConvHi, ConstantVector::get(ShuffleMask)); + return NewIn; +} + +bool HexagonGenWideningVecFloatInstr::replaceWithIntrinsic(Instruction *Inst, + OPInfo &OP1Info, + OPInfo &OP2Info) { + Type *InstTy = Inst->getType(); + Type *EltTy = cast(InstTy)->getElementType(); + unsigned NumElts = cast(InstTy)->getNumElements(); + [[maybe_unused]] unsigned InstEltSize = EltTy->getPrimitiveSizeInBits(); + + unsigned MaxEltSize = OP1Info.ExtInSize; + unsigned NewOpEltSize = MaxEltSize; + unsigned NewResEltSize = 2 * MaxEltSize; + + unsigned ResVLen = NewResEltSize * NumElts; + if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0)) + return false; + + Intrinsic::ID IntId = Intrinsic::hexagon_V6_vmpy_qf32_hf_128B; + IRBuilder<> IRB(Inst); + Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts); + Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts); + + if (NewOP1 == nullptr || NewOP2 == nullptr) + return false; + + if (ResVLen > 2 * HwVLen) { + // The code written in this if block generates the widening code when + // vector-width is 128: + // + // Step 1: Bitcast <128 x half> 
type to <64 x i32> + // %wide.load = load <128 x half>, <128 x half>* %0 is bitcasted to, + // bitcast <128 x half> %wide.load to <64 x i32> + // + // Step 2: Generate Hi and Lo vectors + // call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %4) + // call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %4) + // + // Perform above 2 steps for both the operands of fmul instruction + // + // Step 3: Generate vmpy_qf32_hf multiply instruction to multiply two Hi + // vectors from both operands. + // call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%5, %8) + // + // Step 4: Convert the resultant 'qf32' output to 'sf' format + // %11 = shufflevector <64 x i32> %10, <64 x i32> poison, <64 x i32> Mask1 + // %12 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %11) + // %13 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %11) + // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %12) + // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %13) + // + // Repeat steps 3 and 4 for mutiplication and conversion of Lo vectors. + // Finally merge the output values in correct sequence using shuffle + // vectors. + + assert(ResVLen == 4 * HwVLen); + // Split the operands + unsigned HalfElts = NumElts / 2; + std::pair SplitOP1 = opSplit(NewOP1, Inst); + std::pair SplitOP2 = opSplit(NewOP2, Inst); + auto *castResType = FixedVectorType::get(IRB.getInt32Ty(), HalfElts); + Value *NewInHi = + createIntrinsic(IntId, Inst, SplitOP1.first, SplitOP2.first, + castResType, HalfElts, false); + Value *NewInLo = + createIntrinsic(IntId, Inst, SplitOP1.second, SplitOP2.second, + castResType, HalfElts, false); + assert(InstEltSize == NewResEltSize); + SmallVector ShuffleMask; + for (unsigned i = 0; i < NumElts; ++i) + ShuffleMask.push_back(IRB.getInt32(i)); + // Concat Hi and Lo. 
+ Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi, + ConstantVector::get(ShuffleMask)); + + Inst->replaceAllUsesWith(NewIn); + return true; + } + + auto *ResType = + FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts); + + // The following widening code can only be generated in cases where + // input vectors are 64xhalf/32xhalf and the results are 64xfloat/32xfloat + // respectively. + if (!(NumElts == NumHalfEltsInFullVec && + ResType->getNumElements() == NumHalfEltsInFullVec) && + !(NumElts == NumHalfEltsInFullVec / 2 && + ResType->getNumElements() == NumHalfEltsInFullVec / 2)) + return false; + Value *NewIn = + createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true); + + Inst->replaceAllUsesWith(NewIn); + return true; +} + +// Process instruction and replace them with widening vector +// intrinsics if possible. +bool HexagonGenWideningVecFloatInstr::processInstruction(Instruction *Inst) { + Type *InstTy = Inst->getType(); + if (!InstTy->isVectorTy() || + cast(InstTy)->getNumElements() > 128) + return false; + unsigned InstLen = InstTy->getPrimitiveSizeInBits(); + if (!HST->isTypeForHVX(cast(InstTy)) && InstLen != 4 * HwVLen) + return false; + if (InstLen < HwVLen) + return false; + + using namespace PatternMatch; + + Value *OP1 = nullptr, *OP2 = nullptr; + OPInfo OP1Info, OP2Info; + + // Handle the case when Inst = fpext(fmul<64xhalf>(op1, op2)). The Inst can + // be replaced with widening multiply. 
+ if (match(Inst, (m_FPExt((m_FMul(m_Value(OP1), m_Value(OP2))))))) { + OP1Info.ExtInOP = OP1; + OP1Info.ExtInSize = getElementSizeInBits(OP1); + OP2Info.ExtInOP = OP2; + OP2Info.ExtInSize = getElementSizeInBits(OP2); + + if (auto *Op1Vtype = dyn_cast(OP1->getType())) { + if (!Op1Vtype->getElementType()->isHalfTy()) { + return false; + } + } else { + return false; + } + + if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16 && + getElementSizeInBits(Inst) == 32) { + return replaceWithIntrinsic(Inst, OP1Info, OP2Info); + } + } + + if (!match(Inst, (m_FMul(m_Value(OP1), m_Value(OP2))))) + return false; + + if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info)) + return false; + + if (!OP1Info.ExtInOP || !OP2Info.ExtInOP) + return false; + + if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16) { + return replaceWithIntrinsic(Inst, OP1Info, OP2Info); + } + + return false; +} + +bool HexagonGenWideningVecFloatInstr::visitBlock(BasicBlock *B) { + bool Changed = false; + for (auto &I : *B) + Changed |= processInstruction(&I); + return Changed; +} + +bool HexagonGenWideningVecFloatInstr::runOnFunction(Function &F) { + M = F.getParent(); + HST = TM->getSubtargetImpl(F); + + // Return if useHVX128BOps is not set. It can be enabled for 64B mode + // but wil require some changes. For example, bitcast for intrinsics + // assumes 128B mode. 
+ if (skipFunction(F) || !HST->useHVX128BOps()) + return false; + + unsigned VecLength = HST->getVectorLength(); // Vector Length in Bytes + HwVLen = HST->getVectorLength() * 8; // Vector Length in bits + NumHalfEltsInFullVec = + VecLength / + 2; // Number of half (2B) elements that fit into a full HVX vector + bool Changed = false; + for (auto &B : F) + Changed |= visitBlock(&B); + + return Changed; +} + +FunctionPass * +llvm::createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &TM) { + return new HexagonGenWideningVecFloatInstr(&TM); +} diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp new file mode 100644 index 0000000000000..8df22ae6ebb06 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp @@ -0,0 +1,1184 @@ +//===--------------------- HexagonGenWideningVecInstr.cpp -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Replace widening vector operations with hexagon intrinsics. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include +#include + +using namespace llvm; + +// A command line argument to enable the generation of widening instructions +// for short-vectors. +static cl::opt WidenShortVector( + "hexagon-widen-short-vector", + cl::desc("Generate widening instructions for short vectors."), cl::Hidden); + +namespace llvm { +void initializeHexagonGenWideningVecInstrPass(PassRegistry &); +FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &); +} // end namespace llvm + +namespace { + +class HexagonGenWideningVecInstr : public FunctionPass { +public: + static char ID; + + HexagonGenWideningVecInstr() : FunctionPass(ID) { + initializeHexagonGenWideningVecInstrPass(*PassRegistry::getPassRegistry()); + } + + HexagonGenWideningVecInstr(const HexagonTargetMachine *TM) + : FunctionPass(ID), TM(TM) { + initializeHexagonGenWideningVecInstrPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Hexagon generate widening vector instructions"; + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + } + +private: + Module *M = nullptr; + const HexagonTargetMachine *TM = nullptr; + const HexagonSubtarget *HST = nullptr; + unsigned HwVLen; + enum OPKind { OP_None = 0, OP_Add, OP_Sub, OP_Mul, OP_Shl }; + + struct OPInfo { + Value *OP = nullptr; + Value *ExtInOP = 
nullptr; + bool IsZExt = false; + unsigned ExtInSize = 0; + bool IsScalar = false; + }; + + bool visitBlock(BasicBlock *B); + bool processInstruction(Instruction *Inst); + bool replaceWithIntrinsic(Instruction *Inst, OPKind OPK, OPInfo &OP1Info, + OPInfo &OP2Info); + bool getOperandInfo(Value *V, OPInfo &OPI); + bool isExtendedConstant(Constant *C, bool IsSigned); + unsigned getElementSizeInBits(Value *V, bool IsZExt); + Type *getElementTy(unsigned size, IRBuilder<> &IRB); + + Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB, + unsigned NewEltsize, unsigned NumElts); + + Intrinsic::ID getIntrinsic(OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, + unsigned NewOpEltSize, unsigned NewResEltSize, + bool IsConstScalar, int ConstOpNum); + + std::pair opSplit(Value *OP, Instruction *Inst, + Type *NewOpType); + + Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, + Value *NewOP2, Type *ResType, unsigned NumElts, + bool Interleave); + bool processInstructionForVMPA(Instruction *Inst); + bool getVmpaOperandInfo(Value *V, OPInfo &OPI); + void reorderVmpaOperands(OPInfo *OPI); + bool replaceWithVmpaIntrinsic(Instruction *Inst, OPInfo *OPI); + bool genSaturatingInst(Instruction *Inst); + bool getMinMax(Constant *MinC, Constant *MaxC, std::pair &MinMax); + bool isSaturatingVAsr(Instruction *Inst, Value *S, int MinV, int MaxV, + bool &IsResSigned); + Value *extendShiftByVal(Value *ShiftByVal, IRBuilder<> &IRB); + Intrinsic::ID getVAsrIntrinsic(bool IsInSigned, bool IsResSigned); + Value *createVAsrIntrinsic(Instruction *Inst, Value *VecOP, Value *ShiftByVal, + bool IsResSigned); + bool genVAvg(Instruction *Inst); + bool checkConstantVector(Value *OP, int64_t &SplatVal, bool IsOPZExt); + void updateMPYConst(Intrinsic::ID IntId, int64_t &SplatVal, bool IsOPZExt, + Value *&OP, IRBuilder<> &IRB); + void packConstant(Intrinsic::ID IntId, int64_t &SplatVal, Value *&OP, + IRBuilder<> &IRB); +}; + +} // end anonymous namespace + +char 
HexagonGenWideningVecInstr::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonGenWideningVecInstr, "widening-vec",
+                      "Hexagon generate "
+                      "widening vector instructions",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenWideningVecInstr, "widening-vec",
+                    "Hexagon generate "
+                    "widening vector instructions",
+                    false, false)
+
+/// Return true if any element of the vector constant \p C is negative.
+/// Every element must be a ConstantInt; cast<> asserts otherwise.
+static bool hasNegativeValues(Constant *C) {
+  if (Value *SplatV = C->getSplatValue())
+    return cast<ConstantInt>(SplatV)->getValue().isNegative();
+
+  unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+  for (unsigned i = 0, e = NumElts; i != e; ++i)
+    if (cast<ConstantInt>(C->getAggregateElement(i))->getValue().isNegative())
+      return true;
+  return false;
+}
+
+/// Classify operand \p V and record the result in \p OPI.
+///
+/// Recognized forms: zext/sext of a value, a splat (insertelement at index 0
+/// + zero-mask shuffle) of a zext/sext'ed scalar, a splat of a scalar masked
+/// with 255 or 65535, and a vector constant whose elements fit in half of
+/// the element width. Returns false when none of them matches.
+bool HexagonGenWideningVecInstr::getOperandInfo(Value *V, OPInfo &OPI) {
+  using namespace PatternMatch;
+  OPI.OP = V;
+  Value *ExtV = nullptr;
+  Constant *C = nullptr;
+
+  bool Match = false;
+  // Zero-extended vector, or splat of a zero-extended scalar.
+  if ((Match = (match(V, (m_ZExt(m_Value(ExtV)))) ||
+                match(V, m_Shuffle(m_InsertElt(m_Poison(),
+                                               m_ZExt(m_Value(ExtV)), m_Zero()),
+                                   m_Poison(), m_ZeroMask()))))) {
+    OPI.ExtInOP = ExtV;
+    OPI.IsZExt = true;
+  }
+
+  // Sign-extended vector, or splat of a sign-extended scalar.
+  if (!Match &&
+      (Match = (match(V, (m_SExt(m_Value(ExtV)))) ||
+                match(V, m_Shuffle(m_InsertElt(m_Poison(),
+                                               m_SExt(m_Value(ExtV)), m_Zero()),
+                                   m_Poison(), m_ZeroMask()))))) {
+    OPI.ExtInOP = ExtV;
+    OPI.IsZExt = false;
+  }
+
+  // Splat of a scalar that is masked down to 8 or 16 bits; the mask makes it
+  // equivalent to a zero-extended value of that width.
+  if (!Match &&
+      (Match =
+           (match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
+                               m_Poison(), m_ZeroMask()))))) {
+    if (match(ExtV, m_And(m_Value(), m_SpecificInt(255)))) {
+      OPI.ExtInOP = ExtV;
+      OPI.IsZExt = true;
+      OPI.ExtInSize = 8;
+      return true;
+    }
+    if (match(ExtV, m_And(m_Value(), m_SpecificInt(65535)))) {
+      OPI.ExtInOP = ExtV;
+      OPI.IsZExt = true;
+      OPI.ExtInSize = 16;
+      return true;
+    }
+    return false;
+  }
+
+  // Vector constant that fits in half of its element width, either as an
+  // unsigned or as a signed value.
+  if (!Match && (Match = match(V, m_Constant(C)))) {
+    if (!isExtendedConstant(C, false) && !isExtendedConstant(C, true))
+      return false;
+    OPI.ExtInOP = C;
+    OPI.IsZExt = !hasNegativeValues(C);
+  }
+
+  if (!Match)
+    return false;
+
+  // If the operand is extended, find the element size of its input.
+  if (OPI.ExtInOP)
+    OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
+  return true;
+}
+
+/// Return true if every element of the vector constant \p C is a ConstantInt
+/// that fits in half of the element width, interpreted as signed when
+/// \p IsSigned is set and as unsigned otherwise.
+bool HexagonGenWideningVecInstr::isExtendedConstant(Constant *C,
+                                                    bool IsSigned) {
+  Type *CTy = cast<VectorType>(C->getType())->getElementType();
+  unsigned EltSize = CTy->getPrimitiveSizeInBits();
+  unsigned HalfSize = EltSize / 2;
+  if (Value *SplatV = C->getSplatValue()) {
+    if (auto *CI = dyn_cast<ConstantInt>(SplatV))
+      return IsSigned ? isIntN(HalfSize, CI->getSExtValue())
+                      : isUIntN(HalfSize, CI->getZExtValue());
+    return false;
+  }
+  unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+  for (unsigned i = 0, e = NumElts; i != e; ++i) {
+    // getAggregateElement() may return null, which plain dyn_cast<> rejects.
+    auto *CI = dyn_cast_if_present<ConstantInt>(C->getAggregateElement(i));
+    if (!CI)
+      return false;
+    if ((IsSigned && !isIntN(HalfSize, CI->getSExtValue())) ||
+        (!IsSigned && !isUIntN(HalfSize, CI->getZExtValue())))
+      return false;
+  }
+  return true;
+}
+
+/// Return the element size in bits of \p V.
+///
+/// For a vector constant the size is repeatedly halved (down to 8 bits) for
+/// as long as every ConstantInt element still fits, using unsigned range
+/// checks when \p IsZExt is set and signed ones otherwise. For any other
+/// value this is the element size of its (vector) type.
+unsigned HexagonGenWideningVecInstr::getElementSizeInBits(Value *V,
+                                                          bool IsZExt = false) {
+  using namespace PatternMatch;
+  Type *ValTy = V->getType();
+  Type *EltTy = ValTy;
+  if (auto *C = dyn_cast<Constant>(V)) {
+    unsigned NumElts = cast<FixedVectorType>(EltTy)->getNumElements();
+    unsigned EltSize = cast<VectorType>(EltTy)
+                           ->getElementType()
+                           ->getPrimitiveSizeInBits()
+                           .getKnownMinValue();
+    unsigned ReducedSize = EltSize / 2;
+
+    while (ReducedSize >= 8) {
+      for (unsigned i = 0, e = NumElts; i != e; ++i) {
+        // Elements that are not ConstantInt (or are missing) are ignored.
+        auto *CI = dyn_cast_if_present<ConstantInt>(C->getAggregateElement(i));
+        if (!CI)
+          continue;
+        if (IsZExt) {
+          if (!isUIntN(ReducedSize, CI->getZExtValue()))
+            return EltSize;
+        } else if (!isIntN(ReducedSize, CI->getSExtValue()))
+          return EltSize;
+      }
+      EltSize = ReducedSize;
+      ReducedSize = ReducedSize / 2;
+    }
+    return EltSize;
+  }
+
+  if (ValTy->isVectorTy())
+    EltTy = cast<VectorType>(ValTy)->getElementType();
+  return EltTy->getPrimitiveSizeInBits();
+}
+
+/// Build the operand for the widening intrinsic from \p OPI, with
+/// \p NumElts elements of \p NewExtSize bits each.
+///
+/// Constants are truncated, extended vectors are returned as-is or
+/// re-extended to 16 bits, and splatted scalars are rebuilt as a splat with
+/// the new element size.
+Value *HexagonGenWideningVecInstr::adjustExtensionForOp(OPInfo &OPI,
+                                                        IRBuilder<> &IRB,
+                                                        unsigned NewExtSize,
+                                                        unsigned NumElts) {
+  Value *V = OPI.ExtInOP;
+  bool IsZExt = OPI.IsZExt;
+  unsigned EltSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
+  Type *EltType = getElementTy(NewExtSize, IRB);
+  auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
+
+  if (isa<Constant>(V))
+    return IRB.CreateTrunc(V, NewOpTy);
+
+  if (V->getType()->isVectorTy()) {
+    if (NewExtSize == EltSize)
+      return V;
+    // NewOpTy already has 16-bit elements here.
+    assert(NewExtSize == 16);
+    return (IsZExt) ? IRB.CreateZExt(V, NewOpTy) : IRB.CreateSExt(V, NewOpTy);
+  }
+
+  // The operand must correspond to a shuffle vector which is used to construct
+  // a vector out of a scalar. Since the scalar value (V) is extended,
+  // replace it with a new shuffle vector with the smaller element size.
+  [[maybe_unused]] auto *I = dyn_cast<Instruction>(OPI.OP);
+  assert(I && I->getOpcode() == Instruction::ShuffleVector);
+
+  if (NewExtSize > EltSize)
+    V = (IsZExt) ? IRB.CreateZExt(V, EltType) : IRB.CreateSExt(V, EltType);
+  else if (NewExtSize < EltSize)
+    V = IRB.CreateTrunc(V, EltType);
+
+  Value *IE =
+      IRB.CreateInsertElement(PoisonValue::get(NewOpTy), V, IRB.getInt32(0));
+
+  // All-zero mask: broadcast element 0 to every lane.
+  SmallVector<Constant *> ShuffleMask;
+  for (unsigned i = 0; i < NumElts; ++i)
+    ShuffleMask.push_back(IRB.getInt32(0));
+
+  return IRB.CreateShuffleVector(IE, PoisonValue::get(NewOpTy),
+                                 ConstantVector::get(ShuffleMask));
+}
+
+/// Select the widening intrinsic for \p OPK given the signedness of both
+/// operands, the input/result element sizes, and whether one operand is a
+/// splat constant (\p IsConstScalar, at position \p ConstOpNum).
+Intrinsic::ID HexagonGenWideningVecInstr::getIntrinsic(
+    OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, unsigned InEltSize,
+    unsigned ResEltSize, bool IsConstScalar, int ConstOpNum) {
+  // Since the operands have been extended, the ResEltSize must be 16 or more.
+  switch (OPK) {
+  case OP_Add:
+    // Both operands should be either zero extended or sign extended.
+    assert(IsOP1ZExt == IsOP2ZExt);
+    if (InEltSize == 8 && ResEltSize == 16) {
+      // Operands must be zero extended as we don't have a widening vector
+      // 'add' that can take signed extended values.
+      assert(IsOP1ZExt && "Operands must be zero-extended");
+      return Intrinsic::hexagon_vadd_uu;
+    }
+    if (InEltSize == 16 && ResEltSize == 32)
+      return (IsOP1ZExt) ? Intrinsic::hexagon_vadd_uu
+                         : Intrinsic::hexagon_vadd_ss;
+
+    llvm_unreachable("Incorrect input and output operand sizes");
+
+  case OP_Sub:
+    // Both operands should be either zero extended or sign extended.
+    assert(IsOP1ZExt == IsOP2ZExt);
+    if (InEltSize == 8 && ResEltSize == 16) {
+      // Operands must be zero extended as we don't have a widening vector
+      // 'sub' that can take signed extended values.
+      assert(IsOP1ZExt && "Operands must be zero-extended");
+      return Intrinsic::hexagon_vsub_uu;
+    }
+    if (InEltSize == 16 && ResEltSize == 32)
+      return (IsOP1ZExt) ? Intrinsic::hexagon_vsub_uu
+                         : Intrinsic::hexagon_vsub_ss;
+
+    llvm_unreachable("Incorrect input and output operand sizes");
+
+  case OP_Mul:
+    // Compare, do not assign: the result must be twice as wide as the input.
+    assert(ResEltSize == 2 * InEltSize);
+    // Enter inside 'if' block when one of the operand is constant vector
+    if (IsConstScalar) {
+      // When inputs are of 8bit type and output is 16bit type, enter 'if' block
+      if (InEltSize == 8 && ResEltSize == 16) {
+        // Enter the 'if' block, when 2nd operand of the mul instruction is
+        // constant vector, otherwise enter 'else' block
+        if (ConstOpNum == 2 && IsOP1ZExt) {
+          // If the value inside the constant vector is zero-extended, then
+          // return hexagon_vmpy_ub_ub, else return hexagon_vmpy_ub_b
+          return (IsOP2ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
+                             : Intrinsic::hexagon_vmpy_ub_b;
+        } else if (ConstOpNum == 1 && IsOP2ZExt) {
+          return (IsOP1ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
+                             : Intrinsic::hexagon_vmpy_ub_b;
+        }
+      }
+      // When inputs are of 16bit type and output is 32bit type,
+      // enter 'if' block
+      if (InEltSize == 16 && ResEltSize == 32) {
+        if (IsOP1ZExt && IsOP2ZExt) {
+          // If the value inside the constant vector and other operand is
+          // zero-extended, then return hexagon_vmpy_uh_uh
+          return Intrinsic::hexagon_vmpy_uh_uh;
+        } else if (!IsOP1ZExt && !IsOP2ZExt) {
+          // If the value inside the constant vector and other operand is
+          // sign-extended, then return hexagon_vmpy_h_h
+          return Intrinsic::hexagon_vmpy_h_h;
+        }
+      }
+    }
+    if (IsOP1ZExt)
+      return IsOP2ZExt ? Intrinsic::hexagon_vmpy_uu
+                       : Intrinsic::hexagon_vmpy_us;
+    else
+      return IsOP2ZExt ? Intrinsic::hexagon_vmpy_su
+                       : Intrinsic::hexagon_vmpy_ss;
+  default:
+    llvm_unreachable("Instruction not handled!");
+  }
+}
+
+/// Return the integer type with \p size bits; only 8, 16 and 32 are handled.
+Type *HexagonGenWideningVecInstr::getElementTy(unsigned size,
+                                               IRBuilder<> &IRB) {
+  switch (size) {
+  case 8:
+    return IRB.getInt8Ty();
+  case 16:
+    return IRB.getInt16Ty();
+  case 32:
+    return IRB.getInt32Ty();
+  default:
+    llvm_unreachable("Unhandled Element size");
+  }
+}
+
+/// Emit a call to intrinsic \p IntId (overloaded on \p ResType) before
+/// \p Inst with operands \p NewOP1 and \p NewOP2. When \p Interleave is set,
+/// the two halves of the result are interleaved element by element.
+Value *HexagonGenWideningVecInstr::createIntrinsic(
+    Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
+    Type *ResType, unsigned NumElts, bool Interleave = true) {
+  IRBuilder<> IRB(Inst);
+  Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId, ResType);
+  Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});
+  if (Interleave) {
+    // Interleave elements in the output vector.
+    SmallVector<Constant *> ShuffleMask;
+    unsigned HalfElts = NumElts / 2;
+    for (unsigned i = 0; i < HalfElts; ++i) {
+      ShuffleMask.push_back(IRB.getInt32(i));
+      ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
+    }
+    NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
+                                    ConstantVector::get(ShuffleMask));
+  }
+  return NewIn;
+}
+
+/// Split \p OP into its high and low halves, returned as {Hi, Lo}.
+///
+/// Results narrower than a vector pair are split with shufflevector; wider
+/// ones use the V6_hi/V6_lo intrinsics and are bitcast to \p NewOpType.
+std::pair<Value *, Value *>
+HexagonGenWideningVecInstr::opSplit(Value *OP, Instruction *Inst,
+                                    Type *NewOpType) {
+  Type *InstTy = Inst->getType();
+  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+  IRBuilder<> IRB(Inst);
+  if (InstTy->getPrimitiveSizeInBits() < 2 * HwVLen) {
+    // The only time we need to split an OP even though it is not a
+    // vector-pair is while generating vasr instruction for the short vector.
+    // Since hi/lo intrinsics can't be used here as they expect the operands to
+    // be of 64xi32 type, the shuffle_vector pair with the appropriate masks is
+    // used instead.
+    assert(NumElts % 2 == 0 && "Unexpected Vector Type!!");
+    unsigned HalfElts = NumElts / 2;
+    SmallVector<Constant *> HiM;
+    SmallVector<Constant *> LoM;
+    for (unsigned i = 0; i < HalfElts; ++i)
+      LoM.push_back(IRB.getInt32(i));
+    for (unsigned i = 0; i < HalfElts; ++i)
+      HiM.push_back(IRB.getInt32(HalfElts + i));
+
+    Value *Hi = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
+                                        ConstantVector::get(HiM));
+    Value *Lo = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
+                                        ConstantVector::get(LoM));
+    return std::pair(Hi, Lo);
+  }
+
+  Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
+  Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
+  Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi);
+  Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo);
+  auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
+  OP = IRB.CreateBitCast(OP, InType);
+  Value *Hi = IRB.CreateCall(ExtFHi, {OP}); // 32xi32
+  Value *Lo = IRB.CreateCall(ExtFLo, {OP});
+  Hi = IRB.CreateBitCast(Hi, NewOpType);
+  Lo = IRB.CreateBitCast(Lo, NewOpType);
+  return std::pair(Hi, Lo);
+}
+
+/// If \p OP is a constant splat of a ConstantInt, store its value (zero- or
+/// sign-extended according to \p IsOPZExt) in \p SplatVal and return true.
+bool HexagonGenWideningVecInstr::checkConstantVector(Value *OP,
+                                                     int64_t &SplatVal,
+                                                     bool IsOPZExt) {
+  auto *C1 = dyn_cast<Constant>(OP);
+  if (!C1)
+    return false;
+  // A splat of something other than a ConstantInt is not usable here.
+  auto *CI = dyn_cast_if_present<ConstantInt>(C1->getSplatValue());
+  if (!CI)
+    return false;
+  SplatVal = IsOPZExt ? static_cast<int64_t>(CI->getZExtValue())
+                      : CI->getSExtValue();
+  return true;
+}
+
+/// Rewrite the constant multiplicand \p OP for intrinsic \p IntId: a splat
+/// vector of \p SplatVal for the generic vmpy intrinsics, otherwise a packed
+/// 32-bit scalar (see packConstant).
+void HexagonGenWideningVecInstr::updateMPYConst(Intrinsic::ID IntId,
+                                                int64_t &SplatVal,
+                                                bool IsOPZExt, Value *&OP,
+                                                IRBuilder<> &IRB) {
+  if ((IntId == Intrinsic::hexagon_vmpy_uu ||
+       IntId == Intrinsic::hexagon_vmpy_us ||
+       IntId == Intrinsic::hexagon_vmpy_su ||
+       IntId == Intrinsic::hexagon_vmpy_ss) &&
+      OP->getType()->isVectorTy()) {
+    // Create a vector with all elements equal to SplatVal
+    auto *VecTy = cast<VectorType>(OP->getType());
+    Value *scalar = IRB.getIntN(VecTy->getScalarSizeInBits(),
+                                static_cast<uint64_t>(SplatVal));
+    Value *splatVector = ConstantVector::getSplat(VecTy->getElementCount(),
+                                                  cast<Constant>(scalar));
+    OP = IsOPZExt ? IRB.CreateZExt(splatVector, VecTy)
+                  : IRB.CreateSExt(splatVector, VecTy);
+  } else {
+    packConstant(IntId, SplatVal, OP, IRB);
+  }
+}
+
+/// Replace \p OP with a 32-bit scalar holding \p SplatVal replicated in each
+/// byte (vmpy_ub_ub / vmpy_ub_b) or halfword (vmpy_uh_uh / vmpy_h_h). Other
+/// intrinsics leave \p OP untouched.
+void HexagonGenWideningVecInstr::packConstant(Intrinsic::ID IntId,
+                                              int64_t &SplatVal, Value *&OP,
+                                              IRBuilder<> &IRB) {
+  uint32_t Val32 = static_cast<uint32_t>(SplatVal);
+  if (IntId == Intrinsic::hexagon_vmpy_ub_ub) {
+    assert(SplatVal >= 0 && SplatVal <= UINT8_MAX);
+    uint32_t packed = (Val32 << 24) | (Val32 << 16) | (Val32 << 8) | Val32;
+    OP = IRB.getInt32(packed);
+  } else if (IntId == Intrinsic::hexagon_vmpy_ub_b) {
+    assert(SplatVal >= INT8_MIN && SplatVal <= INT8_MAX);
+    // Mask each copy so the sign bits of a negative value do not leak into
+    // the neighbouring bytes.
+    uint32_t packed = (Val32 << 24) | ((Val32 << 16) & ((1 << 24) - 1)) |
+                      ((Val32 << 8) & ((1 << 16) - 1)) |
+                      (Val32 & ((1 << 8) - 1));
+    OP = IRB.getInt32(packed);
+  } else if (IntId == Intrinsic::hexagon_vmpy_uh_uh) {
+    assert(SplatVal >= 0 && SplatVal <= UINT16_MAX);
+    uint32_t packed = (Val32 << 16) | Val32;
+    OP = IRB.getInt32(packed);
+  } else if (IntId == Intrinsic::hexagon_vmpy_h_h) {
+    assert(SplatVal >= INT16_MIN && SplatVal <= INT16_MAX);
+    uint32_t packed = (Val32 << 16) | (Val32 & ((1 << 16) - 1));
+    OP = IRB.getInt32(packed);
+  }
+}
+
+/// Replace \p Inst (add/sub/mul/shl of two extended operands) with the
+/// matching widening intrinsic, or with a narrower plain add/sub when the
+/// operands had to be widened first. Returns true if \p Inst was replaced.
+bool HexagonGenWideningVecInstr::replaceWithIntrinsic(Instruction *Inst,
+                                                      OPKind OPK,
+                                                      OPInfo &OP1Info,
+                                                      OPInfo &OP2Info) {
+  Type *InstTy = Inst->getType();
+  Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+  bool IsOP1ZExt = OP1Info.IsZExt;
+  bool IsOP2ZExt = OP2Info.IsZExt;
+
+  // The resulting values of 'add' and 'sub' are always sign-extended.
+  bool IsResZExt = (OPK == OP_Mul || OPK == OP_Shl)
+                       ? (OP1Info.IsZExt && OP2Info.IsZExt)
+                       : false;
+
+  unsigned MaxEltSize = std::max(OP1Info.ExtInSize, OP2Info.ExtInSize);
+  unsigned NewOpEltSize = MaxEltSize;
+  unsigned NewResEltSize = 2 * MaxEltSize;
+
+  // For Add and Sub, both the operands should be either zero extended
+  // or sign extended. In case of a mismatch, they are extended to the
+  // next size (ex: 8 bits -> 16 bits) so that the sign-extended vadd/vsub
+  // instructions can be used. Also, we don't support 8-bits signed vadd/vsub
+  // instructions. They are extended to 16-bits and then signed 16-bits
+  // non-widening vadd/vsub is used to perform the operation.
+  if (OPK != OP_Mul && OPK != OP_Shl &&
+      (IsOP1ZExt != IsOP2ZExt || (!IsOP1ZExt && NewOpEltSize == 8)))
+    NewOpEltSize = 2 * NewOpEltSize;
+
+  unsigned ResVLen = NewResEltSize * NumElts;
+  if (ResVLen < HwVLen && !WidenShortVector)
+    return false;
+  if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
+    return false;
+
+  IRBuilder<> IRB(Inst);
+  Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
+  Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);
+
+  if (NewOpEltSize == NewResEltSize) {
+    assert(OPK != OP_Mul && OPK != OP_Shl);
+    // Instead of intrinsics, use vector add/sub.
+    Value *NewIn = IRB.CreateBinOp(cast<BinaryOperator>(Inst)->getOpcode(),
+                                   NewOP1, NewOP2);
+    if (InstEltSize > NewResEltSize)
+      NewIn = IRB.CreateSExt(NewIn, InstTy);
+    Inst->replaceAllUsesWith(NewIn);
+    return true;
+  }
+
+  bool IsConstScalar = false;
+  int64_t SplatVal = 0;
+  int ConstOpNum = 1;
+  // NOTE(review): NewOP1 is checked first, so for shl a splat *left* operand
+  // would be taken as the shift amount below -- confirm this cannot happen.
+  if (OPK == OP_Mul || OPK == OP_Shl) {
+    IsConstScalar = checkConstantVector(NewOP1, SplatVal, IsOP1ZExt);
+    if (!IsConstScalar) {
+      IsConstScalar = checkConstantVector(NewOP2, SplatVal, IsOP2ZExt);
+      ConstOpNum = 2;
+    }
+  }
+
+  if (IsConstScalar && OPK == OP_Shl) {
+    // A left shift by a constant is handled as a multiply by a power of two.
+    if (((NewOpEltSize == 8) && (SplatVal > 0) && (SplatVal < 8)) ||
+        ((NewOpEltSize == 16) && (SplatVal > 0) && (SplatVal < 16))) {
+      SplatVal = 1 << SplatVal;
+      OPK = OP_Mul;
+    } else {
+      return false;
+    }
+  } else if (!IsConstScalar && OPK == OP_Shl) {
+    return false;
+  }
+
+  Intrinsic::ID IntId = getIntrinsic(OPK, IsOP1ZExt, IsOP2ZExt, NewOpEltSize,
+                                     NewResEltSize, IsConstScalar, ConstOpNum);
+
+  // NOTE(review): the constant is always written to NewOP2 using IsOP2ZExt,
+  // even when ConstOpNum == 1 (the splat was found in NewOP1) -- confirm.
+  if (IsConstScalar) {
+    updateMPYConst(IntId, SplatVal, IsOP2ZExt, NewOP2, IRB);
+  }
+
+  // Split the node if it needs more than a vector pair for the result.
+  if (ResVLen > 2 * HwVLen) {
+    assert(ResVLen == 4 * HwVLen);
+    // Split the operands
+    unsigned HalfElts = NumElts / 2;
+    auto *NewOpType =
+        FixedVectorType::get(getElementTy(NewOpEltSize, IRB), HalfElts);
+    auto *ResType =
+        FixedVectorType::get(getElementTy(NewResEltSize, IRB), HalfElts);
+    std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst, NewOpType);
+    std::pair<Value *, Value *> SplitOP2;
+    // A packed scalar multiplicand is shared by both halves as-is.
+    if (IsConstScalar && (IntId == Intrinsic::hexagon_vmpy_h_h ||
+                          IntId == Intrinsic::hexagon_vmpy_uh_uh)) {
+      SplitOP2 = std::pair(NewOP2, NewOP2);
+    } else {
+      SplitOP2 = opSplit(NewOP2, Inst, NewOpType);
+    }
+    Value *NewInHi = createIntrinsic(IntId, Inst, SplitOP1.first,
+                                     SplitOP2.first, ResType, HalfElts, true);
+    Value *NewInLo = createIntrinsic(IntId, Inst, SplitOP1.second,
+                                     SplitOP2.second, ResType, HalfElts, true);
+    assert(InstEltSize == NewResEltSize);
+    SmallVector<Constant *> ShuffleMask;
+    for (unsigned i = 0; i < NumElts; ++i)
+      ShuffleMask.push_back(IRB.getInt32(i));
+    // Concat Hi and Lo.
+    Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
+                                           ConstantVector::get(ShuffleMask));
+
+    Inst->replaceAllUsesWith(NewIn);
+    return true;
+  }
+
+  auto *ResType =
+      FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
+  Value *NewIn =
+      createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);
+  if (InstEltSize > NewResEltSize)
+    NewIn = (IsResZExt) ? IRB.CreateZExt(NewIn, InstTy)
+                        : IRB.CreateSExt(NewIn, InstTy);
+
+  Inst->replaceAllUsesWith(NewIn);
+
+  return true;
+}
+
+// Process instruction and replace them with widening vector
+// intrinsics if possible.
+bool HexagonGenWideningVecInstr::processInstruction(Instruction *Inst) {
+  Type *InstTy = Inst->getType();
+  if (!InstTy->isVectorTy() ||
+      cast<FixedVectorType>(InstTy)->getNumElements() > 128)
+    return false;
+  unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+  // Besides HVX types, results spanning four vectors are accepted; they are
+  // split in replaceWithIntrinsic.
+  if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen)
+    return false;
+  if (InstLen < HwVLen && !WidenShortVector)
+    return false;
+
+  using namespace PatternMatch;
+
+  OPKind OPK;
+  Value *OP1 = nullptr, *OP2 = nullptr;
+  if (match(Inst, (m_Sub(m_Value(OP1), m_Value(OP2)))))
+    OPK = OP_Sub;
+  else if (match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
+    OPK = OP_Add;
+  else if (match(Inst, (m_Mul(m_Value(OP1), m_Value(OP2)))))
+    OPK = OP_Mul;
+  else if (match(Inst, (m_Shl(m_Value(OP1), m_Value(OP2)))))
+    OPK = OP_Shl;
+  else
+    return false;
+
+  OPInfo OP1Info, OP2Info;
+
+  if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
+    return false;
+
+  // Proceed only if both input operands are extended.
+  if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
+    return false;
+
+  return replaceWithIntrinsic(Inst, OPK, OP1Info, OP2Info);
+}
+
+/// Like getOperandInfo, but first tries to recognize \p V as a splatted
+/// scalar (zero-extended, or masked to at most 8 bits) and marks it with
+/// OPInfo::IsScalar.
+bool HexagonGenWideningVecInstr::getVmpaOperandInfo(Value *V, OPInfo &OPI) {
+  using namespace PatternMatch;
+  OPI.OP = V;
+  Value *ExtV = nullptr, *OP1 = nullptr;
+
+  if (match(V,
+            m_ZExt(m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
+                             m_Poison(), m_ZeroMask()))) ||
+      match(V,
+            m_Shuffle(m_InsertElt(m_Poison(), m_ZExt(m_Value(ExtV)), m_Zero()),
+                      m_Poison(), m_ZeroMask()))) {
+    OPI.ExtInOP = ExtV;
+    OPI.IsZExt = true;
+    OPI.IsScalar = true;
+    OPI.ExtInSize = ExtV->getType()->getPrimitiveSizeInBits();
+    return true;
+  }
+
+  ConstantInt *I = nullptr;
+  if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
+                          m_Poison(), m_ZeroMask())))) {
+    if (match(ExtV, m_And(m_Value(OP1), m_ConstantInt(I)))) {
+      // Keep the full 64-bit mask so that a wide mask whose low 32 bits are
+      // small is not mistaken for an 8-bit one.
+      uint64_t IValue = I->getZExtValue();
+      if (IValue <= 255) {
+        OPI.ExtInOP = ExtV;
+        OPI.IsZExt = true;
+        OPI.ExtInSize = 8;
+        OPI.IsScalar = true;
+        return true;
+      }
+    }
+  }
+
+  // Match for non-scalar operands
+  return getOperandInfo(V, OPI);
+}
+
+// Process instruction and replace with the vmpa intrinsic if possible.
+bool HexagonGenWideningVecInstr::processInstructionForVMPA(Instruction *Inst) {
+  using namespace PatternMatch;
+  Type *InstTy = Inst->getType();
+  // TODO: Extend it to handle short vector instructions (< HwVLen).
+  // vmpa instructions produce a vector register pair.
+  if (!InstTy->isVectorTy() || InstTy->getPrimitiveSizeInBits() != 2 * HwVLen)
+    return false;
+
+  // Looking for add(mul(a, b), mul(c, d)).
+  Value *OP1 = nullptr, *OP2 = nullptr;
+  if (!match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
+    return false;
+
+  Value *OP[4] = {nullptr, nullptr, nullptr, nullptr};
+  if (!match(OP1, m_Mul(m_Value(OP[0]), m_Value(OP[1]))) ||
+      !match(OP2, m_Mul(m_Value(OP[2]), m_Value(OP[3]))))
+    return false;
+
+  OPInfo OP_Info[4];
+  for (unsigned i = 0; i < 4; i++)
+    if (!getVmpaOperandInfo(OP[i], OP_Info[i]) || !OP_Info[i].ExtInOP)
+      return false;
+
+  return replaceWithVmpaIntrinsic(Inst, OP_Info);
+}
+
+// Reorder operand info in OPI so that the vector operands come before their
+// scalar counterparts.
+void HexagonGenWideningVecInstr::reorderVmpaOperands(OPInfo *OPI) {
+  for (unsigned i = 0; i < 2; i++)
+    if (!OPI[2 * i].ExtInOP->getType()->isVectorTy())
+      std::swap(OPI[2 * i], OPI[2 * i + 1]);
+}
+
+// Only handles the case where one input to vmpa has to be a scalar
+// and another is a vector. It can be easily extended to cover
+// other types of vmpa instructions.
+bool HexagonGenWideningVecInstr::replaceWithVmpaIntrinsic(Instruction *Inst,
+                                                          OPInfo *OPI) {
+  reorderVmpaOperands(OPI);
+
+  // After reordering of the operands in OPI, the odd elements must have
+  // IsScalar flag set to true. Also, check the even elements for non-scalars.
+  if (!OPI[1].IsScalar || !OPI[3].IsScalar || OPI[0].IsScalar ||
+      OPI[2].IsScalar)
+    return false;
+
+  OPInfo SOPI1 = OPI[1];
+  OPInfo SOPI2 = OPI[3];
+
+  // The scalar operand in the vmpa instructions needs to be an int8.
+  if (SOPI1.ExtInSize != SOPI2.ExtInSize || SOPI1.ExtInSize != 8)
+    return false;
+
+  Type *InstTy = Inst->getType();
+  Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+  unsigned MaxVEltSize = std::max(OPI[0].ExtInSize, OPI[2].ExtInSize);
+  unsigned NewVOpEltSize = MaxVEltSize;
+  unsigned NewResEltSize = 2 * MaxVEltSize;
+
+  if (NumElts * NewVOpEltSize < HwVLen) {
+    // Extend the operand so that we don't end up with an invalid vector size.
+    NewVOpEltSize = 2 * NewVOpEltSize;
+    NewResEltSize = 2 * NewResEltSize;
+  }
+
+  IRBuilder<> IRB(Inst);
+
+  // Construct scalar operand
+  Value *NewSOP1 = SOPI1.ExtInOP;
+  Value *NewSOP2 = SOPI2.ExtInOP;
+
+  Type *S1Ty = NewSOP1->getType();
+  Type *S2Ty = NewSOP2->getType();
+  if (S1Ty->getPrimitiveSizeInBits() < 32)
+    NewSOP1 = IRB.CreateZExt(NewSOP1, IRB.getInt32Ty());
+  if (S2Ty->getPrimitiveSizeInBits() < 32)
+    NewSOP2 = IRB.CreateZExt(NewSOP2, IRB.getInt32Ty());
+
+  // Pack both scalars into one halfword: (S1 << 8) | S2, then replicate it.
+  Value *SHL = IRB.CreateShl(NewSOP1, IRB.getInt32(8));
+  Value *OR = IRB.CreateOr(SHL, NewSOP2);
+  Intrinsic::ID CombineIntID = Intrinsic::hexagon_A2_combine_ll;
+  Function *ExtF = Intrinsic::getOrInsertDeclaration(M, CombineIntID);
+  Value *ScalarOP = IRB.CreateCall(ExtF, {OR, OR});
+
+  // Construct vector operand
+  Value *NewVOP1 = adjustExtensionForOp(OPI[0], IRB, NewVOpEltSize, NumElts);
+  Value *NewVOP2 = adjustExtensionForOp(OPI[2], IRB, NewVOpEltSize, NumElts);
+
+  // Combine both vector operands to form the vector-pair for vmpa
+  Intrinsic::ID VCombineIntID = Intrinsic::hexagon_V6_vcombine_128B;
+  ExtF = Intrinsic::getOrInsertDeclaration(M, VCombineIntID);
+  Type *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);
+  NewVOP1 = IRB.CreateBitCast(NewVOP1, InType);
+  NewVOP2 = IRB.CreateBitCast(NewVOP2, InType);
+  Value *VecOP = IRB.CreateCall(ExtF, {NewVOP1, NewVOP2});
+
+  Intrinsic::ID VmpaIntID = (NewResEltSize == 16)
+                                ? Intrinsic::hexagon_V6_vmpabus_128B
+                                : Intrinsic::hexagon_V6_vmpauhb_128B;
+  ExtF = Intrinsic::getOrInsertDeclaration(M, VmpaIntID);
+  auto *ResType =
+      FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
+  Value *NewIn = IRB.CreateCall(ExtF, {VecOP, ScalarOP});
+  NewIn = IRB.CreateBitCast(NewIn, ResType);
+
+  if (InstEltSize > NewResEltSize)
+    // Extend the output to match the original instruction type.
+    NewIn = IRB.CreateSExt(NewIn, InstTy);
+
+  // Interleave elements in the output vector.
+  SmallVector<Constant *> ShuffleMask;
+  unsigned HalfElts = NumElts / 2;
+  for (unsigned i = 0; i < HalfElts; ++i) {
+    ShuffleMask.push_back(IRB.getInt32(i));
+    ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
+  }
+  // Both shuffle inputs must have the same type; NewIn may have been
+  // sign-extended to InstTy above, so derive the poison type from it.
+  NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(NewIn->getType()),
+                                  ConstantVector::get(ShuffleMask));
+
+  Inst->replaceAllUsesWith(NewIn);
+  return true;
+}
+
+/// Recognize trunc(clamp(ashr(x, s))) -- written either with smin/smax
+/// intrinsics or with icmp+select pairs -- and replace it with a saturating
+/// vasr intrinsic. Returns true if \p Inst was replaced.
+bool HexagonGenWideningVecInstr::genSaturatingInst(Instruction *Inst) {
+  Type *InstTy = Inst->getType();
+  assert(InstTy->isVectorTy());
+  if (InstTy->getPrimitiveSizeInBits() > HwVLen)
+    return false;
+
+  using namespace PatternMatch;
+  CmpPredicate P1, P2;
+  Value *L1 = nullptr, *T1 = nullptr, *L2 = nullptr, *T2 = nullptr,
+        *L3 = nullptr;
+  Constant *RC1 = nullptr, *FC1 = nullptr, *RC2 = nullptr, *FC2 = nullptr,
+           *RC3 = nullptr;
+
+  // Pattern of interest: ashr -> llvm.smin -> llvm.smax -> trunc
+  // Match trunc instruction
+  if (match(Inst, m_Trunc(m_Intrinsic<Intrinsic::smax>(m_Value(L1),
+                                                       m_Constant(RC1))))) {
+    // Match llvm.smin instruction
+    if (match(L1, m_Intrinsic<Intrinsic::smin>(m_Value(L2), m_Constant(RC2)))) {
+      // Match ashr instruction
+      if (match(L2, m_AShr(m_Value(L3), m_Constant(RC3)))) {
+        std::pair<int, int> MinMax;
+        // get min, max values from operands of smin and smax
+        if (getMinMax(RC1, RC2, MinMax)) {
+          bool IsResSigned;
+          // Validate the saturating vasr pattern
+          if (isSaturatingVAsr(Inst, L2, MinMax.first, MinMax.second,
+                               IsResSigned)) {
+            // Get the shift value from the ashr operand. getSplatValue()
+            // returns null for a non-splat constant.
+            ConstantInt *shift_val =
+                dyn_cast_if_present<ConstantInt>(RC3->getSplatValue());
+            if (shift_val) {
+              Value *NewIn =
+                  createVAsrIntrinsic(Inst, L3, shift_val, IsResSigned);
+              Inst->replaceAllUsesWith(NewIn);
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Otherwise look for the same clamp spelled as two icmp+select pairs.
+  if (!match(Inst, (m_Trunc(m_Select(m_ICmp(P1, m_Value(L1), m_Constant(RC1)),
+                                     m_Value(T1), m_Constant(FC1))))) ||
+      (T1 != L1 || FC1 != RC1))
+    return false;
+
+  if (!match(L1, m_Select(m_ICmp(P2, m_Value(L2), m_Constant(RC2)), m_Value(T2),
+                          m_Constant(FC2))) ||
+      (T2 != L2 || FC2 != RC2))
+    return false;
+
+  if (!((P1 == CmpInst::ICMP_SGT && P2 == CmpInst::ICMP_SLT) ||
+        (P1 == CmpInst::ICMP_SLT && P2 == CmpInst::ICMP_SGT)))
+    return false;
+
+  // The constant compared with SGT is the lower bound, the SLT one the upper.
+  std::pair<int, int> MinMax;
+  if ((P1 == CmpInst::ICMP_SGT) && (P2 == CmpInst::ICMP_SLT)) {
+    if (!getMinMax(RC1, RC2, MinMax))
+      return false;
+  } else if (!getMinMax(RC2, RC1, MinMax))
+    return false;
+
+  Value *S = L2; // Value being saturated
+
+  // Only AShr instructions are handled.
+  // Also, second operand to AShr must be a scalar.
+  Value *OP1 = nullptr, *ShiftByVal = nullptr;
+  if (!match(S, m_AShr(m_Value(OP1),
+                       m_Shuffle(m_InsertElt(m_Poison(), m_Value(ShiftByVal),
+                                             m_Zero()),
+                                 m_Poison(), m_ZeroMask()))))
+    return false;
+
+  bool IsResSigned;
+  if (!isSaturatingVAsr(Inst, S, MinMax.first, MinMax.second, IsResSigned))
+    return false;
+
+  Value *NewIn = createVAsrIntrinsic(Inst, OP1, ShiftByVal, IsResSigned);
+  Inst->replaceAllUsesWith(NewIn);
+  return true;
+}
+
+/// Return a 32-bit version of \p ShiftByVal: the source of a trunc if it is
+/// one, otherwise a zero-extension to i32.
+Value *HexagonGenWideningVecInstr::extendShiftByVal(Value *ShiftByVal,
+                                                    IRBuilder<> &IRB) {
+  using namespace PatternMatch;
+  Value *A = nullptr;
+  if (match(ShiftByVal, m_Trunc(m_Value(A))))
+    return A;
+  return IRB.CreateZExt(ShiftByVal, IRB.getInt32Ty());
+}
+
+/// If both \p MinC and \p MaxC are splats of a ConstantInt, store their
+/// sign-extended values in \p MinMax as {min, max} and return true.
+bool HexagonGenWideningVecInstr::getMinMax(Constant *MinC, Constant *MaxC,
+                                           std::pair<int, int> &MinMax) {
+  auto *MinI = dyn_cast_if_present<ConstantInt>(MinC->getSplatValue());
+  if (!MinI)
+    return false;
+  auto *MaxI = dyn_cast_if_present<ConstantInt>(MaxC->getSplatValue());
+  if (!MaxI)
+    return false;
+
+  MinMax = std::pair(MinI->getSExtValue(), MaxI->getSExtValue());
+  return true;
+}
+
+/// Check that clamping \p S to [\p MinV, \p MaxV] followed by the truncation
+/// \p Inst saturates to exactly the signed or unsigned range of the truncated
+/// element type, and that the shift operates on elements twice that wide.
+/// \p IsResSigned is set to false for the unsigned range.
+bool HexagonGenWideningVecInstr::isSaturatingVAsr(Instruction *Inst, Value *S,
+                                                  int MinV, int MaxV,
+                                                  bool &IsResSigned) {
+  if (MinV >= MaxV)
+    return false;
+
+  IsResSigned = true;
+  Type *InstTy = Inst->getType();
+  Type *EltTy = cast<VectorType>(InstTy)->getElementType();
+  unsigned TruncSize = EltTy->getPrimitiveSizeInBits();
+
+  // Only 8/16-bit results are supported. Rejecting wider (or unsized) types
+  // up front also keeps the shifts below within the range of 'int'.
+  if (TruncSize == 0 || TruncSize > 16)
+    return false;
+
+  int MaxRange, MinRange;
+  if (MinV < 0) { // Saturate to a signed value
+    MaxRange = (1 << (TruncSize - 1)) - 1;
+    MinRange = -(1 << (TruncSize - 1));
+  } else if (MinV == 0) { // Saturate to an unsigned value
+    MaxRange = (1 << (TruncSize)) - 1;
+    MinRange = 0;
+    IsResSigned = false;
+  } else
+    return false;
+
+  if (MinV != MinRange || MaxV != MaxRange)
+    return false;
+
+  auto *SInst = dyn_cast<Instruction>(S);
+  if (SInst && SInst->getOpcode() == Instruction::AShr) {
+    Type *SInstTy = SInst->getType();
+    Type *SEltTy = cast<VectorType>(SInstTy)->getElementType();
+    unsigned SInstEltSize = SEltTy->getPrimitiveSizeInBits();
+    if (SInstEltSize != 2 * TruncSize)
+      return false;
+  }
+  return true;
+}
+
+/// Pick the saturating vasr intrinsic for the given input/result signedness.
+Intrinsic::ID HexagonGenWideningVecInstr::getVAsrIntrinsic(bool IsInSigned,
+                                                           bool IsResSigned) {
+  if (!IsResSigned)
+    return (IsInSigned) ? Intrinsic::hexagon_vasrsat_su
+                        : Intrinsic::hexagon_vasrsat_uu;
+  return Intrinsic::hexagon_vasrsat_ss;
+}
+
+/// Emit the saturating vasr intrinsic for \p VecOP shifted by \p ShiftByVal
+/// before \p Inst and return the result bitcast to the type of \p Inst.
+Value *HexagonGenWideningVecInstr::createVAsrIntrinsic(Instruction *Inst,
+                                                       Value *VecOP,
+                                                       Value *ShiftByVal,
+                                                       bool IsResSigned) {
+  IRBuilder<> IRB(Inst);
+  Type *ShiftByTy = ShiftByVal->getType();
+  if (ShiftByTy->getPrimitiveSizeInBits() < 32)
+    ShiftByVal = extendShiftByVal(ShiftByVal, IRB);
+
+  Type *InstTy = Inst->getType();
+  Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+  // Replace the instruction with saturating vasr intrinsic.
+  // Since vasr with saturation interleaves elements from both input vectors,
+  // they must be deinterleaved for output to end up in the right order.
+  SmallVector<Constant *> ShuffleMask;
+  unsigned HalfElts = NumElts / 2;
+  // Even elements
+  for (unsigned i = 0; i < HalfElts; ++i)
+    ShuffleMask.push_back(IRB.getInt32(i * 2));
+  // Odd elements
+  for (unsigned i = 0; i < HalfElts; ++i)
+    ShuffleMask.push_back(IRB.getInt32(i * 2 + 1));
+
+  VecOP = IRB.CreateShuffleVector(VecOP, PoisonValue::get(VecOP->getType()),
+                                  ConstantVector::get(ShuffleMask));
+
+  auto *InVecOPTy =
+      FixedVectorType::get(getElementTy(InstEltSize * 2, IRB), HalfElts);
+  std::pair<Value *, Value *> HiLo = opSplit(VecOP, Inst, InVecOPTy);
+  Intrinsic::ID IntID = getVAsrIntrinsic(true, IsResSigned);
+  Function *F = Intrinsic::getOrInsertDeclaration(M, IntID, InVecOPTy);
+  Value *NewIn = IRB.CreateCall(F, {HiLo.first, HiLo.second, ShiftByVal});
+  return IRB.CreateBitCast(NewIn, InstTy);
+}
+
+// Generate vavg instruction.
+bool HexagonGenWideningVecInstr::genVAvg(Instruction *Inst) {
+  using namespace PatternMatch;
+  Type *InstTy = Inst->getType();
+  assert(InstTy->isVectorTy());
+
+  bool Match = false;
+  Value *OP1 = nullptr, *OP2 = nullptr;
+  bool IsSigned = false;
+  // trunc(lshr(add(zext a, zext b), 1)) is an unsigned average.
+  if ((Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_ZExt(m_Value(OP1)),
+                                                 m_ZExt(m_Value(OP2))),
+                                           m_SpecificInt(1)))))))
+    IsSigned = false;
+  // The sign-extended form and a plain lshr(add(a, b), 1) are both handled
+  // as a signed average.
+  if (!Match &&
+      (Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_SExt(m_Value(OP1)),
+                                                 m_SExt(m_Value(OP2))),
+                                           m_SpecificInt(1))))) ||
+               match(Inst, m_LShr(m_Add(m_Value(OP1), m_Value(OP2)),
+                                  m_SpecificInt(1)))))
+    IsSigned = true;
+
+  if (!Match)
+    return false;
+
+  unsigned OP1EltSize = getElementSizeInBits(OP1);
+  unsigned OP2EltSize = getElementSizeInBits(OP2);
+  unsigned NewEltSize = std::max(OP1EltSize, OP2EltSize);
+
+  Type *EltTy = cast<VectorType>(InstTy)->getElementType();
+  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+  unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+
+  // Only vectors that are either smaller, same or twice of the hardware
+  // vector length are allowed.
+  if (InstEltSize < NewEltSize || (InstLen > 2 * HwVLen))
+    return false;
+
+  if ((InstLen > HwVLen) && (InstLen % HwVLen != 0))
+    return false;
+
+  IRBuilder<> IRB(Inst);
+  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+  auto *AvgInstTy =
+      FixedVectorType::get(getElementTy(NewEltSize, IRB), NumElts);
+  if (OP1EltSize < NewEltSize)
+    OP1 = (IsSigned) ? IRB.CreateSExt(OP1, AvgInstTy)
+                     : IRB.CreateZExt(OP1, AvgInstTy);
+  if (OP2EltSize < NewEltSize)
+    OP2 = (IsSigned) ? IRB.CreateSExt(OP2, AvgInstTy)
+                     : IRB.CreateZExt(OP2, AvgInstTy);
+
+  Intrinsic::ID AvgIntID =
+      (IsSigned) ? Intrinsic::hexagon_vavgs : Intrinsic::hexagon_vavgu;
+  Value *NewIn = nullptr;
+
+  // Split operands if they need more than a vector length.
+  if (NewEltSize * NumElts > HwVLen) {
+    unsigned HalfElts = NumElts / 2;
+    auto *ResType =
+        FixedVectorType::get(getElementTy(NewEltSize, IRB), HalfElts);
+    std::pair<Value *, Value *> SplitOP1 = opSplit(OP1, Inst, ResType);
+    std::pair<Value *, Value *> SplitOP2 = opSplit(OP2, Inst, ResType);
+    Value *NewHi = createIntrinsic(AvgIntID, Inst, SplitOP1.first,
+                                   SplitOP2.first, ResType, NumElts, false);
+    Value *NewLo = createIntrinsic(AvgIntID, Inst, SplitOP1.second,
+                                   SplitOP2.second, ResType, NumElts, false);
+    SmallVector<Constant *> ShuffleMask;
+    for (unsigned i = 0; i < NumElts; ++i)
+      ShuffleMask.push_back(IRB.getInt32(i));
+    // Concat Hi and Lo.
+    NewIn =
+        IRB.CreateShuffleVector(NewLo, NewHi, ConstantVector::get(ShuffleMask));
+  } else
+    NewIn =
+        createIntrinsic(AvgIntID, Inst, OP1, OP2, AvgInstTy, NumElts, false);
+
+  if (InstEltSize > NewEltSize)
+    // Extend the output to match the original instruction type.
+    NewIn = (IsSigned) ? IRB.CreateSExt(NewIn, InstTy)
+                       : IRB.CreateZExt(NewIn, InstTy);
+  Inst->replaceAllUsesWith(NewIn);
+  return true;
+}
+
+/// Run all rewrites over block \p B: vmpa, saturating vasr and vavg first
+/// (HVX-typed instructions only), then the generic widening add/sub/mul/shl
+/// rewrite. Returns true if anything changed.
+bool HexagonGenWideningVecInstr::visitBlock(BasicBlock *B) {
+  bool Changed = false;
+  for (auto &I : *B) {
+    Type *InstTy = I.getType();
+    if (!InstTy->isVectorTy() || !HST->isTypeForHVX(cast<VectorType>(InstTy)))
+      continue;
+
+    unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+    if (InstLen < HwVLen && !WidenShortVector)
+      continue;
+
+    Changed |= processInstructionForVMPA(&I);
+    Changed |= genSaturatingInst(&I);
+    Changed |= genVAvg(&I);
+  }
+  // Generate widening instructions.
+  for (auto &I : *B)
+    Changed |= processInstruction(&I);
+  return Changed;
+}
+
+bool HexagonGenWideningVecInstr::runOnFunction(Function &F) {
+  M = F.getParent();
+  HST = TM->getSubtargetImpl(F);
+
+  // Return if useHVX128BOps is not set. It can be enabled for 64B mode
+  // but will require some changes. For example, bitcast for intrinsics
+  // assumes 128B mode.
+  if (skipFunction(F) || !HST->useHVX128BOps())
+    return false;
+
+  HwVLen = HST->getVectorLength() * 8; // Vector Length in bits
+  bool Changed = false;
+  for (auto &B : F)
+    Changed |= visitBlock(&B);
+
+  return Changed;
+}
+
+FunctionPass *
+llvm::createHexagonGenWideningVecInstr(const HexagonTargetMachine &TM) {
+  return new HexagonGenWideningVecInstr(&TM);
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index cde8b5ba8d8a7..2d7e3c3ad87db 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -592,6 +592,7 @@ class HexagonTargetLowering : public TargetLowering { SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const; SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; SDValue WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const; + SDValue WidenHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; SDValue LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const; SDValue
ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) const; SDValue EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0b782d79237da..4bc8e741c56bf 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -470,6 +470,7 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::ANY_EXTEND, VecTy, Custom); setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom); setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, VecTy, Custom); if (Subtarget.useHVXFloatingPoint()) { setOperationAction(ISD::FP_TO_SINT, VecTy, Custom); setOperationAction(ISD::FP_TO_UINT, VecTy, Custom); @@ -3433,6 +3434,104 @@ HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const { {SetCC, getZero(dl, MVT::i32, DAG)}); } +SDValue HexagonTargetLowering::WidenHvxIntrinsic(SDValue Op, + SelectionDAG &DAG) const { + const SDLoc &dl(Op); + unsigned HwWidth = 8 * Subtarget.getVectorLength(); + bool IsResInterleaved = false; + + SDValue WideRes = SDValue(); + SDValue Op1 = Op.getOperand(1); + MVT ResTy = ty(Op); + MVT OpTy = ty(Op1); + if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy)) + return SDValue(); + + auto getFactor = [HwWidth](MVT Ty) { + unsigned Width = Ty.getSizeInBits(); + assert(HwWidth % Width == 0); + return HwWidth / Width; + }; + + auto getWideTy = [getFactor](MVT Ty) { + unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); + return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); + }; + + unsigned IID = cast(Op.getOperand(0))->getZExtValue(); + SDValue Op2 = Op.getOperand(2); + SDValue WideOp1 = appendUndef(Op1, getWideTy(OpTy), DAG); + SDValue WideOp2; + if (dyn_cast(Op2.getNode())) { + WideOp2 = Op2; + } else { + WideOp2 = appendUndef(Op2, getWideTy(OpTy), DAG); + } + 
unsigned WidenFactor = getFactor(OpTy); + unsigned WideLen = ResTy.getVectorNumElements() * WidenFactor; + MVT WideResTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideLen); + + switch (IID) { + default: + return SDValue(); + case Intrinsic::hexagon_vasrsat_su: + case Intrinsic::hexagon_vasrsat_uu: + case Intrinsic::hexagon_vasrsat_ss: + WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy, + DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2, + Op.getOperand(3)); + break; + case Intrinsic::hexagon_vadd_su: + case Intrinsic::hexagon_vadd_uu: + case Intrinsic::hexagon_vadd_ss: + case Intrinsic::hexagon_vadd_us: + + case Intrinsic::hexagon_vsub_su: + case Intrinsic::hexagon_vsub_uu: + case Intrinsic::hexagon_vsub_ss: + case Intrinsic::hexagon_vsub_us: + + case Intrinsic::hexagon_vmpy_su: + case Intrinsic::hexagon_vmpy_uu: + case Intrinsic::hexagon_vmpy_ss: + case Intrinsic::hexagon_vmpy_us: + case Intrinsic::hexagon_vmpy_ub_ub: + case Intrinsic::hexagon_vmpy_ub_b: + case Intrinsic::hexagon_vmpy_uh_uh: + case Intrinsic::hexagon_vmpy_h_h: + IsResInterleaved = true; + WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy, + DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2); + break; + case Intrinsic::hexagon_vavgu: + case Intrinsic::hexagon_vavgs: + WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy, + DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2); + break; + } + unsigned OrigLen = ResTy.getVectorNumElements(); + assert(OrigLen % 2 == 0); + unsigned HalfOrigLen = OrigLen / 2; + unsigned SplitLen = WideLen / 2; + if (IsResInterleaved) { + // Get the valid odd and even elements from the widened vector-pair while + // maintaining their deinterleaved order. The following shuffle_vector will + // produce a vector-pair with all the valid elements (even followed by odd) + // accumulated together followed by undefs. 
+ SmallVector ShuffV; + for (unsigned j = 0; j < WidenFactor; j++) { + for (unsigned i = 0; i < HalfOrigLen; i++) + ShuffV.push_back(j * HalfOrigLen + i); + for (unsigned i = 0; i < HalfOrigLen; i++) + ShuffV.push_back(SplitLen + j * HalfOrigLen + i); + } + WideRes = DAG.getVectorShuffle(WideResTy, dl, WideRes, + DAG.getUNDEF(WideResTy), ShuffV); + } + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy, + {WideRes, getZero(dl, MVT::i32, DAG)}); +} + SDValue HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); @@ -3699,6 +3798,12 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, Results.push_back(S); } break; + case ISD::INTRINSIC_WO_CHAIN: + if (shouldWidenToHvx(ty(Op.getOperand(1)), DAG)) { + if (SDValue T = WidenHvxIntrinsic(Op, DAG)) + Results.push_back(T); + } + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FP_TO_SINT: @@ -3759,6 +3864,11 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, Results.push_back(C); } break; + case ISD::INTRINSIC_WO_CHAIN: + assert(shouldWidenToHvx(ty(N->getOperand(1)), DAG) && "Not widening?"); + if (SDValue T = WidenHvxIntrinsic(Op, DAG)) + Results.push_back(T); + break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) { diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index 25b81d8cd21ff..7f16c3e231d09 100644 --- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -355,6 +355,120 @@ defm : T_VVI_inv_pat ; defm : T_VVI_inv_pat ; defm : T_VVR_pat ; + +class VAccGenIntrin_pat + : Pat<(add WPred:$Vx, (ResType (IntID VPred:$Vs, VPred:$Vt))), + (MI WPred:$Vx, VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>; + +let AddedComplexity = 100 in { + def : VAccGenIntrin_pat; + def : VAccGenIntrin_pat; + def : VAccGenIntrin_pat; + def : VAccGenIntrin_pat; + + // The second operand in V6_vmpybusv_acc 
is unsigned. + def : Pat<(add HWI16:$Vx, (VecPI16 (int_hexagon_vmpy_us HVI8:$Vs, + HVI8:$Vv))), + (V6_vmpybusv_acc HWI16:$Vx, HVI8:$Vs, HVI8:$Vv)>; + + def : Pat<(add HWI16:$Vx, (VecPI16 (int_hexagon_vmpy_su HVI8:$Vs, + HVI8:$Vv))), + (V6_vmpybusv_acc HWI16:$Vx, HVI8:$Vv, HVI8:$Vs)>; + + // The third operand in V6_vmpyhus_acc is unsigned. + def : Pat<(add HWI32:$Vx, (VecPI32 (int_hexagon_vmpy_us HVI16:$Vs, + HVI16:$Vv))), + (V6_vmpyhus_acc HWI32:$Vx, HVI16:$Vv, HVI16:$Vs)>; + + def : Pat<(add HWI32:$Vx, (VecPI32 (int_hexagon_vmpy_su HVI16:$Vs, + HVI16:$Vv))), + (V6_vmpyhus_acc HWI32:$Vx, HVI16:$Vs, HVI16:$Vv)>; +} + +class ExtIntrin_pat + : Pat<(ResType (IntID VPred:$Vs, VPred:$Vt)), + (MI VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>; + +def : ExtIntrin_pat; +def : ExtIntrin_pat; +def : ExtIntrin_pat; + +def : ExtIntrin_pat; +def : ExtIntrin_pat; +def : ExtIntrin_pat; + +def : ExtIntrin_pat; +def : ExtIntrin_pat; +def : ExtIntrin_pat; +def : ExtIntrin_pat; + +// The first operand in V6_vmpybusv is unsigned. +def : Pat<(VecPI16 (int_hexagon_vmpy_us HVI8:$Vs, HVI8:$Vv)), + (V6_vmpybusv HVI8:$Vs, HVI8:$Vv)>; + +def : Pat<(VecPI16 (int_hexagon_vmpy_su HVI8:$Vs, HVI8:$Vv)), + (V6_vmpybusv HVI8:$Vv, HVI8:$Vs)>; + +// The second operand in V6_vmpyhus is unsigned. 
+def : Pat<(VecPI32 (int_hexagon_vmpy_us HVI16:$Vs, HVI16:$Vv)), + (V6_vmpyhus HVI16:$Vv, HVI16:$Vs)>; + +def : Pat<(VecPI32 (int_hexagon_vmpy_su HVI16:$Vs, HVI16:$Vv)), + (V6_vmpyhus HVI16:$Vs, HVI16:$Vv)>; + +class VAvgInstr_pat + : Pat<(ResType (IntID VPred:$Vs, VPred:$Vt)), + (MI VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>; + +def : VAvgInstr_pat; +def : VAvgInstr_pat; +def : VAvgInstr_pat; +def : VAvgInstr_pat; +def : VAvgInstr_pat; +def : VAvgInstr_pat; + +class VAsrIntr_pat +: Pat<(ResType (IntID VPred:$Vs, VPred:$Vt, IntRegsLow8:$Rt)), + (MI VPred:$Vs, VPred:$Vt, IntRegsLow8:$Rt)>, Requires<[UseHVX128B]>; + +def : VAsrIntr_pat; +def : VAsrIntr_pat; +def : VAsrIntr_pat; +def : VAsrIntr_pat; +def : VAsrIntr_pat; +def : VAsrIntr_pat; + +class VMpyVSInstr_pat +: Pat<(ResType (IntID VPred:$Vs, IntRegs:$Rt)), + (MI VPred:$Vs, IntRegs:$Rt)>, Requires<[UseHVX128B]>; + +def : VMpyVSInstr_pat; +def : VMpyVSInstr_pat; +def : VMpyVSInstr_pat; +def : VMpyVSInstr_pat; + +class VAccIntrin_pat + : Pat<(add HvxWR:$Vx, (IntID HvxVR:$Vs, HvxVR:$Vt)), + (MI HvxWR:$Vx, HvxVR:$Vs, HvxVR:$Vt)>, Requires<[UseHVX128B]>; + +let AddedComplexity = 350 in { + def : VAccIntrin_pat; + def : VAccIntrin_pat; + def : VAccIntrin_pat; + def : VAccIntrin_pat; + def : VAccIntrin_pat; + def : VAccIntrin_pat; +} + def: Pat<(int_hexagon_V6_vd0), (V6_vd0)>, Requires<[UseHVXV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vd0_128B ), diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 5a1d5bc669169..c68b63205fbbf 100644 --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -138,7 +138,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, return false; // Make sure that the (unique) def operand is a register from IntRegs. 
- bool HadDef = false; + [[maybe_unused]] bool HadDef = false; for (const MachineOperand &Op : II->operands()) { if (!Op.isReg() || !Op.isDef()) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp b/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp new file mode 100644 index 0000000000000..fcfae1776ecec --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp @@ -0,0 +1,713 @@ +//===---------------------- HexagonOptShuffleVector.cpp -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Optimize vector shuffles by postponing them as late as possible. The intent +// here is to remove uncessary shuffles and also increases the oportunities for +// adjacent shuffles to be merged together. +// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "hex-shuff-vec" +/// A command line argument to limit the search space along def chain. 
+static cl::opt MaxDefSearchCount( + "shuffvec-max-search-count", + cl::desc("Maximum number of instructions traversed along def chain."), + cl::Hidden, cl::init(15)); + +#ifndef NDEBUG +static cl::opt + ShuffVecLimit("shuff-vec-max", + cl::desc("Maximum number of shuffles to be relocated."), + cl::Hidden, cl::init(-1)); +#endif + +namespace llvm { +void initializeHexagonOptShuffleVectorPass(PassRegistry &); +FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &); +} // end namespace llvm + +namespace { + +class HexagonOptShuffleVector : public FunctionPass { +public: + static char ID; +#ifndef NDEBUG + static int NumRelocated; +#endif + HexagonOptShuffleVector() : FunctionPass(ID) { + initializeHexagonOptShuffleVectorPass(*PassRegistry::getPassRegistry()); + } + + HexagonOptShuffleVector(const HexagonTargetMachine *TM) + : FunctionPass(ID), TM(TM) { + initializeHexagonOptShuffleVectorPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Hexagon Optimize Vector Shuffles"; + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + } + +private: + using ValueVector = SmallVector; + const HexagonTargetMachine *TM = nullptr; + const HexagonSubtarget *HST = nullptr; + SmallPtrSet Visited; + using ShuffUseList = + SmallDenseMap>; + ShuffUseList ShuffUses; + int DefSearchCount; + + bool visitBlock(BasicBlock *B); + bool findNewShuffLoc(Instruction *I, ArrayRef &ShuffMask, + Value *&NewLoc); + bool isValidIntrinsic(IntrinsicInst *I); + bool relocateShuffVec(Instruction *I, ArrayRef &M, Value *NewLoc, + std::list &WorkList); + bool getUseList(Instruction *I, ValueVector &UseList); + bool analyzeHiLoUse(Instruction *HI, Instruction *LO, + ArrayRef &ShuffMask, Value *&NewLoc, + ShuffUseList &CurShuffUses); + bool isHILo(Value *V, bool IsHI); + bool hasDefWithSameShuffMask(Value *V, SmallVector &ImmUse, + ArrayRef 
&ShuffMask, + ShuffUseList &CurShuffUses); + void FindHiLoUse(ValueVector &UseList, Instruction *&HI, Instruction *&LO); + bool isConcatMask(ArrayRef &Mask, Instruction *ShuffInst); + bool isValidUseInstr(ValueVector &UseList, Instruction *&UI); + bool areAllOperandsValid(Instruction *I, Instruction *UI, + ArrayRef &ShuffMask, + ShuffUseList &CurShuffUses); + Value *getOperand(Instruction *I, unsigned i); + static iterator_range getArgOperands(User *U); + static std::pair stripCasts(Value *V); + static bool isConstantVectorSplat(Value *V); +}; + +} // end anonymous namespace + +#ifndef NDEBUG +int HexagonOptShuffleVector::NumRelocated = 0; +#endif +char HexagonOptShuffleVector::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonOptShuffleVector, "shuff-vec", + "Hexagon Optimize Shuffle Vector", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonOptShuffleVector, "shuff-vec", + "Hexagon Optimize Shuffle Vector", false, false) + +bool HexagonOptShuffleVector::isConcatMask(ArrayRef &Mask, + Instruction *ShuffInst) { + Type *ShuffTy = ShuffInst->getType(); + int NumElts = cast(ShuffTy)->getNumElements(); + for (int i = 0; i < NumElts; i++) { + if (Mask[i] != i) + return false; + } + return true; +} + +bool HexagonOptShuffleVector::isValidIntrinsic(IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + default: + return false; + case Intrinsic::hexagon_V6_vaddubh_128B: + case Intrinsic::hexagon_V6_vadduhw_128B: + case Intrinsic::hexagon_V6_vaddhw_128B: + case Intrinsic::hexagon_V6_vaddh_dv_128B: + case Intrinsic::hexagon_V6_vsububh_128B: + case Intrinsic::hexagon_V6_vsubuhw_128B: + case Intrinsic::hexagon_V6_vsubhw_128B: + case Intrinsic::hexagon_V6_vsubh_dv_128B: + case Intrinsic::hexagon_V6_vmpyubv_128B: + case Intrinsic::hexagon_V6_vmpybv_128B: + case Intrinsic::hexagon_V6_vmpyuhv_128B: + case Intrinsic::hexagon_V6_vmpyhv_128B: + case Intrinsic::hexagon_V6_vmpybusv_128B: + case Intrinsic::hexagon_V6_vmpyhus_128B: + case 
Intrinsic::hexagon_V6_vavgb_128B: + case Intrinsic::hexagon_V6_vavgub_128B: + case Intrinsic::hexagon_V6_vavgh_128B: + case Intrinsic::hexagon_V6_vavguh_128B: + case Intrinsic::hexagon_V6_vavgw_128B: + case Intrinsic::hexagon_V6_vavguw_128B: + case Intrinsic::hexagon_V6_hi_128B: + case Intrinsic::hexagon_V6_lo_128B: + case Intrinsic::sadd_sat: + case Intrinsic::uadd_sat: + // Generic hexagon vector intrinsics + case Intrinsic::hexagon_vadd_su: + case Intrinsic::hexagon_vadd_uu: + case Intrinsic::hexagon_vadd_ss: + case Intrinsic::hexagon_vadd_us: + case Intrinsic::hexagon_vsub_su: + case Intrinsic::hexagon_vsub_uu: + case Intrinsic::hexagon_vsub_ss: + case Intrinsic::hexagon_vsub_us: + case Intrinsic::hexagon_vmpy_su: + case Intrinsic::hexagon_vmpy_uu: + case Intrinsic::hexagon_vmpy_ss: + case Intrinsic::hexagon_vmpy_us: + case Intrinsic::hexagon_vavgu: + case Intrinsic::hexagon_vavgs: + case Intrinsic::hexagon_vmpy_ub_b: + case Intrinsic::hexagon_vmpy_ub_ub: + case Intrinsic::hexagon_vmpy_uh_uh: + case Intrinsic::hexagon_vmpy_h_h: + return true; + } + llvm_unreachable("Unsupported instruction!"); +} + +bool HexagonOptShuffleVector::getUseList(Instruction *I, ValueVector &UseList) { + for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { + Instruction *J = dyn_cast(*UI); + if (!J) + return false; + if (auto *C = dyn_cast(*UI)) { + if (!getUseList(C, UseList)) + return false; + } else + UseList.push_back(*UI); + ++UI; + } + return true; +} + +bool HexagonOptShuffleVector::isHILo(Value *V, bool IsHI) { + if (!(dyn_cast(V))) + return false; + Instruction *I = dyn_cast(V); + if (!isa(I)) + return false; + IntrinsicInst *II = dyn_cast(I); + if (!II) + return false; + if ((II->getIntrinsicID() == Intrinsic::hexagon_V6_hi_128B && IsHI) || + (II->getIntrinsicID() == Intrinsic::hexagon_V6_lo_128B && !IsHI)) + return true; + return false; +} + +Value *HexagonOptShuffleVector::getOperand(Instruction *I, unsigned i) { + Value *V = I->getOperand(i); + if (auto *C = 
dyn_cast(V)) + return C->getOperand(0); + return V; +} + +iterator_range +HexagonOptShuffleVector::getArgOperands(User *U) { + if (auto *CB = dyn_cast(U)) + return CB->args(); + return U->operands(); +} + +// Strip out all the cast operations to find the first non-cast definition of a +// value. The function also returns the last cast operation in the def-chain. +std::pair HexagonOptShuffleVector::stripCasts(Value *V) { + Value *LastCast = nullptr; + while (auto *C = dyn_cast(V)) { + LastCast = V; + V = C->getOperand(0); + } + return std::make_pair(V, LastCast); +} + +bool HexagonOptShuffleVector::isConstantVectorSplat(Value *V) { + if (auto *CV = dyn_cast(V)) + return CV->getSplatValue(); + if (auto *CV = dyn_cast(V)) + return CV->isSplat(); + return false; +} + +// Make sure all the operations on HI and LO counterparts are identical +// until both halves are merged together. When a merge point (concat) +// is found, set it as 'NewLoc' and return. +bool HexagonOptShuffleVector::analyzeHiLoUse(Instruction *HI, Instruction *LO, + ArrayRef &ShuffMask, + Value *&NewLoc, + ShuffUseList &CurShuffUses) { + ValueVector HiUseList, LoUseList; + getUseList(HI, HiUseList); + getUseList(LO, LoUseList); + + // To keep the analsis simple, only handle Hi and Lo with a single use. Also, + // not even sure at this point if it will be profitable due to multiple + // merge points. + if (HiUseList.size() != 1 || LoUseList.size() != 1) + return false; + + Instruction *HiUse = dyn_cast(HiUseList[0]); + Instruction *LoUse = dyn_cast(LoUseList[0]); + if (!HiUse || !LoUse) + return false; + + bool IsUseIntrinsic = false; + if (isa(HiUse)) { + if (!isa(LoUse)) + return false; + // Continue only if both Hi and Lo uses are calls to the same intrinsic. 
+ IntrinsicInst *HiUseII = dyn_cast(HiUse); + IntrinsicInst *LoUseII = dyn_cast(LoUse); + if (!HiUseII || !LoUseII || + HiUseII->getIntrinsicID() != LoUseII->getIntrinsicID() || + !isValidIntrinsic(HiUseII)) + return false; + IsUseIntrinsic = true; + HiUse = HiUseII; + LoUse = LoUseII; + } + if (HiUse->getOpcode() != LoUse->getOpcode()) + return false; + + // If both Hi and Lo use are same and is a concat operation, set it + // as a 'NewLoc'. + if (HiUse == LoUse) { + // Return true if use is a concat of Hi and Lo. + ArrayRef M; + if (match(HiUse, (m_Shuffle(m_Value(), m_Value(), m_Mask(M))))) { + if (isConcatMask(M, HiUse)) { + NewLoc = HiUse; + return true; + } + } + return false; + } + + // Check if HiUse and LoUse are shuffles with the same mask. If so, safe to + // continue the search. + ArrayRef M1, M2; + if (match(HiUse, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M1)))) && + match(LoUse, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M2)))) && + M1.equals(M2)) + return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses); + + // For now, only handling binary ops and some of the instrinsics + // which appear to be safe (hardcoded in isValidIntrinsic()). + if (!HiUse->isBinaryOp() && !IsUseIntrinsic) + return false; + + ValueVector HiUseOperands, LoUseOperands; + int HiOpNum = -1, LoOpNum = -1; + for (unsigned i = 0; i < HiUse->getNumOperands(); i++) { + Value *V = getOperand(HiUse, i); + if (V == HI) + HiOpNum = i; + else + HiUseOperands.push_back(V); + } + for (unsigned i = 0; i < LoUse->getNumOperands(); i++) { + Value *V = getOperand(LoUse, i); + if (V == LO) + LoOpNum = i; + else + LoUseOperands.push_back(V); + } + + // Enforcing strict ordering which is not necessary in case of + // commutative operations and may be relaxed in future if needed. 
+ if (HiOpNum < 0 || HiOpNum != LoOpNum || + LoUseOperands.size() != HiUseOperands.size()) + return false; + + unsigned NumOperands = HiUseOperands.size(); + for (unsigned i = 0; i < NumOperands; i++) { + if (HiUseOperands[i] == LoUseOperands[i]) + continue; + // Only handle the case where other operands to Hi and Lo uses + // are comming from another Hi and Lo pair. + if (!isHILo(HiUseOperands[i], true) || !isHILo(LoUseOperands[i], false)) + return false; + + Value *DefHiUse = dyn_cast(HiUseOperands[i])->getOperand(0); + Value *DefLoUse = dyn_cast(LoUseOperands[i])->getOperand(0); + if (!DefHiUse || DefHiUse != DefLoUse) + return false; + SmallVector ImmUseList; + if (dyn_cast(DefHiUse)) + ImmUseList.push_back(dyn_cast(DefHiUse)); + else { + ImmUseList.push_back(HiUse); + ImmUseList.push_back(LoUse); + } + + // Make sure that the Hi/Lo def has the same shuffle mask. + if (!hasDefWithSameShuffMask(DefHiUse, ImmUseList, ShuffMask, CurShuffUses)) + return false; + } + + // Continue the search along Hi/Lo use-chain. + return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses); +} + +bool HexagonOptShuffleVector::hasDefWithSameShuffMask( + Value *V, SmallVector &ImmUses, ArrayRef &ShuffMask, + ShuffUseList &CurShuffUses) { + // Follow def-chain until we have found a shuffle_vector or have run out + // of max number of attempts. 
+ if (DefSearchCount >= MaxDefSearchCount) + return false; + + ++DefSearchCount; + V = stripCasts(V).first; + Instruction *I = dyn_cast(V); + if (!I) + return false; + bool Found = true; + ArrayRef M; + if (match(V, (m_Shuffle(m_Value(), m_Value(), m_Mask(M)))) && + M.equals(ShuffMask)) { + CurShuffUses[I] = ImmUses; + return true; + } + if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()), + m_Poison(), m_ZeroMask())))) + return true; // scalar converted to a vector + + auto *II = dyn_cast(I); + if (!I->isBinaryOp() && (!II || !isValidIntrinsic(II))) + return false; + + for (Value *OpV : getArgOperands(I)) { + std::pair P = stripCasts(OpV); + OpV = P.first; + + SmallVector ImmUseList; + if (P.second) + ImmUseList.push_back(dyn_cast(P.second)); + else + ImmUseList.push_back(dyn_cast(I)); + + if (isa(OpV)) + continue; + if (isConstantVectorSplat(OpV)) + continue; + if (!dyn_cast(OpV)) + return false; + if ((match(OpV, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()), + m_Poison(), m_ZeroMask())))) + continue; + Found &= hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses); + } + return Found; +} + +void HexagonOptShuffleVector::FindHiLoUse(ValueVector &UseList, + Instruction *&HI, Instruction *&LO) { + + for (unsigned i = 0; i < UseList.size(); i++) { + auto *J = dyn_cast(UseList[i]); + auto *CI = dyn_cast(J); + if (CI) { + auto *II = dyn_cast(CI); + if (II) { + Intrinsic::ID IntID = II->getIntrinsicID(); + if (IntID == Intrinsic::hexagon_V6_hi_128B) + HI = J; + if (IntID == Intrinsic::hexagon_V6_lo_128B) + LO = J; + } + } + } +} + +bool HexagonOptShuffleVector::isValidUseInstr(ValueVector &UseList, + Instruction *&UI) { + // Don't allow multiple uses. Only done in case of a Hi/Lo pair. + if (UseList.size() != 1) + return false; + UI = dyn_cast(UseList[0]); + if (!UI) + return false; + // Should be either a binary op or one of the supported instrinsics. 
+ if (auto *CI = dyn_cast(UI)) { + auto *II = dyn_cast(CI); + if (!II || !isValidIntrinsic(II)) + return false; + UI = II; + } else if (!UI->isBinaryOp()) + return false; + return true; +} + +// Check all the operands of 'Use' to make sure that they are either: +// 1) a constant +// 2) a scalar +// 3) a constant vector +// 4) a vector using the same mask as I +bool HexagonOptShuffleVector::areAllOperandsValid(Instruction *I, + Instruction *Use, + ArrayRef &ShuffMask, + ShuffUseList &CurShuffUses) { + bool AllOperandsOK = true; + for (Value *OpV : getArgOperands(Use)) { + bool HasOneUse = OpV->hasOneUse(); + std::pair P = stripCasts(OpV); + OpV = P.first; + + SmallVector ImmUseList; + if (P.second) + ImmUseList.push_back(dyn_cast(P.second)); + else + ImmUseList.push_back(dyn_cast(Use)); + + if (OpV == I || isa(OpV)) + continue; + if (isConstantVectorSplat(OpV)) + continue; + if (!dyn_cast(OpV) || !HasOneUse) + return false; + + if ((match(OpV, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()), + m_Poison(), m_ZeroMask())))) + continue; + AllOperandsOK &= + hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses); + } + return AllOperandsOK; +} + +// Find the new location where it's safe to relocate shuffle instruction 'I'. +bool HexagonOptShuffleVector::findNewShuffLoc(Instruction *I, + ArrayRef &ShuffMask, + Value *&NewLoc) { + DefSearchCount = 0; + ValueVector UseList; + if (!getUseList(I, UseList)) + return false; + + using ShuffUseList = + SmallDenseMap>; + ShuffUseList CurShuffUses; + // Check for Hi and Lo pair. + Instruction *HI = nullptr, *LO = nullptr; + FindHiLoUse(UseList, HI, LO); + if (UseList.size() == 2 && HI && LO) { + // If 'I' has Hi and Lo use-pair, then it can be relocated only after Hi/Lo + // use-chain's merge point, i.e., after a concat vector provided it's safe + // to do so. 
+ LLVM_DEBUG({ + dbgs() << "\tFollowing the Hi/LO pair :\n"; + dbgs() << "\t\tHI - "; + HI->dump(); + dbgs() << "\t\tLO - "; + LO->dump(); + }); + if (!analyzeHiLoUse(HI, LO, ShuffMask, NewLoc, CurShuffUses)) + return false; + for (auto &it : CurShuffUses) + ShuffUses[it.first] = it.second; + return true; + } else { // Single use case + Instruction *UI = nullptr; + if (!isValidUseInstr(UseList, UI)) + return false; + assert(UI && "Expected a valid use, but found none!!"); + + if (HI || LO) { + // If the single use case is either Hi or Lo, it is not safe to relocate + return false; + } + + LLVM_DEBUG(dbgs() << "\tChecking operands in 'use' : \n\t\t"; UI->dump()); + if (!areAllOperandsValid(I, UI, ShuffMask, CurShuffUses)) { + LLVM_DEBUG(dbgs() << "\t\tNOT SAFE -- Exiting!!\n"); + return false; + } + for (auto &it : CurShuffUses) + ShuffUses[it.first] = it.second; + NewLoc = UI; + // Keep looking for the new location until can't proceed any longer. + findNewShuffLoc(UI, ShuffMask, NewLoc); + } + return true; +} + +// Move shuffle instruction 'I' after 'NewLoc'. +bool HexagonOptShuffleVector::relocateShuffVec( + Instruction *I, ArrayRef &M, Value *NewLoc, + std::list &WorkList) { + // Remove original vector shuffles at the input operands. + // However, it can be done only if the replacements have the + // same number of vector elements as the original operands. 
+ std::map InstrMap; + bool CanReplace = true; + unsigned ShuffInstCount = ShuffUses.size(); + for (auto &it : ShuffUses) { + Instruction *J = it.first; + Visited.insert(J); + Value *ShuffleOP = nullptr; + match(J, (m_Shuffle(m_Value(ShuffleOP), m_Poison(), m_Mask(M)))); + VectorType *JTy = cast(J->getType()); + VectorType *ShuffTy = cast(ShuffleOP->getType()); + if (JTy->getElementCount() != ShuffTy->getElementCount()) + CanReplace = false; + + // Relocate shufflevector after a wider instruction only if there are + // at least two or more shufflevectors being relocated in order for the + // relocation to be profitable as otherwise it will require more shuffles. + VectorType *NewShuffTy = cast(NewLoc->getType()); + if (ShuffInstCount == 1 && + NewShuffTy->getElementType() > ShuffTy->getElementType()) + CanReplace = false; + InstrMap[J] = ShuffleOP; + } + if (!CanReplace) { + LLVM_DEBUG(dbgs() << "\tRelocation FAILED!! \n"); + return false; + } + for (auto IM : InstrMap) { + Instruction *J = IM.first; + assert(ShuffUses.count(J)); + SmallVector Uses = ShuffUses[J]; + if (Uses.size() > 0) { + for (auto *U : Uses) + U->replaceUsesOfWith(IM.first, IM.second); + } else + // This is the shuffle we started with, and we have already made sure + // that it has either single use or a HI/LO use pair. So, it's okay + // to replace all its uses with the input to the shuffle instruction. + IM.first->replaceAllUsesWith(IM.second); + } + // Shuffle the output of NewLoc based on the original mask. + Instruction *Pos = dyn_cast(NewLoc); + assert(Pos); + Pos = Pos->getNextNode(); + IRBuilder<> IRB(Pos); + Value *NewShuffV = + IRB.CreateShuffleVector(NewLoc, PoisonValue::get(NewLoc->getType()), M); + Instruction *NewInst = dyn_cast(NewShuffV); + if (!NewInst) { + LLVM_DEBUG(dbgs() << "\tRelocation FAILED!! 
\n"); + return false; + } + for (auto UI = NewLoc->user_begin(), UE = NewLoc->user_end(); UI != UE;) { + Use &TheUse = UI.getUse(); + ++UI; + Instruction *J = dyn_cast(TheUse.getUser()); + if (J && TheUse.getUser() != NewShuffV) + J->replaceUsesOfWith(NewLoc, NewShuffV); + } + WorkList.push_back(NewInst); + LLVM_DEBUG(dbgs() << "\tRelocation Successfull!! \n"); + LLVM_DEBUG(dbgs() << "\tAdded to Worklist :\n"; NewInst->dump()); + return true; +} + +bool HexagonOptShuffleVector::visitBlock(BasicBlock *B) { + bool Changed = false; + ArrayRef M; + std::list WorkList; + LLVM_DEBUG(dbgs() << "Preparing worklist for BB:\n"); + LLVM_DEBUG(B->dump()); + for (auto &I : *B) { + if (match(&I, (m_Shuffle(m_Value(), m_Value(), m_ZeroMask())))) + continue; // Skip - building vector from a scalar + if (match(&I, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M))))) { + WorkList.push_back(&I); + LLVM_DEBUG(dbgs() << "\tAdded instr - "; I.dump()); + } + } + + LLVM_DEBUG(dbgs() << "Processing worklist:\n"); + while (!WorkList.empty()) { +#ifndef NDEBUG + int Limit = ShuffVecLimit; + if (Limit >= 0) { + if (NumRelocated >= ShuffVecLimit) { + LLVM_DEBUG({ + dbgs() << "Reached maximum limit!! \n"; + dbgs() << "Can't process any more shuffles.... \n"; + }); + return Changed; + } + } +#endif + Instruction *I = WorkList.front(); + WorkList.pop_front(); + LLVM_DEBUG(dbgs() << "\tProcessing instr - "; I->dump()); + Value *NewLoc = nullptr; + + // 'ShuffUses' is used to keep track of the vector shuffles that need to + // be relocated along with their immediate uses that are known to satisfy + // all the safety requirements of the relocation. + // NOTE: The shuffle instr 'I', where the analysis starts, doesn't have + // its immediate uses set in 'ShuffUses'. This can be done but isn't + // necessary. At this point, only shuffles with single use or a HI/LO pair + // are allowed. 
This is done mostly because those with the multiple uses + // aren't expected to be much profitable and can be extended in the future + // if necessary. For now, all the uses in such cases can be safely updated + // when the corresponding vector shuffle is relocated. + + ShuffUses.clear(); + ShuffUses[I] = SmallVector(); + // Skip if node already visited. + if (!Visited.insert(I).second) { + LLVM_DEBUG(dbgs() << "\t\tSKIPPING - Already visited ...\n"); + continue; + } + if (!match(I, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M))))) { + LLVM_DEBUG(dbgs() << "\t\tSKIPPING - Not a vector shuffle ...\n"); + continue; + } + if (!findNewShuffLoc(I, M, NewLoc) || !NewLoc) { + LLVM_DEBUG(dbgs() << "\t\tSKIPPING - NewLoc not found ...\n"); + continue; + } + LLVM_DEBUG(dbgs() << "\t\tRelocating after -- "; NewLoc->dump()); + Changed |= relocateShuffVec(I, M, NewLoc, WorkList); +#ifndef NDEBUG + NumRelocated++; +#endif + } + return Changed; +} + +bool HexagonOptShuffleVector::runOnFunction(Function &F) { + HST = TM->getSubtargetImpl(F); + // Works only for 128B mode but can be extended for 64B if needed. 
+ if (skipFunction(F) || !HST->useHVX128BOps()) + return false; + + bool Changed = false; + for (auto &B : F) + Changed |= visitBlock(&B); + + return Changed; +} + +FunctionPass * +llvm::createHexagonOptShuffleVector(const HexagonTargetMachine &TM) { + return new HexagonOptShuffleVector(&TM); +} diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 4cb29e7f00317..674d19176a88b 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -261,6 +261,16 @@ let Predicates = [UseHVX] in { defm: NopCast_pat; } +let Predicates = [UseHVXV68] in { + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; +} + let Predicates = [UseHVX, UseHVXFloatingPoint] in { defm: NopCast_pat; defm: NopCast_pat; @@ -307,6 +317,8 @@ let Predicates = [UseHVX] in { (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>; def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index d9824a3154093..d98fe80f453ab 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -47,6 +47,14 @@ static cl::opt DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); +static cl::opt + EnableGenWideningVec("hexagon-widening-vectors", cl::init(true), cl::Hidden, + cl::desc("Generate widening vector instructions")); + +static cl::opt + EnableOptShuffleVec("hexagon-opt-shuffvec", cl::init(true), cl::Hidden, + cl::desc("Enable optimization of 
shuffle vectors")); + static cl::opt DisableAModeOpt("disable-hexagon-amodeopt", cl::Hidden, cl::desc("Disable Hexagon Addressing Mode Optimization")); @@ -321,6 +329,8 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } void HexagonPassConfig::addIRPasses() { + HexagonTargetMachine &HTM = getHexagonTargetMachine(); + TargetPassConfig::addIRPasses(); bool NoOpt = (getOptLevel() == CodeGenOptLevel::None); @@ -350,6 +360,13 @@ void HexagonPassConfig::addIRPasses() { // Replace certain combinations of shifts and ands with extracts. if (EnableGenExtract) addPass(createHexagonGenExtract()); + if (EnableGenWideningVec) { + addPass(createHexagonGenWideningVecInstr(HTM)); + addPass(createHexagonGenWideningVecFloatInstr(HTM)); + addPass(createDeadCodeEliminationPass()); + } + if (EnableOptShuffleVec) + addPass(createHexagonOptShuffleVector(HTM)); } } diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 5c50ec2425b7c..ce5431758b1c7 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -368,8 +368,8 @@ class AlignVectors { const HexagonVectorCombine &HVC; }; -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; OS << "Type: " << *AI.ValTy << '\n'; @@ -379,8 +379,8 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? 
"yes" : "no") << '\n'; OS << "Main\n"; @@ -398,9 +398,8 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, - const AlignVectors::ByteSpan::Block &B) { +[[maybe_unused]] raw_ostream & +operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; if (B.Seg.Val == reinterpret_cast(&B)) { OS << "(self:" << B.Seg.Val << ')'; @@ -412,8 +411,8 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -[[maybe_unused]] -raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) OS << B << '\n'; @@ -2475,19 +2474,19 @@ Value *HvxIdioms::processVGather(Instruction &In) const { Dst->eraseFromParent(); } else if (Qual == HvxIdioms::LLVM_Scatter) { // Gather feeds directly into scatter. 
- LLVM_DEBUG({ - auto *DstInpTy = cast(Dst->getOperand(1)->getType()); - assert(DstInpTy && "Cannot handle no vector type for llvm.scatter"); - unsigned DstInpSize = HVC.getSizeOf(DstInpTy); - unsigned DstElements = HVC.length(DstInpTy); - auto *DstElemTy = cast(DstInpTy->getElementType()); - assert(DstElemTy && "llvm.scatter needs vector of ptr argument"); - dbgs() << " Gather feeds into scatter\n Values to scatter : " - << *Dst->getOperand(0) << "\n"; - dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements - << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy - << ") Access alignment(" << *Dst->getOperand(2) << ")\n"; - }); + auto *DstInpTy = cast(Dst->getOperand(1)->getType()); + assert(DstInpTy && "Cannot handle no vector type for llvm.scatter"); + [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy); + [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy); + [[maybe_unused]] auto *DstElemTy = + cast(DstInpTy->getElementType()); + assert(DstElemTy && "llvm.scatter needs vector of ptr argument"); + LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : " + << *Dst->getOperand(0) << "\n"); + LLVM_DEBUG(dbgs() << " Dst type(" << *DstInpTy << ") elements(" + << DstElements << ") VecLen(" << DstInpSize << ") type(" + << *DstElemTy << ") Access alignment(" + << *Dst->getOperand(2) << ")\n"); // Address of source auto *Src = getPointer(IndexLoad); if (!Src) diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 2f59b7c0fdb15..10c350e0e2bae 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -67,6 +67,11 @@ void HexagonMCELFStreamer::emitInstruction(const MCInst &MCB, assert(MCB.getOpcode() == Hexagon::BUNDLE); assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE); assert(HexagonMCInstrInfo::bundleSize(MCB) > 0); + const 
MCRegisterInfo *RI = getContext().getRegisterInfo(); + HexagonMCChecker Check(getContext(), *MCII, STI, const_cast(MCB), + *RI); + [[maybe_unused]] bool CheckOk = Check.check(false); + assert(CheckOk); // At this point, MCB is a bundle // Iterate through the bundle and assign addends for the instructions diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 51212837fbb17..1c311d5480883 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -681,6 +681,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); @@ -12657,7 +12658,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ROTL: return LowerROTL(Op, DAG); // For counter-based loop handling. - case ISD::INTRINSIC_W_CHAIN: return SDValue(); + case ISD::INTRINSIC_W_CHAIN: + return SDValue(); case ISD::BITCAST: return LowerBITCAST(Op, DAG); @@ -14568,6 +14570,46 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? 
RegState::Define : 0) .addImm(0) .addReg(Ptr); + } else if (MI.getOpcode() == PPC::LWAT_PSEUDO || + MI.getOpcode() == PPC::LDAT_PSEUDO) { + DebugLoc DL = MI.getDebugLoc(); + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + Register ValReg = MI.getOperand(2).getReg(); + unsigned FC = MI.getOperand(3).getImm(); + bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO; + Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass); + if (IsLwat) + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64) + .addImm(0) + .addReg(ValReg) + .addImm(PPC::sub_32); + else + Val64 = ValReg; + + Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass); + Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r); + BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair) + .addReg(UndefG8r) + .addImm(PPC::sub_gp8_x0) + .addReg(Val64) + .addImm(PPC::sub_gp8_x1); + + Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass); + BuildMI(*BB, MI, DL, TII->get(IsLwat ? 
PPC::LWAT : PPC::LDAT), PairResult) + .addReg(G8rPair) + .addReg(PtrReg) + .addImm(FC); + Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64) + .addReg(PairResult, 0, PPC::sub_gp8_x0); + if (IsLwat) + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(Result64, 0, PPC::sub_32); + else + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(Result64); } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index fdca5ebc854ba..620dfd4738226 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -327,12 +327,19 @@ def LQARXL : XForm_1<31, 276, (outs g8prc:$RST), (ins (memrr $RA, $RB):$addr), "lqarx $RST, $addr, 1", IIC_LdStLQARX, []>, isPPC64, isRecordForm; -let hasExtraDefRegAllocReq = 1 in -def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$RST), (ins g8rc:$RA, u5imm:$RB), +let hasExtraDefRegAllocReq = 1, mayStore = 1 in +def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB), "ldat $RST, $RA, $RB", IIC_LdStLoad>, isPPC64, - Requires<[IsISA3_0]>; + Requires<[IsISA3_0]>, + RegConstraint<"$RSTi = $RST">; } +def LDAT_PSEUDO : PPCCustomInserterPseudo< + (outs g8rc:$dst), + (ins ptr_rc_nor0:$ptr, g8rc:$val, u5imm:$fc), + "#LDAT_PSEUDO", + [(set i64:$dst, (int_ppc_amo_ldat ptr_rc_nor0:$ptr, g8rc:$val, timm:$fc))]>; + let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr), "stdcx. 
$RST, $addr", IIC_LdStSTDCX, []>, isRecordForm; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 3ecc58c04e378..fdccddd86b9b7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2121,14 +2121,21 @@ def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$RST), (ins (memrr $RA, $RB):$ad def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr), "lwarx $RST, $addr, 1", IIC_LdStLWARX, []>, isRecordForm; +} // The atomic instructions use the destination register as well as the next one // or two registers in order (modulo 31). -let hasExtraSrcRegAllocReq = 1 in -def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$RST), (ins gprc:$RA, u5imm:$RB), +let hasExtraSrcRegAllocReq = 1, mayLoad = 1, mayStore = 1 in +def LWAT : X_RD5_RS5_IM5<31, 582, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB), "lwat $RST, $RA, $RB", IIC_LdStLoad>, - Requires<[IsISA3_0]>; -} + Requires<[IsISA3_0]>, + RegConstraint<"$RSTi = $RST">; + +def LWAT_PSEUDO : PPCCustomInserterPseudo< + (outs gprc:$dst), + (ins ptr_rc_nor0:$ptr, gprc:$val, u5imm:$fc), + "#LWAT_PSEUDO", + [(set i32:$dst, (int_ppc_amo_lwat ptr_rc_nor0:$ptr, gprc:$val, timm:$fc))]>; let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr), diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index e23914a050359..5b9ce2400c7cb 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -501,7 +501,7 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { // Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated // at last. 
def G8pRC : - RegisterClass<"PPC", [i128], 128, + RegisterClass<"PPC", [untyped], 128, (add (sequence "G8p%u", 1, 5), (sequence "G8p%u", 14, 7), G8p15, G8p6, G8p0)> { diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 291fafa986395..1cbb6db1b3f64 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -125,6 +125,33 @@ class SMX60IsWorstCaseMXSEW MxList, bit isF = 0 defvar SMX60VLEN = 256; defvar SMX60DLEN = !div(SMX60VLEN, 2); +class SMX60GetLMulCycles { + int c = !cond( + !eq(mx, "M1") : 1, + !eq(mx, "M2") : 2, + !eq(mx, "M4") : 4, + !eq(mx, "M8") : 8, + !eq(mx, "MF2") : 1, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +class SMX60GetVLMAX { + defvar LMUL = SMX60GetLMulCycles.c; + int val = !cond( + !eq(mx, "MF2") : !div(!div(SMX60VLEN, 2), sew), + !eq(mx, "MF4") : !div(!div(SMX60VLEN, 4), sew), + !eq(mx, "MF8") : !div(!div(SMX60VLEN, 8), sew), + true: !div(!mul(SMX60VLEN, LMUL), sew) + ); +} + +// Latency for segmented loads and stores are calculated as vl * nf. 
+class SMX60SegmentedLdStCycles { + int c = !mul(SMX60GetVLMAX.val, nf); +} + def SpacemitX60Model : SchedMachineModel { let IssueWidth = 2; // dual-issue let MicroOpBufferSize = 0; // in-order @@ -367,23 +394,43 @@ foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX.c; // Unit-stride loads and stores - defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>; + defvar VLDELatAndOcc = ConstValueUntilLMULThenDoubleBase<"M2", 3, 4, mx>.c; + let Latency = VLDELatAndOcc, ReleaseAtCycles = [VLDELatAndOcc] in { + defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>; + } + defvar VSTELatAndOcc = GetLMULValue<[2, 2, 2, 3, 4, 8, 19], mx>.c; + let Latency = VSTELatAndOcc, ReleaseAtCycles = [VSTELatAndOcc] in { + defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>; + } + defvar VLDFFLatAndOcc = GetLMULValue<[4, 4, 4, 5, 7, 11, 19], mx>.c; + let Latency = VLDFFLatAndOcc, ReleaseAtCycles = [VLDFFLatAndOcc] in { + defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>; + } // Mask loads and stores - defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>; - defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>; + let ReleaseAtCycles = [2] in { + defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase>; + } + let Latency = 2, ReleaseAtCycles = [2] in { + defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase>; + } // Strided and indexed loads and stores foreach eew = [8, 16, 32, 64] in { - defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>; + defvar StridedLdStLatAndOcc = SMX60GetVLMAX.val; + let Latency = 
StridedLdStLatAndOcc, ReleaseAtCycles = [StridedLdStLatAndOcc] in { + defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>; + } + + defvar IndexedLdStLatAndOcc = !div(SMX60GetVLMAX.val, 2); + let Latency = IndexedLdStLatAndOcc, ReleaseAtCycles = [IndexedLdStLatAndOcc] in { + defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>; + } } } @@ -393,30 +440,39 @@ foreach mx = SchedMxList in { foreach eew = [8, 16, 32, 64] in { defvar IsWorstCase = SMX60IsWorstCaseMX.c; - // Unit-stride segmented - defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - - // Strided/indexed segmented - defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - - // Indexed segmented - defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defvar SegmentedLdStLatAndOcc = 
SMX60SegmentedLdStCycles.c; + let Latency = SegmentedLdStLatAndOcc, ReleaseAtCycles = [SegmentedLdStLatAndOcc] in { + // Unit-stride segmented + defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + + // Strided/indexed segmented + defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + + // Indexed segmented + defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + } } } } // Whole register move/load/store foreach LMul = [1, 2, 4, 8] in { - def : WriteRes("WriteVLD" # LMul # "R"), [SMX60_VLS]>; - def : WriteRes("WriteVST" # LMul # "R"), [SMX60_VLS]>; + defvar WholeRegLdStLatAndOcc = !if(!eq(LMul, 1), 3, !mul(LMul, 2)); + let Latency = WholeRegLdStLatAndOcc, ReleaseAtCycles = [WholeRegLdStLatAndOcc] in { + def : WriteRes("WriteVLD" # LMul # "R"), [SMX60_VLS]>; + def : WriteRes("WriteVST" # LMul # "R"), [SMX60_VLS]>; + } - def : WriteRes("WriteVMov" # LMul # "V"), [SMX60_VIEU]>; + defvar VMovLatAndOcc = !if(!eq(LMul, 1), 4, !mul(LMul, 2)); + let Latency = VMovLatAndOcc, ReleaseAtCycles = [VMovLatAndOcc] in { + def : WriteRes("WriteVMov" # LMul # "V"), [SMX60_VIEU]>; + } } // 11. 
Vector Integer Arithmetic Instructions diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td index d9c50483a029c..6214000ddce5b 100644 --- a/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -37,7 +37,7 @@ def RetCC_Sparc32 : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1]>>, // FIXME GCC in soft-float mode passes f128 as if 2xi64 values. - CCIfType<[f128], CCIfInReg>>>, + CCIfType<[f128], CCIfInReg>>, CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">> ]>; diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp index f4cb4e2d1c9e1..f06b1d3157939 100644 --- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp +++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Support/xxhash.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 596849ecab742..30d7831f06a2b 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -11,17 +11,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Hash.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/xxhash.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" using namespace llvm; @@ -208,10 
+209,16 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { std::string Type = MangledType.str(); if (M.getModuleFlag("cfi-normalize-integers")) Type += ".normalized"; + + // Determine which hash algorithm to use + auto *MD = dyn_cast_or_null(M.getModuleFlag("kcfi-hash")); + KCFIHashAlgorithm Algorithm = + parseKCFIHashAlgorithm(MD ? MD->getString() : ""); + F.setMetadata(LLVMContext::MD_kcfi_type, MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( Type::getInt32Ty(Ctx), - static_cast(xxHash64(Type)))))); + getKCFITypeID(Type, Algorithm))))); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 38024aa6897fc..d2f9263e32213 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -757,31 +757,6 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { if (!PhiR) continue; - // Try to narrow wide and replicating recipes to uniform recipes, based on - // VPlan analysis. - // TODO: Apply to all recipes in the future, to replace legacy uniformity - // analysis. - auto Users = collectUsersRecursively(PhiR); - for (VPUser *U : reverse(Users)) { - auto *Def = dyn_cast(U); - auto *RepR = dyn_cast(U); - // Skip recipes that shouldn't be narrowed. - if (!Def || !isa(Def) || - Def->getNumUsers() == 0 || !Def->getUnderlyingValue() || - (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))) - continue; - - // Skip recipes that may have other lanes than their first used. 
- if (!vputils::isSingleScalar(Def) && !vputils::onlyFirstLaneUsed(Def)) - continue; - - auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), - Def->operands(), /*IsUniform*/ true, - /*Mask*/ nullptr, /*Flags*/ *Def); - Clone->insertAfter(Def); - Def->replaceAllUsesWith(Clone); - } - // Replace wide pointer inductions which have only their scalars used by // PtrAdd(IndStart, ScalarIVSteps (0, Step)). if (auto *PtrIV = dyn_cast(&Phi)) { @@ -1546,8 +1521,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { continue; } - // Skip recipes that aren't single scalars. - if (!vputils::isSingleScalar(RepOrWidenR)) + // Skip recipes that aren't single scalars and don't just have their first + // lane used. + if (!vputils::isSingleScalar(RepOrWidenR) && + (!vputils::onlyFirstLaneUsed(RepOrWidenR) || + RepOrWidenR->getNumUsers() == 0)) continue; // Skip recipes for which conversion to single-scalar does introduce diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll index 9a9a6a7d45931..e041c96371762 100644 --- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll +++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll @@ -180,7 +180,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.1 = add i64 %iv, 1 ; CHECK-NEXT: --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 -; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, 
%outer.header: Variant } ; CHECK-NEXT: %iv.2 = add i64 %iv, 2 ; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 @@ -188,7 +188,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.3 = add i64 %iv, 3 ; CHECK-NEXT: --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 -; CHECK-NEXT: --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.4 = add i64 %iv, 4 ; CHECK-NEXT: --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 @@ -196,11 +196,11 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.5 = add i64 %iv, 5 ; CHECK-NEXT: --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 -; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: 
Variant } ; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 ; CHECK-NEXT: --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 -; CHECK-NEXT: --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}-2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 ; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 @@ -296,7 +296,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.1 = add i64 %iv, 1 ; CHECK-NEXT: --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 -; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.2 = add i64 %iv, 2 ; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 @@ -304,7 +304,7 @@ define void 
@test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.3 = add i64 %iv, 3 ; CHECK-NEXT: --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 -; CHECK-NEXT: --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.4 = add i64 %iv, 4 ; CHECK-NEXT: --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 @@ -312,11 +312,11 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { ; CHECK-NEXT: %iv.5 = add i64 %iv, 5 ; CHECK-NEXT: --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 -; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 ; CHECK-NEXT: --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 -; CHECK-NEXT: --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 
4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: --> ({{\{\{}}-2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 ; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } ; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index abc67eec32391..96f5e5a4afb3e 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -5,9 +5,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Assumption Cache Tracker diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index e1481667a4ab7..2102029e608ab 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -5,9 +5,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Type-Based Alias Analysis diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 1f7888a633d62..8364e680bc8c7 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -14,9 +14,11 @@ ; REQUIRES: asserts ; GCN-O0:Target Library Information +; GCN-O0-NEXT:Runtime Library Function Analysis ; GCN-O0-NEXT:Target Pass Configuration ; GCN-O0-NEXT:Machine Module Information ; GCN-O0-NEXT:Target Transform Information +; GCN-O0-NEXT:Library Function Lowering Analysis ; GCN-O0-NEXT:Assumption Cache Tracker ; GCN-O0-NEXT:Profile summary info ; GCN-O0-NEXT:Argument Register Usage Information Storage @@ -161,9 +163,11 @@ ; GCN-O0-NEXT: Free MachineFunction ; GCN-O1:Target Library Information +; GCN-O1-NEXT:Runtime Library Function Analysis ; GCN-O1-NEXT:Target Pass Configuration ; GCN-O1-NEXT:Machine Module Information ; GCN-O1-NEXT:Target Transform Information +; GCN-O1-NEXT:Library Function Lowering Analysis ; GCN-O1-NEXT:Assumption Cache Tracker ; GCN-O1-NEXT:Profile summary info ; GCN-O1-NEXT:AMDGPU Address space based Alias Analysis @@ -453,9 +457,11 @@ ; GCN-O1-NEXT: Free MachineFunction ; GCN-O1-OPTS:Target Library Information +; GCN-O1-OPTS-NEXT:Runtime Library Function Analysis ; GCN-O1-OPTS-NEXT:Target Pass Configuration ; GCN-O1-OPTS-NEXT:Machine Module Information ; GCN-O1-OPTS-NEXT:Target Transform Information +; GCN-O1-OPTS-NEXT:Library Function Lowering Analysis ; GCN-O1-OPTS-NEXT:Assumption Cache Tracker ; GCN-O1-OPTS-NEXT:Profile summary info ; GCN-O1-OPTS-NEXT:AMDGPU Address space based Alias Analysis @@ -773,9 +779,11 @@ ; GCN-O1-OPTS-NEXT: Free MachineFunction ; GCN-O2:Target Library Information +; GCN-O2-NEXT:Runtime Library Function Analysis ; GCN-O2-NEXT:Target Pass Configuration ; GCN-O2-NEXT:Machine Module Information ; GCN-O2-NEXT:Target Transform Information +; GCN-O2-NEXT:Library Function Lowering Analysis ; GCN-O2-NEXT:Assumption Cache Tracker ; GCN-O2-NEXT:Profile summary info ; GCN-O2-NEXT:AMDGPU Address space based Alias Analysis @@ 
-1098,9 +1106,11 @@ ; GCN-O2-NEXT: Free MachineFunction ; GCN-O3:Target Library Information +; GCN-O3-NEXT:Runtime Library Function Analysis ; GCN-O3-NEXT:Target Pass Configuration ; GCN-O3-NEXT:Machine Module Information ; GCN-O3-NEXT:Target Transform Information +; GCN-O3-NEXT:Library Function Lowering Analysis ; GCN-O3-NEXT:Assumption Cache Tracker ; GCN-O3-NEXT:Profile summary info ; GCN-O3-NEXT:AMDGPU Address space based Alias Analysis diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir index e8c27f2eb3685..21f5515b7fb91 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir @@ -283,11 +283,11 @@ body: | ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/ $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x544 + ; GCN-NEXT: s_set_vgpr_msb 0x554 ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/ $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x4410 + ; GCN-NEXT: s_set_vgpr_msb 0x5410 ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/ $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec @@ -303,6 +303,10 @@ body: | ; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/ $vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, 
undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec + ; GCN-NEXT: s_set_vgpr_msb 0xae54 + ; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ + $vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec + ; ASM: NumVgprs: 777 ... diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir index b05edd046b874..05bbb0f54ef9e 100644 --- a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir @@ -4481,3 +4481,23 @@ body: | $vgpr3 = V_MOV_B32_e32 0, implicit $exec $vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec ... + +--- +name: vopd_no_combine_fmamk_src1 +tracksRegLiveness: true +body: | + bb.0: + ; SCHED-LABEL: name: vopd_no_combine_fmamk_src1 + ; SCHED: $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fmamk_src1 + ; PAIR: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; + ; LOWER-LABEL: name: vopd_no_combine_fmamk_src1 + ; LOWER: S_SET_VGPR_MSB 5, implicit-def $mode + ; LOWER-NEXT: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; LOWER-NEXT: S_SET_VGPR_MSB 1280, implicit-def $mode + $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec + $vgpr145 = V_FMAC_F32_e32 1069066811, 
$vgpr366, $vgpr145, implicit $mode, implicit $exec +... diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll index 67d9e19b8975e..c7c5d5f693c55 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll @@ -1,29 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=hexagon < %s | FileCheck %s +; RUN: llc -mtriple=hexagon -mattr=+v75,+hvxv75,+hvx-length128b < %s | FileCheck %s -define void @f0(ptr %a0, ptr %a1, ptr %a2) #0 { +define void @f0(ptr %a0, ptr %a1, ptr %a2) { ; CHECK-LABEL: f0: ; CHECK: // %bb.0: // %b0 ; CHECK-NEXT: { -; CHECK-NEXT: r7 = #-4 -; CHECK-NEXT: v0 = vmem(r0+#0) +; CHECK-NEXT: [[RS:r[0-9]+]] = #-4 +; CHECK-NEXT: [[V0:v[0-9]+]] = vmem([[A0:r[0-9]+]]+#0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vmem(r1+#0) +; CHECK-NEXT: [[ACC:v[0-9]+]]:2.w = vmpy([[V0]].h,[[V1:v[0-9]+]].h) +; CHECK-NEXT: [[V1]].cur = vmem([[A1:r[0-9]+]]+#0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1:0.w = vmpy(v0.h,v1.h) +; CHECK-NEXT: [[ACC]]:2.w += vmpy([[V0]].h,[[V1]].h) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1:0.w = vadd(v1:0.w,v1:0.w) +; CHECK-NEXT: [[VDEAL:v[0-9]+]]:0 = vdeal([[ACC]],[[V2:v[0-9]+]],[[RS]]) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1:0 = vdeal(v1,v0,r7) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: v0.h = vpacko(v1.w,v0.w) +; CHECK-NEXT: [[VOUT:v[0-9]+]].h = vpacko([[VDEAL]].w,[[V0]].w) ; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: vmem(r2+#0) = v0.new +; CHECK-NEXT: vmem([[A2:r[0-9]+]]+#0) = [[VOUT]].new ; CHECK-NEXT: } b0: %v0 = load <64 x i16>, ptr %a0, align 128 @@ -47,6 +45,4 @@ b0: ret void } -declare <64 x i32> @llvm.hexagon.V6.vmpyhv.128B(<32 x i32>, <32 x i32>) #0 - -attributes #0 = { nounwind "target-features"="+v66,+hvxv66,+hvx-length128b" } +declare <64 x i32> @llvm.hexagon.V6.vmpyhv.128B(<32 x i32>, <32 x i32>) diff --git 
a/llvm/test/CodeGen/Hexagon/autohvx/widen-setcc.ll b/llvm/test/CodeGen/Hexagon/autohvx/widen-setcc.ll index e4765bbcb4ef9..da962143da8f7 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/widen-setcc.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/widen-setcc.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s +; RUN: llc -mtriple=hexagon -mv75 -mhvx -mattr=+hvx-length128b -hexagon-hvx-widen=32 < %s | FileCheck %s ; Make sure that this doesn't crash. ; CHECK-LABEL: f0: @@ -16,5 +16,3 @@ b0: store <16 x i16> %v4, ptr %v0, align 2 ret void } - -attributes #0 = { "target-features"="+hvxv66,+hvx-length128b" } diff --git a/llvm/test/CodeGen/Hexagon/bug54537-vavg.ll b/llvm/test/CodeGen/Hexagon/bug54537-vavg.ll new file mode 100644 index 0000000000000..5ed41e3dbbcac --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bug54537-vavg.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hexagon -mv75 -mhvx -mattr=+hvx-length128b < %s +; REQUIRES: asserts + +define dso_local void @vc1INTERP_PredictMB([64 x i8]* %pPredBlk) local_unnamed_addr { +entry: + %next.gep111 = getelementptr [64 x i8], [64 x i8]* %pPredBlk, i32 0, i32 0 + %wide.load112 = load <32 x i8>, <32 x i8>* poison, align 32 + %0 = zext <32 x i8> %wide.load112 to <32 x i16> + %1 = add nuw nsw <32 x i16> zeroinitializer, + %2 = add nuw nsw <32 x i16> %1, %0 + %3 = lshr <32 x i16> %2, + %4 = trunc <32 x i16> %3 to <32 x i8> + %5 = bitcast i8* %next.gep111 to <32 x i8>* + store <32 x i8> %4, <32 x i8>* %5, align 1 + call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull null) + unreachable +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) diff --git a/llvm/test/CodeGen/Hexagon/extend-multiply-for-output-fpext.ll b/llvm/test/CodeGen/Hexagon/extend-multiply-for-output-fpext.ll new file mode 100644 index 0000000000000..c4a221af9cdcd --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/extend-multiply-for-output-fpext.ll @@ -0,0 +1,16 @@ +; RUN: llc 
-mtriple=hexagon -mattr=+hvx,+hvx-length128b,+hvxv81 %s -o - | FileCheck %s + +; Test that on hexagon computation of a_sq_f32 is done using a widening multiply +; instruction. +define dso_local <64 x float> @a_sq_times_b_sq(<64 x half> %a, <64 x float> %b) { +entry: + %a_sq_f16 = fmul <64 x half> %a, %a + %a_sq_f32 = fpext <64 x half> %a_sq_f16 to <64 x float> + %b_sq = fmul <64 x float> %b, %b + %result = fmul <64 x float> %a_sq_f32, %b_sq + ret <64 x float> %result +} +; CHECK: a_sq_times_b_sq +; CHECK: .qf32 = vmpy({{v[0-9]+}}.hf,{{v[0-9]+}}.hf) +; CHECK: .qf32 = vmpy({{v[0-9]+}}.sf,{{v[0-9]+}}.sf) +; CHECK: .qf32 = vmpy({{v[0-9]+}}.sf,{{v[0-9]+}}.sf) diff --git a/llvm/test/CodeGen/Hexagon/no_widening_of_bf16_vecmul.ll b/llvm/test/CodeGen/Hexagon/no_widening_of_bf16_vecmul.ll new file mode 100644 index 0000000000000..8fa293fc23f5d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/no_widening_of_bf16_vecmul.ll @@ -0,0 +1,60 @@ +;; RUN: llc --mtriple=hexagon --mcpu=hexagonv81 --mattr=+hvxv81,+hvx-length128b %s -o - | FileCheck %s + +; In this file, we check that fmul(exttof32(v1.bf16), exttof32(v2.bf16)) is not +; transformed to exttof32(fmul(v1.hf, v2.hf)). This was a bug in +; hexagon-widening-vector pass. 
+ +define void @halfbf16(ptr readonly %x, ptr %y) { +entry: + %xvec.bf16 = load <64 x bfloat>, ptr %x, align 2 + %xvec.f32 = fpext <64 x bfloat> %xvec.bf16 to <64 x float> + %yvec.f32 = fmul <64 x float> %xvec.f32, splat (float 5.000000e-01) + %yvec.bf16 = fptrunc <64 x float> %yvec.f32 to <64 x bfloat> + store <64 x bfloat> %yvec.bf16, ptr %y, align 2 + ret void +} +;; CHECK: vmpy(v{{[0-9]+}}.sf,v{{[0-9]+}}.sf) +;; CHECK: vmpy(v{{[0-9]+}}.sf,v{{[0-9]+}}.sf) + + +define void @vecmulbf16(ptr readonly %x, ptr readonly %y, ptr %z) { +entry: + %xvec.bf16 = load <64 x bfloat>, ptr %x, align 2 + %yvec.bf16 = load <64 x bfloat>, ptr %y, align 2 + %xvec.f32 = fpext <64 x bfloat> %xvec.bf16 to <64 x float> + %yvec.f32 = fpext <64 x bfloat> %yvec.bf16 to <64 x float> + %zvec.f32 = fmul <64 x float> %xvec.f32, %yvec.f32 + %zvec.bf16 = fptrunc <64 x float> %zvec.f32 to <64 x bfloat> + store <64 x bfloat> %zvec.bf16, ptr %z, align 2 + ret void +} + +;; CHECK: vmpy(v{{[0-9]+}}.sf,v{{[0-9]+}}.sf) +;; CHECK: vmpy(v{{[0-9]+}}.sf,v{{[0-9]+}}.sf) + + +define void @halff16(ptr readonly %x, ptr %y) { +entry: + %xvec.f16 = load <64 x half>, ptr %x, align 2 + %xvec.f32 = fpext <64 x half> %xvec.f16 to <64 x float> + %yvec.f32 = fmul <64 x float> %xvec.f32, splat (float 5.000000e-01) + %yvec.f16 = fptrunc <64 x float> %yvec.f32 to <64 x half> + store <64 x half> %yvec.f16, ptr %y, align 2 + ret void +} +;; CHECK: vmpy(v{{[0-9]+}}.hf,v{{[0-9]+}}.hf) + + +define void @vecmulf16(ptr readonly %x, ptr readonly %y, ptr %z) { +entry: + %xvec.f16 = load <64 x half>, ptr %x, align 2 + %yvec.f16 = load <64 x half>, ptr %y, align 2 + %xvec.f32 = fpext <64 x half> %xvec.f16 to <64 x float> + %yvec.f32 = fpext <64 x half> %yvec.f16 to <64 x float> + %zvec.f32 = fmul <64 x float> %xvec.f32, %yvec.f32 + %zvec.f16 = fptrunc <64 x float> %zvec.f32 to <64 x half> + store <64 x half> %zvec.f16, ptr %z, align 2 + ret void +} + +;; CHECK: vmpy(v{{[0-9]+}}.hf,v{{[0-9]+}}.hf) diff --git 
a/llvm/test/CodeGen/Hexagon/shortvec-vasrsat.ll b/llvm/test/CodeGen/Hexagon/shortvec-vasrsat.ll new file mode 100644 index 0000000000000..99db9f9c9354a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/shortvec-vasrsat.ll @@ -0,0 +1,68 @@ + +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 -hexagon-widen-short-vector -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; CHECK-LABEL: test_vasr +; CHECK: = vasr{{.*}}:sat + +define dllexport void @test_vasr(i64 %seed0, i64 %seed1, i8* %dst) local_unnamed_addr { +entry: + %1 = trunc i64 %seed0 to i32 + %t.1 = trunc i64 %seed1 to i32 + %2 = lshr i32 %t.1, 23 + %3 = and i32 %2, 255 + %4 = icmp ugt i32 %3, 125 + %5 = select i1 %4, i32 %3, i32 125 + %6 = sub nsw i32 132, %5 + %7 = shl i32 %1, %6 + %8 = trunc i32 %7 to i16 + %9 = trunc i32 %6 to i16 + + %broadcast.splatinsert50 = insertelement <64 x i16> poison, i16 %8, i32 0 + %broadcast.splat51 = shufflevector <64 x i16> %broadcast.splatinsert50, <64 x i16> poison, <64 x i32> zeroinitializer + %broadcast.splatinsert52 = insertelement <64 x i16> poison, i16 %9, i32 0 + %broadcast.splat53 = shufflevector <64 x i16> %broadcast.splatinsert52, <64 x i16> poison, <64 x i32> zeroinitializer + + %11 = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> zeroinitializer, <64 x i16> %broadcast.splat51) + %12 = ashr <64 x i16> %11, %broadcast.splat53 + %13 = icmp slt <64 x i16> %12, + %14 = select <64 x i1> %13, <64 x i16> %12, <64 x i16> + %15 = icmp sgt <64 x i16> %14, zeroinitializer + %16 = select <64 x i1> %15, <64 x i16> %14, <64 x i16> zeroinitializer + %17 = trunc <64 x i16> %16 to <64 x i8> + %elem = extractelement <64 x i8> %17, i32 0 + store i8 %elem, i8* %dst, align 1 + ret void +} + +; CHECK-LABEL: test_vasr_with_intrinsic +; CHECK: v{{[0-9:]+}}.ub = vasr(v{{[0-9]+}}.h,v{{[0-9]+}}.h,r{{[0-9]+}}):sat + +define dllexport void @test_vasr_with_intrinsic(i64 %seed0, i64 %seed1, i8* %dst) local_unnamed_addr { +entry: + %1 = trunc i64 %seed0 to i32 + %t.1 = trunc i64 %seed1 to i32 + 
%2 = lshr i32 %t.1, 23 + %3 = and i32 %2, 255 + %4 = icmp ugt i32 %3, 125 + %5 = select i1 %4, i32 %3, i32 125 + %6 = sub nsw i32 132, %5 + %7 = shl i32 %1, %6 + %8 = trunc i32 %7 to i16 + %9 = trunc i32 %6 to i16 + + %broadcast.splatinsert50 = insertelement <64 x i16> poison, i16 %8, i32 0 + %broadcast.splat51 = shufflevector <64 x i16> %broadcast.splatinsert50, <64 x i16> poison, <64 x i32> zeroinitializer + + %11 = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> zeroinitializer, <64 x i16> %broadcast.splat51) + %12 = ashr <64 x i16> %11, + %13 = call <64 x i16> @llvm.smin.v64i16(<64 x i16> %12, <64 x i16> ) + %14 = call <64 x i16> @llvm.smax.v64i16(<64 x i16> %13, <64 x i16> zeroinitializer) + %15 = trunc <64 x i16> %14 to <64 x i8> + %elem = extractelement <64 x i8> %15, i32 0 + store i8 %elem, i8* %dst, align 1 + ret void +} + +declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>) +declare <64 x i16> @llvm.smin.v64i16(<64 x i16>, <64 x i16>) +declare <64 x i16> @llvm.smax.v64i16(<64 x i16>, <64 x i16>) diff --git a/llvm/test/CodeGen/Hexagon/shortvec-vavg.ll b/llvm/test/CodeGen/Hexagon/shortvec-vavg.ll new file mode 100644 index 0000000000000..38030acf13329 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/shortvec-vavg.ll @@ -0,0 +1,20 @@ + +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 -hexagon-widen-short-vector -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; CHECK: = vavg(v{{[0-9:]+}}.h,v{{[0-9]+}}.h) + +define dllexport void @tvm_vavg(i8 %val0, i8 %val1, i8* %dst) local_unnamed_addr { +entry: + %1 = insertelement <64 x i8> poison, i8 %val0, i32 62 + %2 = insertelement <64 x i8> %1, i8 %val1, i32 63 + %3 = zext <64 x i8> %2 to <64 x i16> + %t.7 = insertelement <64 x i8> poison, i8 %val1, i32 62 + %t.8 = insertelement <64 x i8> %t.7, i8 %val0, i32 63 + %t.9 = zext <64 x i8> %t.8 to <64 x i16> + %t.17 = add nuw nsw <64 x i16> %t.9, %3 + %t.18 = lshr <64 x i16> %t.17, + %t.19 = trunc <64 x i16> %t.18 to <64 x i8> + %t.29 = extractelement <64 
x i8> %t.19, i32 6 + store i8 %t.29, i8* %dst, align 2 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/shortvec-vmpy.ll b/llvm/test/CodeGen/Hexagon/shortvec-vmpy.ll new file mode 100644 index 0000000000000..994a847b31aac --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/shortvec-vmpy.ll @@ -0,0 +1,27 @@ + +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b -hexagon-hvx-widen=32 -hexagon-widen-short-vector < %s | FileCheck %s + +; CHECK: {{[0-9]+:[0-9]+}}.uh = vmpy(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub) + +define dllexport void @test_vmpy(i64 %seed, i8 %val, i8* %dst) local_unnamed_addr { +entry: + ; Replace poison loads with args + %t.1 = trunc i64 %seed to i16 + %0 = lshr i16 %t.1, 7 + %1 = and i16 %0, 255 + %broadcast.splatinsert44 = insertelement <64 x i16> poison, i16 %1, i32 0 + %broadcast.splat45 = shufflevector <64 x i16> %broadcast.splatinsert44, <64 x i16> poison, <64 x i32> zeroinitializer + %3 = insertelement <64 x i8> poison, i8 %val, i32 57 + %4 = insertelement <64 x i8> %3, i8 %val, i32 58 + %5 = insertelement <64 x i8> %4, i8 %val, i32 59 + %6 = insertelement <64 x i8> %5, i8 %val, i32 60 + %7 = insertelement <64 x i8> %6, i8 %val, i32 61 + %8 = insertelement <64 x i8> %7, i8 %val, i32 62 + %9 = insertelement <64 x i8> %8, i8 %val, i32 63 + %10 = zext <64 x i8> %9 to <64 x i16> + %11 = mul nuw <64 x i16> %broadcast.splat45, %10 + %12 = trunc <64 x i16> %11 to <64 x i8> + %elem = extractelement <64 x i8> %12, i32 0 + store i8 %elem, i8* %dst, align 1 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/vadd-const.ll b/llvm/test/CodeGen/Hexagon/vadd-const.ll new file mode 100644 index 0000000000000..da6ccffc0093d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vadd-const.ll @@ -0,0 +1,114 @@ +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; Make sure that the appropriate vadd instructions are generated when +; adding a vector of constant values. 
+ +; CHECK-LABEL: test_vadd_const1 +; CHECK: [[REG0:(r[0-9]+)]] = # +; CHECK: [[VREG0:(v[0-9]+)]].b = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.h = vadd(v{{[0-9]+}}.ub,[[VREG0]].ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const1(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = add nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const2 +; CHECK: [[REG0:(r[0-9]+)]] = # +; CHECK: [[VREG0:([0-9]+)]].h = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.h = vadd({{.*}}.h,{{v[VREG0]|v[VREG1]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const2(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = add nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const2_1 +; CHECK: [[REG0:(r[0-9]+)]] = #-270 +; CHECK: [[VREG0:([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}.w,{{v[VREG1]|v[VREG0]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const2_1(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = add nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + + + +; CHECK-LABEL: test_vadd_const3 +; CHECK: [[REG0:(r[0-9]+)]] = # +; CHECK: [[VREG0:(v[0-9]+)]].h = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.w = vadd(v{{[0-9]+}}.uh,[[VREG0]].uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const3(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + 
%wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = add nuw nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const4 +; CHECK: [[REG0:(r[0-9]+)]] = #-23 +; CHECK: [[VREG0:([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}.w,{{v[VREG1]|v[VREG0]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const4(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = add nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const5 +; CHECK: [[REG0:(r[0-9]+)]] = #-257 +; CHECK: [[VREG0:([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}.w,{{v[VREG1]|v[VREG0]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const5(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = add nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const6 +; CHECK: [[REG0:(r[0-9]+)]] = #-23 +; CHECK: [[VREG0:(v[0-9]+)]] = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}[[VREG0]].w{{.*}}) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const6(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <32 x i32>, ptr %a, align 4 + %0 = add nsw <32 x i32> %wide.load, + store <32 x i32> %0, ptr %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/vasr-sat.ll b/llvm/test/CodeGen/Hexagon/vasr-sat.ll new file mode 100644 index 0000000000000..9ad9666dd574f --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vasr-sat.ll @@ -0,0 +1,66 
@@ + +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; Test for saturating vasr instruction. + +; CHECK-LABEL: test_vasr +; CHECK: = vasr{{.*}}:sat + +define dllexport void @test_vasr(i64 %seed0, i64 %seed1, + i8* %dst) local_unnamed_addr { +entry: + %1 = trunc i64 %seed0 to i32 + %t.1 = trunc i64 %seed1 to i32 + %2 = lshr i32 %t.1, 23 + %3 = and i32 %2, 255 + %4 = icmp ugt i32 %3, 125 + %5 = select i1 %4, i32 %3, i32 125 + %6 = sub nsw i32 132, %5 + %7 = shl i32 %1, %6 + %8 = trunc i32 %7 to i16 + %9 = trunc i32 %6 to i16 + + ; Broadcast splats + %broadcast.splatinsert216 = insertelement <128 x i16> poison, i16 %9, i32 0 + %broadcast.splat217 = shufflevector <128 x i16> %broadcast.splatinsert216, <128 x i16> poison, <128 x i32> zeroinitializer + %broadcast.splatinsert214 = insertelement <128 x i16> poison, i16 %8, i32 0 + %broadcast.splat215 = shufflevector <128 x i16> %broadcast.splatinsert214, <128 x i16> poison, <128 x i32> zeroinitializer + %11 = ashr <128 x i16> %broadcast.splat215, %broadcast.splat217 + %12 = icmp slt <128 x i16> %11, + %13 = select <128 x i1> %12, <128 x i16> %11, <128 x i16> + %14 = icmp sgt <128 x i16> %13, zeroinitializer + %15 = select <128 x i1> %14, <128 x i16> %13, <128 x i16> zeroinitializer + %16 = trunc <128 x i16> %15 to <128 x i8> + %17 = bitcast i8* %dst to <128 x i8>* + store <128 x i8> %16, <128 x i8>* %17, align 1 + ret void +} + +; CHECK-LABEL: test_vasr_with_intrinsic +; CHECK: = vasr{{.*}}:sat + +define dllexport void @test_vasr_with_intrinsic(i64 %seed0, i64 %seed1, + i8* %dst) local_unnamed_addr { +entry: + %1 = trunc i64 %seed0 to i32 + %t.1 = trunc i64 %seed1 to i32 + %2 = lshr i32 %t.1, 23 + %3 = and i32 %2, 255 + %4 = icmp ugt i32 %3, 125 + %5 = select i1 %4, i32 %3, i32 125 + %6 = sub nsw i32 132, %5 + %7 = shl i32 %1, %6 + %8 = trunc i32 %7 to i16 + %broadcast.splatinsert214 = insertelement <128 x i16> poison, i16 %8, i32 0 + %broadcast.splat215 = shufflevector <128 x i16> 
%broadcast.splatinsert214, <128 x i16> poison, <128 x i32> zeroinitializer + %11 = ashr <128 x i16> %broadcast.splat215, + %12 = call <128 x i16> @llvm.smin.v128i16(<128 x i16> %11, <128 x i16> ) + %13 = call <128 x i16> @llvm.smax.v128i16(<128 x i16> %12, <128 x i16> zeroinitializer) + %14 = trunc <128 x i16> %13 to <128 x i8> + %15 = bitcast i8* %dst to <128 x i8>* + store <128 x i8> %14, <128 x i8>* %15, align 1 + ret void +} + +declare <128 x i16> @llvm.smin.v128i16(<128 x i16>, <128 x i16>) +declare <128 x i16> @llvm.smax.v128i16(<128 x i16>, <128 x i16>) diff --git a/llvm/test/CodeGen/Hexagon/vavg.ll b/llvm/test/CodeGen/Hexagon/vavg.ll new file mode 100644 index 0000000000000..70c0e482937d7 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vavg.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hexagon -mv73 -mhvx -mattr=+hvx-length128b < %s | FileCheck %s +; Test for vavg instruction. + +; CHECK: = vavg(v{{[0-9:]+}}.uh,v{{[0-9]+}}.uh) + +define dllexport void @test_vavg(float %f0, float %f1, + <128 x i8> %src, + i16* %dst) local_unnamed_addr { +entry: + %0 = select i1 false, float %f0, float %f1 + %1 = fptosi float %0 to i16 + %2 = lshr i16 %1, 7 + %3 = and i16 %2, 255 + %4 = and i16 %1, 127 + %broadcast.splatinsert212.1 = insertelement <128 x i16> poison, i16 %4, i32 0 + %broadcast.splat213.1 = shufflevector <128 x i16> %broadcast.splatinsert212.1, <128 x i16> poison, <128 x i32> zeroinitializer + %broadcast.splatinsert208.1 = insertelement <128 x i16> poison, i16 %3, i32 0 + %broadcast.splat209.1 = shufflevector <128 x i16> %broadcast.splatinsert208.1, <128 x i16> poison, <128 x i32> zeroinitializer + %7 = zext <128 x i8> %src to <128 x i16> + %8 = mul nuw <128 x i16> %broadcast.splat209.1, %7 + %9 = add <128 x i16> %8, zeroinitializer + %10 = zext <128 x i16> %9 to <128 x i32> + %11 = mul nuw nsw <128 x i16> %broadcast.splat213.1, %7 + %12 = add nuw <128 x i16> %11, zeroinitializer + %13 = lshr <128 x i16> %12, + %14 = zext <128 x i16> %13 to <128 x i32> + %15 = add nuw 
nsw <128 x i32> %14, %10 + %16 = lshr <128 x i32> %15, + %17 = trunc <128 x i32> %16 to <128 x i16> + %19 = bitcast i16* %dst to <128 x i16>* + store <128 x i16> %17, <128 x i16>* %19, align 1 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/vec-shuff-invalid-operand.ll b/llvm/test/CodeGen/Hexagon/vec-shuff-invalid-operand.ll new file mode 100644 index 0000000000000..8479d579e8bdd --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vec-shuff-invalid-operand.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=hexagon -mv75 -mhvx -mattr=+hvx-length128b < %s | FileCheck %s + +; HexagonOptShuffleVector moved the shufflevector instruction to after +; the last add: +; %v5 = add nsw <128 x i32> %v4, %a0 +; That is incorrect, because the order of elements in the %a0 operand +; will not reflect the new shuffle. + +; CHECK: vadd +; CHECK-NOT: vshuff + +define dllexport void @f0(<128 x i32> %a0) local_unnamed_addr { +b0: + %v0 = load <128 x i8>, <128 x i8>* poison, align 128 + %v1 = call <128 x i16> @llvm.hexagon.vmpy.uu.v128i16(<128 x i8> %v0, <128 x i8> ) + %v2 = shufflevector <128 x i16> %v1, <128 x i16> poison, <128 x i32> + %v3 = zext <128 x i16> %v2 to <128 x i32> + %v4 = add nsw <128 x i32> %v3, + %v5 = add nsw <128 x i32> %v4, %a0 + %v6 = getelementptr <128 x i32>, <128 x i32>* null, i32 -1 + store <128 x i32> %v5, <128 x i32>* %v6, align 128 + call void @llvm.trap() + unreachable +} + +; Function Attrs: nounwind readnone +declare <128 x i16> @llvm.hexagon.vmpy.uu.v128i16(<128 x i8>, <128 x i8>) + +; Function Attrs: cold noreturn nounwind +declare void @llvm.trap() diff --git a/llvm/test/CodeGen/Hexagon/vec-shuff-multi-uses.ll b/llvm/test/CodeGen/Hexagon/vec-shuff-multi-uses.ll new file mode 100644 index 0000000000000..1fd5f9b20f8d0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vec-shuff-multi-uses.ll @@ -0,0 +1,290 @@ +; RUN: llc -march=hexagon -mattr=+hvxv68,+hvx-length128b -hexagon-opt-shuffvec=true < %s | FileCheck %s + +; This test corresponds to a case where a shufflevector 
with multiple uses +; was getting incorrectly relocated. The problem was that only one of the uses +; met the safety checks but the pass didn't keep track of it so both +; uses were getting updated at the time of relocation. + +; CHECK-NOT: Relocating after -- {{.*}} = add nuw nsw <128 x i32> + +@.str = private unnamed_addr constant [6 x i8] c"vbor \00", align 1 + +; Function Attrs: nounwind +define dso_local void @vbor(i32 %ntimes, i32 %n, double %ctime, double %dtime, i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, [128 x i8]* %aa, [128 x i8]* %bb, [128 x i8]* %cc) local_unnamed_addr { +entry: + %s = alloca [128 x i8], align 8 + %0 = getelementptr inbounds [128 x i8], [128 x i8]* %s, i32 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %0) + tail call void @init(i32 %n, i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, [128 x i8]* %aa, [128 x i8]* %bb, [128 x i8]* %cc, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0)) + %call = tail call i32 bitcast (i32 (...)* @second to i32 ()*)() + %cmp3261 = icmp sgt i32 %n, 0 + %cmp263 = icmp sgt i32 %ntimes, 0 + br i1 %cmp263, label %for.cond2.preheader.preheader, label %for.end141 + +for.cond2.preheader.preheader: + %min.iters.check = icmp ult i32 %n, 64 + %min.iters.check272 = icmp ult i32 %n, 128 + %n.vec = and i32 %n, -128 + %cmp.n = icmp eq i32 %n.vec, %n + %n.vec.remaining = and i32 %n, 64 + %min.epilog.iters.check.not.not = icmp eq i32 %n.vec.remaining, 0 + %n.vec278 = and i32 %n, -64 + %cmp.n281 = icmp eq i32 %n.vec278, %n + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.end, %for.cond2.preheader.preheader + %nl.0264 = phi i32 [ %inc140, %for.end ], [ 0, %for.cond2.preheader.preheader ] + br i1 %cmp3261, label %iter.check, label %for.end + +iter.check: ; preds = %for.cond2.preheader + br i1 %min.iters.check, label %for.body5.preheader, label %vector.main.loop.iter.check + +vector.main.loop.iter.check: ; preds = %iter.check + br i1 %min.iters.check272, label %vec.epilog.ph, 
label %vector.body + +vector.body: ; preds = %vector.body, %vector.main.loop.iter.check + %index = phi i32 [ %index.next, %vector.body ], [ 0, %vector.main.loop.iter.check ] + %wide.load = load <128 x i8>, <128 x i8>* poison, align 1 + %wide.load273 = load <128 x i8>, <128 x i8>* poison, align 1 + %wide.load274 = load <128 x i8>, <128 x i8>* poison, align 1 + %wide.load275 = load <128 x i8>, <128 x i8>* poison, align 1 + %wide.load276 = load <128 x i8>, <128 x i8>* poison, align 1 + %wide.load511 = load <128 x i8>, <128 x i8>* poison, align 1 + %1 = zext <128 x i8> %wide.load to <128 x i32> + %2 = zext <128 x i8> %wide.load273 to <128 x i32> + %3 = mul nuw nsw <128 x i32> %2, %1 + %4 = zext <128 x i8> %wide.load274 to <128 x i32> + %5 = zext <128 x i8> %wide.load275 to <128 x i32> + %6 = zext <128 x i8> %wide.load276 to <128 x i32> + %7 = zext <128 x i8> %wide.load511 to <128 x i32> + %8 = add nuw nsw <128 x i32> %6, %5 + %9 = add nuw nsw <128 x i32> %8, %4 + %10 = add nuw nsw <128 x i32> %9, %7 + %11 = mul nuw nsw <128 x i32> %3, %10 + %12 = mul nuw nsw <128 x i32> %4, %1 + %13 = mul nuw nsw <128 x i32> %12, %5 + %14 = mul nuw nsw <128 x i32> %5, %1 + %15 = mul nuw nsw <128 x i32> %6, %1 + %16 = add nuw nsw <128 x i32> %14, %12 + %17 = add nuw nsw <128 x i32> %16, %15 + %18 = mul nuw nsw <128 x i32> %17, %7 + %19 = mul nuw nsw <128 x i32> %16, %6 + %20 = add nuw nsw <128 x i32> %19, %13 + %21 = add nuw nsw <128 x i32> %20, %11 + %22 = add nuw nsw <128 x i32> %21, %18 + %23 = add nuw nsw <128 x i32> %8, %7 + %24 = mul nuw nsw <128 x i32> %23, %4 + %25 = mul nuw nsw <128 x i32> %7, %6 + %26 = add nuw nsw <128 x i32> %24, %25 + %27 = add nuw nsw <128 x i32> %7, %6 + %28 = mul nuw nsw <128 x i32> %27, %5 + %29 = add nuw nsw <128 x i32> %26, %28 + %30 = mul nuw nsw <128 x i32> %29, %2 + %31 = add <128 x i8> %wide.load511, %wide.load276 + %32 = mul <128 x i8> %31, %wide.load275 + %33 = mul <128 x i8> %wide.load511, %wide.load276 + %34 = add <128 x i8> %32, %33 + %35 = 
shl <128 x i32> %22, + %36 = ashr exact <128 x i32> %35, + %37 = shl <128 x i32> %30, + %38 = ashr exact <128 x i32> %37, + %39 = mul nsw <128 x i32> %36, %38 + %40 = trunc <128 x i32> %39 to <128 x i8> + %41 = mul <128 x i8> %33, %wide.load274 + %42 = mul <128 x i8> %41, %wide.load275 + %43 = mul <128 x i8> %42, %34 + %44 = mul <128 x i8> %43, %40 + %45 = getelementptr inbounds [128 x i8], [128 x i8]* %s, i32 0, i32 %index + %46 = bitcast i8* %45 to <128 x i8>* + store <128 x i8> %44, <128 x i8>* %46, align 8 + %index.next = add nuw i32 %index, 128 + %47 = icmp eq i32 %index.next, %n.vec + br i1 %47, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + br i1 %cmp.n, label %for.end, label %vec.epilog.iter.check + +vec.epilog.iter.check: ; preds = %middle.block + br i1 %min.epilog.iters.check.not.not, label %for.body5.preheader, label %vec.epilog.ph + +vec.epilog.ph: ; preds = %vec.epilog.iter.check, %vector.main.loop.iter.check + %vec.epilog.resume.val = phi i32 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] + br label %vec.epilog.vector.body + +vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph + %index279 = phi i32 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next280, %vec.epilog.vector.body ] + %48 = getelementptr inbounds i8, i8* %a, i32 %index279 + %49 = bitcast i8* %48 to <64 x i8>* + %wide.load282 = load <64 x i8>, <64 x i8>* %49, align 1 + %50 = getelementptr inbounds i8, i8* %b, i32 %index279 + %51 = bitcast i8* %50 to <64 x i8>* + %wide.load283 = load <64 x i8>, <64 x i8>* %51, align 1 + %52 = getelementptr inbounds i8, i8* %c, i32 %index279 + %53 = bitcast i8* %52 to <64 x i8>* + %wide.load284 = load <64 x i8>, <64 x i8>* %53, align 1 + %54 = getelementptr inbounds i8, i8* %d, i32 %index279 + %55 = bitcast i8* %54 to <64 x i8>* + %wide.load285 = load <64 x i8>, <64 x i8>* %55, align 1 + %56 = getelementptr inbounds i8, i8* %e, i32 %index279 + %57 = bitcast i8* %56 to <64 
x i8>* + %wide.load286 = load <64 x i8>, <64 x i8>* %57, align 1 + %wide.load312 = load <64 x i8>, <64 x i8>* poison, align 1 + %58 = zext <64 x i8> %wide.load282 to <64 x i32> + %59 = zext <64 x i8> %wide.load283 to <64 x i32> + %60 = mul nuw nsw <64 x i32> %59, %58 + %61 = zext <64 x i8> %wide.load284 to <64 x i32> + %62 = zext <64 x i8> %wide.load285 to <64 x i32> + %63 = zext <64 x i8> %wide.load286 to <64 x i32> + %64 = zext <64 x i8> %wide.load312 to <64 x i32> + %65 = add nuw nsw <64 x i32> %63, %62 + %66 = add nuw nsw <64 x i32> %65, %61 + %67 = add nuw nsw <64 x i32> %66, %64 + %68 = mul nuw nsw <64 x i32> %60, %67 + %69 = mul nuw nsw <64 x i32> %61, %58 + %70 = mul nuw nsw <64 x i32> %69, %62 + %71 = mul nuw nsw <64 x i32> %62, %58 + %72 = mul nuw nsw <64 x i32> %63, %58 + %73 = add nuw nsw <64 x i32> %71, %69 + %74 = add nuw nsw <64 x i32> %73, %72 + %75 = mul nuw nsw <64 x i32> %74, %64 + %76 = mul nuw nsw <64 x i32> %73, %63 + %77 = add nuw nsw <64 x i32> %76, %70 + %78 = add nuw nsw <64 x i32> %77, %68 + %79 = add nuw nsw <64 x i32> %78, %75 + %80 = add nuw nsw <64 x i32> %65, %64 + %81 = mul nuw nsw <64 x i32> %80, %61 + %82 = mul nuw nsw <64 x i32> %64, %63 + %83 = add nuw nsw <64 x i32> %81, %82 + %84 = add nuw nsw <64 x i32> %64, %63 + %85 = mul nuw nsw <64 x i32> %84, %62 + %86 = add nuw nsw <64 x i32> %83, %85 + %87 = mul nuw nsw <64 x i32> %86, %59 + %88 = add <64 x i8> %wide.load312, %wide.load286 + %89 = mul <64 x i8> %88, %wide.load285 + %90 = mul <64 x i8> %wide.load312, %wide.load286 + %91 = add <64 x i8> %89, %90 + %92 = shl <64 x i32> %79, + %93 = ashr exact <64 x i32> %92, + %94 = shl <64 x i32> %87, + %95 = ashr exact <64 x i32> %94, + %96 = mul nsw <64 x i32> %93, %95 + %97 = trunc <64 x i32> %96 to <64 x i8> + %98 = mul <64 x i8> %90, %wide.load284 + %99 = mul <64 x i8> %98, %wide.load285 + %100 = mul <64 x i8> %99, %91 + %101 = mul <64 x i8> %100, %97 + %102 = getelementptr inbounds [128 x i8], [128 x i8]* %s, i32 0, i32 %index279 + 
%103 = bitcast i8* %102 to <64 x i8>* + store <64 x i8> %101, <64 x i8>* %103, align 8 + %index.next280 = add nuw i32 %index279, 64 + %104 = icmp eq i32 %index.next280, %n.vec278 + br i1 %104, label %vec.epilog.middle.block, label %vec.epilog.vector.body + +vec.epilog.middle.block: ; preds = %vec.epilog.vector.body + br i1 %cmp.n281, label %for.end, label %for.body5.preheader + +for.body5.preheader: ; preds = %vec.epilog.middle.block, %vec.epilog.iter.check, %iter.check + %i.0262.ph = phi i32 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec278, %vec.epilog.middle.block ] + br label %for.body5 + +for.body5: ; preds = %for.body5, %for.body5.preheader + %i.0262 = phi i32 [ %inc, %for.body5 ], [ %i.0262.ph, %for.body5.preheader ] + %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.0262 + %105 = load i8, i8* %arrayidx, align 1 + %arrayidx6 = getelementptr inbounds i8, i8* %b, i32 %i.0262 + %106 = load i8, i8* %arrayidx6, align 1 + %arrayidx7 = getelementptr inbounds i8, i8* %c, i32 %i.0262 + %107 = load i8, i8* %arrayidx7, align 1 + %arrayidx8 = getelementptr inbounds i8, i8* %d, i32 %i.0262 + %108 = load i8, i8* %arrayidx8, align 1 + %arrayidx9 = getelementptr inbounds i8, i8* %e, i32 %i.0262 + %109 = load i8, i8* %arrayidx9, align 1 + %arrayidx11 = getelementptr inbounds [128 x i8], [128 x i8]* %aa, i32 %i.0262, i32 0 + %110 = load i8, i8* %arrayidx11, align 1 + %conv12266 = zext i8 %105 to i32 + %conv13267 = zext i8 %106 to i32 + %mul = mul nuw nsw i32 %conv13267, %conv12266 + %conv14268 = zext i8 %107 to i32 + %conv19269 = zext i8 %108 to i32 + %conv24270 = zext i8 %109 to i32 + %conv30271 = zext i8 %110 to i32 + %mul20243 = add nuw nsw i32 %conv24270, %conv19269 + %mul25244 = add nuw nsw i32 %mul20243, %conv14268 + %mul31245 = add nuw nsw i32 %mul25244, %conv30271 + %add32 = mul nuw nsw i32 %mul, %mul31245 + %mul35 = mul nuw nsw i32 %conv14268, %conv12266 + %mul37 = mul nuw nsw i32 %mul35, %conv19269 + %mul53 = mul nuw nsw i32 %conv19269, 
%conv12266 + %mul67 = mul nuw nsw i32 %conv24270, %conv12266 + %reass.add = add nuw nsw i32 %mul53, %mul35 + %reass.add250 = add nuw nsw i32 %reass.add, %mul67 + %reass.mul = mul nuw nsw i32 %reass.add250, %conv30271 + %reass.mul252 = mul nuw nsw i32 %reass.add, %conv24270 + %add56 = add nuw nsw i32 %reass.mul252, %mul37 + %add62 = add nuw nsw i32 %add56, %add32 + %add68 = add nuw nsw i32 %add62, %reass.mul + %mul85247 = add nuw nsw i32 %mul20243, %conv30271 + %add86 = mul nuw nsw i32 %mul85247, %conv14268 + %mul103 = mul nuw nsw i32 %conv30271, %conv24270 + %reass.add253 = add nuw nsw i32 %add86, %mul103 + %reass.add255 = add nuw nsw i32 %conv30271, %conv24270 + %reass.mul256 = mul nuw nsw i32 %reass.add255, %conv19269 + %reass.add259 = add nuw nsw i32 %reass.add253, %reass.mul256 + %reass.mul260 = mul nuw nsw i32 %reass.add259, %conv13267 + %mul115248 = add i8 %110, %109 + %add116 = mul i8 %mul115248, %108 + %mul121 = mul i8 %110, %109 + %reass.add257 = add i8 %add116, %mul121 + %sext = shl i32 %add68, 24 + %conv130 = ashr exact i32 %sext, 24 + %sext249 = shl i32 %reass.mul260, 24 + %conv131 = ashr exact i32 %sext249, 24 + %mul132 = mul nsw i32 %conv130, %conv131 + %111 = trunc i32 %mul132 to i8 + %112 = mul i8 %mul121, %107 + %mul126 = mul i8 %112, %108 + %mul128 = mul i8 %mul126, %reass.add257 + %conv137 = mul i8 %mul128, %111 + %arrayidx138 = getelementptr inbounds [128 x i8], [128 x i8]* %s, i32 0, i32 %i.0262 + store i8 %conv137, i8* %arrayidx138, align 1 + %inc = add nuw nsw i32 %i.0262, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %for.end, label %for.body5 + +for.end: ; preds = %for.body5, %vec.epilog.middle.block, %middle.block, %for.cond2.preheader + tail call void @dummy(i32 %n, i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, [128 x i8]* %aa, [128 x i8]* %bb, [128 x i8]* %cc, i8 signext 1) + %inc140 = add nuw nsw i32 %nl.0264, 1 + %exitcond265.not = icmp eq i32 %inc140, %ntimes + br i1 %exitcond265.not, label %for.end141, label 
%for.cond2.preheader + +for.end141: ; preds = %for.end, %entry + %conv = sitofp i32 %call to double + %call142 = tail call i32 bitcast (i32 (...)* @second to i32 ()*)() + %conv143 = sitofp i32 %call142 to double + %sub = fsub double %conv143, %conv + %sub144 = fsub double %sub, %ctime + %conv145 = sitofp i32 %ntimes to double + %mul146 = fmul double %conv145, %dtime + %sub147 = fsub double %sub144, %mul146 + %call148 = call i64 @cs1d(i32 %n, i8* nonnull %0) + %mul149 = mul nsw i32 %n, %ntimes + call void @check(i64 %call148, i32 %mul149, i32 %n, double %sub147, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0)) + call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %0) + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare dso_local void @init(i32, i8*, i8*, i8*, i8*, i8*, [128 x i8]*, [128 x i8]*, [128 x i8]*, i8*) local_unnamed_addr + +declare dso_local i32 @second(...) 
local_unnamed_addr + +declare dso_local void @dummy(i32, i8*, i8*, i8*, i8*, i8*, [128 x i8]*, [128 x i8]*, [128 x i8]*, i8 signext) local_unnamed_addr + +declare dso_local i64 @cs1d(i32, i8*) local_unnamed_addr + +declare dso_local void @check(i64, i32, i32, double, i8*) local_unnamed_addr + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) diff --git a/llvm/test/CodeGen/Hexagon/vec-shuff2.ll b/llvm/test/CodeGen/Hexagon/vec-shuff2.ll new file mode 100644 index 0000000000000..d5a4091916c74 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vec-shuff2.ll @@ -0,0 +1,106 @@ +; RUN: llc -march=hexagon -hexagon-opt-shuffvec -hexagon-widen-short-vector -hexagon-hvx-widen=32 -mv73 -mhvx -mattr=+hvx-length128b < %s +; REQUIRES: asserts + +define dllexport i32 @test(ptr noalias align 128 %0, ptr noalias align 128 %1, ptr noalias align 128 %2) local_unnamed_addr { +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %0, i32 128) ] + call void @llvm.assume(i1 true) [ "align"(ptr %1, i32 128) ] + call void @llvm.assume(i1 true) [ "align"(ptr %2, i32 128) ] + %3 = load <32 x i8>, ptr %2, align 128 + %4 = zext <32 x i8> %3 to <32 x i32> + %5 = mul nuw nsw <32 x i32> %4, + %scevgep = getelementptr i32, ptr %0, i32 128 + %scevgep13 = getelementptr i8, ptr %1, i32 128 + br label %for_begin1.preheader + +for_begin1.preheader: ; preds = %for_end3, %entry + %lsr.iv14 = phi ptr [ %scevgep15, %for_end3 ], [ %scevgep13, %entry ] + %lsr.iv1 = phi ptr [ %scevgep2, %for_end3 ], [ %scevgep, %entry ] + %6 = phi i32 [ 0, %entry ], [ %47, %for_end3 ] + br label %for_body2 + +for_end: ; preds = %for_end3 + ret i32 0 + +for_body2: ; preds = %for_body2, %for_begin1.preheader + %lsr.iv16 = phi ptr [ %scevgep17, %for_body2 ], [ %lsr.iv14, %for_begin1.preheader ] + %lsr.iv3 = phi ptr [ %scevgep4, %for_body2 ], [ %lsr.iv1, %for_begin1.preheader ] + %lsr.iv = phi i32 [ %lsr.iv.next, %for_body2 ], [ 128, %for_begin1.preheader ] 
+ %scevgep20 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 -4 + %7 = load <32 x i8>, ptr %scevgep20, align 128 + %8 = zext <32 x i8> %7 to <32 x i32> + %9 = mul nuw nsw <32 x i32> %8, + %10 = add nsw <32 x i32> %9, + %11 = add nsw <32 x i32> %10, %5 + %scevgep6 = getelementptr <32 x i32>, ptr %lsr.iv3, i32 -4 + store <32 x i32> %11, ptr %scevgep6, align 128 + %scevgep21 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 -3 + %12 = load <32 x i8>, ptr %scevgep21, align 32 + %13 = zext <32 x i8> %12 to <32 x i32> + %14 = mul nuw nsw <32 x i32> %13, + %15 = add nsw <32 x i32> %14, + %16 = add nsw <32 x i32> %15, %5 + %scevgep8 = getelementptr <32 x i32>, ptr %lsr.iv3, i32 -3 + store <32 x i32> %16, ptr %scevgep8, align 128 + %scevgep22 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 -2 + %17 = load <32 x i8>, ptr %scevgep22, align 64 + %18 = zext <32 x i8> %17 to <32 x i32> + %19 = mul nuw nsw <32 x i32> %18, + %20 = add nsw <32 x i32> %19, + %21 = add nsw <32 x i32> %20, %5 + %scevgep9 = getelementptr <32 x i32>, ptr %lsr.iv3, i32 -2 + store <32 x i32> %21, ptr %scevgep9, align 128 + %scevgep23 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 -1 + %22 = load <32 x i8>, ptr %scevgep23, align 32 + %23 = zext <32 x i8> %22 to <32 x i32> + %24 = mul nuw nsw <32 x i32> %23, + %25 = add nsw <32 x i32> %24, + %26 = add nsw <32 x i32> %25, %5 + %scevgep10 = getelementptr <32 x i32>, ptr %lsr.iv3, i32 -1 + store <32 x i32> %26, ptr %scevgep10, align 128 + %27 = load <32 x i8>, ptr %lsr.iv16, align 128 + %28 = zext <32 x i8> %27 to <32 x i32> + %29 = mul nuw nsw <32 x i32> %28, + %30 = add nsw <32 x i32> %29, + %31 = add nsw <32 x i32> %30, %5 + store <32 x i32> %31, ptr %lsr.iv3, align 128 + %scevgep24 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 1 + %32 = load <32 x i8>, ptr %scevgep24, align 32 + %33 = zext <32 x i8> %32 to <32 x i32> + %34 = mul nuw nsw <32 x i32> %33, + %35 = add nsw <32 x i32> %34, + %36 = add nsw <32 x i32> %35, %5 + %scevgep12 = getelementptr <32 x i32>, ptr 
%lsr.iv3, i32 1 + store <32 x i32> %36, ptr %scevgep12, align 128 + %scevgep25 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 2 + %37 = load <32 x i8>, ptr %scevgep25, align 64 + %38 = zext <32 x i8> %37 to <32 x i32> + %39 = mul nuw nsw <32 x i32> %38, + %40 = add nsw <32 x i32> %39, + %41 = add nsw <32 x i32> %40, %5 + %scevgep11 = getelementptr <32 x i32>, ptr %lsr.iv3, i32 2 + store <32 x i32> %41, ptr %scevgep11, align 128 + %scevgep19 = getelementptr <32 x i8>, ptr %lsr.iv16, i32 3 + %42 = load <32 x i8>, ptr %scevgep19, align 32 + %43 = zext <32 x i8> %42 to <32 x i32> + %44 = mul nuw nsw <32 x i32> %43, + %45 = add nsw <32 x i32> %44, + %46 = add nsw <32 x i32> %45, %5 + %scevgep7 = getelementptr <32 x i32>, ptr %lsr.iv3, i32 3 + store <32 x i32> %46, ptr %scevgep7, align 128 + %lsr.iv.next = add nsw i32 %lsr.iv, -8 + %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 256 + %scevgep17 = getelementptr i8, ptr %lsr.iv16, i32 256 + %exitcond.not.7 = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not.7, label %for_end3, label %for_body2 + +for_end3: ; preds = %for_body2 + %47 = add nuw nsw i32 %6, 1 + %scevgep2 = getelementptr i32, ptr %lsr.iv1, i32 4096 + %scevgep15 = getelementptr i8, ptr %lsr.iv14, i32 4096 + %exitcond4.not = icmp eq i32 %47, 128 + br i1 %exitcond4.not, label %for_end, label %for_begin1.preheader +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) diff --git a/llvm/test/CodeGen/Hexagon/vmpa.ll b/llvm/test/CodeGen/Hexagon/vmpa.ll new file mode 100644 index 0000000000000..10f18195dc1b7 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vmpa.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; Test for vmpa instruction. 
+ +; CHECK-LABEL: test_vmpa8 +; CHECK: = vmpa(v{{[0-9:]+}}.ub,r{{[0-9]+}}.b) + +; Function Attrs: nounwind +define dllexport void @test_vmpa8(i64 %seed0, i64 %seed1, + <128 x i8> %srcA, <128 x i8> %srcB, + i8* %dst) local_unnamed_addr { +entry: + %1 = trunc i64 %seed0 to i16 + %3 = trunc i64 %seed1 to i8 + %4 = and i8 %3, 127 + %5 = insertelement <128 x i8> poison, i8 %4, i32 0 + %6 = shufflevector <128 x i8> %5, <128 x i8> poison, <128 x i32> zeroinitializer + %7 = zext <128 x i8> %6 to <128 x i16> + %8 = and i16 %1, 127 + %9 = insertelement <128 x i16> poison, i16 %8, i32 0 + %10 = shufflevector <128 x i16> %9, <128 x i16> poison, <128 x i32> zeroinitializer + %11 = zext <128 x i8> %srcA to <128 x i16> + %12 = zext <128 x i8> %srcB to <128 x i16> + %13 = mul nuw nsw <128 x i16> %11, %7 + %14 = mul nuw nsw <128 x i16> %10, %12 + %15 = add nuw <128 x i16> %14, %13 + %16 = lshr <128 x i16> %15, + %17 = add <128 x i16> zeroinitializer, %16 + %18 = trunc <128 x i16> %17 to <128 x i8> + %21 = bitcast i8* %dst to <128 x i8>* + store <128 x i8> %18, <128 x i8>* %21, align 128 + ret void +} + +; CHECK-LABEL: test_vmpa16 +; CHECK: = vmpa(v{{[0-9:]+}}.uh,r{{[0-9]+}}.b) + +; Function Attrs: nounwind +define dllexport void @test_vmpa16(i64 %seed0, i64 %seed1, + <64 x i16> %srcA16, <64 x i16> %srcB16, + i16* %dst16) local_unnamed_addr { +entry: + %1 = trunc i64 %seed0 to i32 + %3 = trunc i64 %seed1 to i32 + %4 = and i32 %3, 127 + %5 = insertelement <64 x i32> poison, i32 %4, i32 0 + %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> zeroinitializer + %7 = and i32 %3, 127 + %8 = and i32 %1, 127 + %9 = insertelement <64 x i32> poison, i32 %8, i32 0 + %10 = shufflevector <64 x i32> %9, <64 x i32> poison, <64 x i32> zeroinitializer + %11 = zext <64 x i16> %srcA16 to <64 x i32> + %12 = zext <64 x i16> %srcB16 to <64 x i32> + %13 = mul nuw nsw <64 x i32> %11, %6 + %14 = mul nuw nsw <64 x i32> %10, %12 + %15 = add nuw <64 x i32> %14, %13 + %16 = lshr <64 x i32> %15, + ;, 
i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> + %17 = add <64 x i32> zeroinitializer, %16 + %18 = trunc <64 x i32> %17 to <64 x i16> + %21 = bitcast i16* %dst16 to <64 x i16>* + store <64 x i16> %18, <64 x i16>* %21, align 128 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/vmpy-const.ll b/llvm/test/CodeGen/Hexagon/vmpy-const.ll new file mode 100644 index 0000000000000..6b3f4c96ad364 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vmpy-const.ll @@ -0,0 +1,273 @@ +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; Make sure that the appropriate vmpy instructions are generated when +; multiplied with a vector of constant values. 
+ +; CHECK-LABEL: test_vmpy_const1 +; CHECK: v{{[0-9:]+}}.uh = vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.ub) +; CHECK: v{{[0-9:]+}}.uw = vunpack(v{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const1(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = mul nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const2 +; CHECK: v{{[0-9:]+}}.h = vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.b) +; CHECK: v{{[0-9:]+}}.w = vunpack(v{{[0-9]+}}.h) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const2(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = mul nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const2_1 +; CHECK: [[REG0:(r[0-9]+)]] = ##- +; CHECK: [[VREG0:(v[0-9]+)]] = vmem +; CHECK: [[VREG1:(v[0-9]+)]] = vsplat([[REG0]]) +; CHECK: = vunpack([[VREG0]].ub) +; CHECK: v{{[0-9:]+}}.w = vmpy([[VREG1]].h,v{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const2_1(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = mul nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const3 +; CHECK: v{{[0-9:]+}}.uw = vmpy(v{{[0-9]+}}.uh,r{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const3(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = mul nuw nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret 
void +} + +; CHECK-LABEL: test_vmpy_const4 +; CHECK: [[REG0:(r[0-9]+)]] = #- +; CHECK: [[VREG0:(v[0-9]+)]].h = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.w = vmpy([[VREG0]].h,v{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const4(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = mul nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const5 +; CHECK: [[REG0:(r[0-9]+)]] = #- +; CHECK: [[VREG0:(v[0-9]+)]].h = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.w = vmpy([[VREG0]].h,v{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const5(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = mul nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const6 +; CHECK: [[REG0:(r[0-9]+)]] = #-23 +; CHECK: [[VREG0:(v[0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:(v[0-9:]+.w)]] = vmpyieo(v{{[0-9]+}}.h,[[VREG0]].h) +; CHECK: [[VREG1]] += vmpyie + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const6(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <32 x i32>, ptr %a, align 4 + %0 = mul nsw <32 x i32> %wide.load, + store <32 x i32> %0, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const7 +; CHECK: [[REG0:(r[0-9]+)]] = ##.L +; CHECK: [[VREG0:(v[0-9]+)]] = vmemu(r0+#0) +; CHECK: [[VREG1:(v[0-9]+)]] = vmem([[REG0]]+#0) +; CHECK: v{{[0-9:]+}}.h = vmpy([[VREG0]].ub,[[VREG1]].b) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const7(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> 
%wide.load to <128 x i32> + %1 = mul nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const8 +; CHECK: v{{[0-9:]+}}.uh = vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const8(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i16> + %1 = mul nuw nsw <128 x i16> %0, + store <128 x i16> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const9 +; CHECK: v{{[0-9:]+}}.h = vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.b) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const9(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i16> + %1 = mul nuw nsw <128 x i16> %0, + store <128 x i16> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const10 +; CHECK: v{{[0-9:]+}}.uw = vmpy(v{{[0-9]+}}.uh,r{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const10(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i16>, ptr %a, align 1 + %0 = zext <128 x i16> %wide.load to <128 x i32> + %1 = mul nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const11 +; CHECK: v{{[0-9:]+}}.w = vmpy(v{{[0-9]+}}.h,r{{[0-9]+}}.h) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const11(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i16>, ptr %a, align 1 + %0 = sext <128 x i16> %wide.load to <128 x i32> + %1 = mul nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const12 +; CHECK: [[VREG0:(v[0-9]+)]] = vmemu(r{{[0-9\+\#0-9]+}}) +; CHECK: v{{[0-9:]+}}.h = 
vmpy(v{{[0-9]+}}.ub,[[VREG0]].b) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const12(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = sext <128 x i8> %wide.load to <128 x i16> + %1 = mul nuw nsw <128 x i16> %0, + store <128 x i16> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const13 +; CHECK: [[VREG0:(v[0-9]+)]] = vmemu(r{{[0-9\+\#0-9]+}}) +; CHECK: v{{[0-9:]+}}.w = vmpy([[VREG0]].h,v{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const13(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i16>, ptr %a, align 1 + %0 = sext <128 x i16> %wide.load to <128 x i32> + %1 = mul nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const14 +; CHECK: v{{[0-9:]+}}.uh = vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const14(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i16> + %1 = shl nuw nsw <128 x i16> %0, + store <128 x i16> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const15 +; CHECK: v{{[0-9:]+}}.uh = vunpack(v{{[0-9]+}}.ub) +; CHECK: v{{[0-9:]+}}.h = vasl(v{{[0-9]+}}.h,r{{[0-9]+}}) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const15(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i16> + %1 = shl nuw nsw <128 x i16> %0, + store <128 x i16> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const16 +; CHECK: v{{[0-9:]+}}.uw = vmpy(v{{[0-9]+}}.uh,r{{[0-9]+}}.uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const16(ptr nocapture 
readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i16>, ptr %a, align 1 + %0 = zext <128 x i16> %wide.load to <128 x i32> + %1 = shl nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy_const17 +; CHECK: v{{[0-9:]+}}.uw = vunpack(v{{[0-9]+}}.uh) +; CHECK: v{{[0-9:]+}}.w = vasl(v{{[0-9]+}}.w,r{{[0-9]+}}) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const17(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i16>, ptr %a, align 1 + %0 = zext <128 x i16> %wide.load to <128 x i32> + %1 = shl nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + + +; CHECK-LABEL: test_vmpy_const18 +; CHECK: r{{[0-9]+}} = #2 +; CHECK: v{{[0-9:]+}}.b = vsplat(r{{[0-9]+}}) +; CHECK: v{{[0-9:]+}}.h = vmpy(v{{[0-9]+}}.ub,v{{[0-9]+}}.b) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy_const18(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = sext <128 x i8> %wide.load to <128 x i32> + %1 = shl nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/vmpy-qfp-const.ll b/llvm/test/CodeGen/Hexagon/vmpy-qfp-const.ll new file mode 100644 index 0000000000000..c1e61fdaacceb --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vmpy-qfp-const.ll @@ -0,0 +1,71 @@ +; In this example, the operands of the fmul instruction are (fpext, constant_vector). The generated assembly +; should contain a vsplat instruction followed by a multiplication of two halves whose result is of type qf32. 
+; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b < %s | FileCheck %s + +; CHECK-LABEL: check1 +; CHECK: [[REG0:(r[0-9]+)]] = ## +; CHECK: [[VREG0:(v[0-9]+)]] = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.qf32 = vmpy(v{{[0-9]+}}.hf,[[VREG0]].hf) + +; Function Attrs: norecurse nounwind +define dso_local void @check1(half* nocapture readonly %a, float* nocapture %r) local_unnamed_addr { +entry: + %0 = bitcast half* %a to <64 x half>* + %wide.load = load <64 x half>, <64 x half>* %0, align 2 + %1 = fpext <64 x half> %wide.load to <64 x float> + %2= fmul <64 x float> %1, + %3 = bitcast float* %r to <64 x float>* + store <64 x float> %2, <64 x float>* %3, align 4 + ret void +} + +; Widening float vector with vector-width 128 +; CHECK-LABEL: check2 +; CHECK: v{{[0-9:]+}}.qf32 = vmpy(v{{[0-9]+}}.hf,[[VREG1:(v[0-9]+)]].hf) +; CHECK: [[VREG1]].cur = vmem(r{{[0-9\+\#0-9]+}}) +; CHECK: v{{[0-9:]+}}.qf32 = vmpy(v{{[0-9]+}}.hf,[[VREG1]].hf) +define dllexport void @check2(i8* noalias nocapture writeonly align 128 %0, i8* noalias nocapture readonly align 128 %1) #0 { + %3 = bitcast i8* %0 to <128 x float>* + %4 = bitcast i8* %1 to <128 x half>* + %5 = load <128 x half>, <128 x half>* %4, align 128 + %6 = fpext <128 x half> %5 to <128 x float> + %7 = fmul nnan nsz <128 x float> %6, + store <128 x float> %7, <128 x float>* %3, align 128 + ret void +} + +; The widening float vector pass does not handle instructions with +; vector-width > 128. Instead, during ISel, FPExtend on the operands +; of FMUL will generate a sequence of vmpy_qf32_hf, shuffle inst to +; convert float16 to float32. +; Later, a vmpy_qf32_sf instruction will be generated for multiplying +; the two operands of the FMUL instruction. 
+; CHECK-LABEL: check3 +; CHECK: v{{[0-9:]+}}.qf32 = vmpy(v{{[0-9]+}}.sf,v{{[0-9]+}}.sf) +define dllexport void @check3(i8* noalias nocapture writeonly align 256 %0, i8* noalias nocapture readonly align 256 %1) #0 { + %3 = bitcast i8* %0 to <256 x float>* + %4 = bitcast i8* %1 to <256 x half>* + %5 = load <256 x half>, <256 x half>* %4, align 128 + %6 = fpext <256 x half> %5 to <256 x float> + %7 = fmul nnan nsz <256 x float> %6, + store <256 x float> %7, <256 x float>* %3, align 128 + ret void +} + +; Widening float vector 32xf16 +; check4 also serves as a test case for HexagonOptShuffleVector with a single Hi/Lo use case, where the pass should prevent relocating the shuffle instruction generated by HexagonGenWideningVecFloatInstr; otherwise the function will be broken. +; CHECK-LABEL: check4 +; CHECK: v{{[0-9:]+}}.qf32 = vmpy(v{{[0-9]+}}.hf,v{{[0-9:]+}}.hf) +; CHECK: v{{[0-9:]+}} = vshuff(v{{[0-9]+}},v{{[0-9:]+}},r{{[0-9]+}}) +define dso_local void @check4(half* nocapture readonly %a, half* nocapture readonly %b, float* nocapture %r) local_unnamed_addr #0 { +entry: + %0 = bitcast half* %a to <32 x half>* + %wide.load.0 = load <32 x half>, <32 x half>* %0, align 2 + %1 = bitcast half* %b to <32 x half>* + %wide.load.1 = load <32 x half>, <32 x half>* %1, align 2 + %2 = fpext <32 x half> %wide.load.0 to <32 x float> + %3 = fpext <32 x half> %wide.load.1 to <32 x float> + %4= fmul <32 x float> %2, %3 + store <32 x float> %4, <32 x float>* %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/vsub-const.ll b/llvm/test/CodeGen/Hexagon/vsub-const.ll new file mode 100644 index 0000000000000..236fc0928feb9 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vsub-const.ll @@ -0,0 +1,112 @@ +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; Make sure that the appropriate vadd instructions are generated when +; added to a vector of constant values. 
+ +; CHECK-LABEL: test_vadd_const1 +; CHECK: [[REG0:(r[0-9]+)]] = # +; CHECK: [[VREG0:(v[0-9]+)]].b = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.h = vadd(v{{[0-9]+}}.ub,[[VREG0]].ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const1(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = add nuw nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const2 +; CHECK: [[REG0:(r[0-9]+)]] = #- +; CHECK: [[VREG0:([0-9]+)]].h = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:])]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.h = vadd(v{{[0-9:]+}}.h,{{v[VREG0]|v[VREG1]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const2(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = add nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const2_1 +; CHECK: [[REG0:(r[0-9]+)]] = #-270 +; CHECK: [[VREG0:([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}.w,{{v[VREG0]|v[VREG1]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const2_1(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %1 = add nsw <128 x i32> %0, + store <128 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const3 +; CHECK: [[REG0:(r[0-9]+)]] = # +; CHECK: [[VREG0:(v[0-9]+)]].h = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.w = vadd(v{{[0-9]+}}.uh,[[VREG0]].uh) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const3(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { 
+entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = add nuw nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const4 +; CHECK: [[REG0:(r[0-9]+)]] = #-23 +; CHECK: [[VREG0:([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}.w,{{v[VREG0]|v[VREG1]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const4(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = add nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const5 +; CHECK: [[REG0:(r[0-9]+)]] = #-257 +; CHECK: [[VREG0:([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG1:([0-9:]+)]] = v[[VREG0]] +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}.w,{{v[VREG0]|v[VREG1]}} + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const5(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <64 x i16>, ptr %a, align 2 + %0 = zext <64 x i16> %wide.load to <64 x i32> + %1 = add nsw <64 x i32> %0, + store <64 x i32> %1, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vadd_const6 +; CHECK: [[REG0:(r[0-9]+)]] = #-23 +; CHECK: [[VREG0:(v[0-9]+)]] = vsplat([[REG0]]) +; CHECK: v{{[0-9:]+}}.w = vadd({{.*}}[[VREG0]].w{{.*}}) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd_const6(ptr nocapture readonly %a, ptr nocapture %r) local_unnamed_addr #0 { +entry: + %wide.load = load <32 x i32>, ptr %a, align 4 + %0 = add nsw <32 x i32> %wide.load, + store <32 x i32> %0, ptr %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/widening-float-vec.ll b/llvm/test/CodeGen/Hexagon/widening-float-vec.ll new file mode 100644 index 0000000000000..c696457451e1c --- /dev/null +++ 
b/llvm/test/CodeGen/Hexagon/widening-float-vec.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=hexagon -mattr=+hvx-length128b,+hvxv68 < %s + +define void @_Z10range_flatIDF16bEvjT_S0_PS0_(i32 noundef %d, half noundef %start, half noundef %increm, ptr noundef %out) local_unnamed_addr { +entry: + %d.ripple.bcast.splatinsert = insertelement <64 x i32> poison, i32 %d, i64 0 + %d.ripple.bcast.splat = shufflevector <64 x i32> %d.ripple.bcast.splatinsert, <64 x i32> poison, <64 x i32> zeroinitializer + %0 = fpext half %increm to float + %.ripple.bcast.splatinsert = insertelement <64 x float> poison, float %0, i64 0 + %.ripple.bcast.splat = shufflevector <64 x float> %.ripple.bcast.splatinsert, <64 x float> poison, <64 x i32> zeroinitializer + %mul.ripple.vectorized = fmul <64 x float> %.ripple.bcast.splat, + %arrayidx = getelementptr i8, ptr %out, i32 0 + %1 = fptrunc <64 x float> %mul.ripple.vectorized to <64 x half> + store <64 x half> %1, ptr %arrayidx, align 2 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/widening-vec.ll b/llvm/test/CodeGen/Hexagon/widening-vec.ll new file mode 100644 index 0000000000000..7b7c100bd5666 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widening-vec.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=hexagon -mv73 -mhvx -mattr=+hvx-length128b < %s +; REQUIRES: asserts + +; This test checks for an assert. It happens when we attempt to generate widening vector instructions for a vector length that is not a multiple of the HW vector size (1024). 
+ +; Function Attrs: nofree norecurse nounwind +define dllexport i32 @foo(ptr noalias nocapture %0, ptr noalias nocapture readonly %1, ptr noalias nocapture readonly %2) local_unnamed_addr { +entry: + %3 = load <121 x i8>, ptr %2, align 1 + %4 = zext <121 x i8> %3 to <121 x i32> + %5 = mul nuw nsw <121 x i32> %4, + %6 = load <121 x i8>, ptr %1, align 1 + %7 = zext <121 x i8> %6 to <121 x i32> + %8 = mul nuw nsw <121 x i32> %7, + %9 = add nsw <121 x i32> %8, + %10 = add nsw <121 x i32> %9, %5 + store <121 x i32> %10, ptr %0, align 4 + ret i32 0 +} + +; The tests below check lowering of add, sub, mul when inputs are extended from 8 to 32 bits. + +; CHECK-LABEL: test_vadd1 +; CHECK: v{{.*}}.h = vadd(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vadd1(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %wide.load19 = load <128 x i8>, ptr %b, align 1 + %1 = zext <128 x i8> %wide.load19 to <128 x i32> + %2 = add nuw nsw <128 x i32> %1, %0 + store <128 x i32> %2, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vsub1 +; CHECK: v{{.*}}.h = vsub(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vsub1(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %wide.load19 = load <128 x i8>, ptr %b, align 1 + %1 = zext <128 x i8> %wide.load19 to <128 x i32> + %2 = sub nuw nsw <128 x i32> %1, %0 + store <128 x i32> %2, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy1 +; CHECK: v{{.*}}.uh = vmpy(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy1(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %r) 
local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = zext <128 x i8> %wide.load to <128 x i32> + %wide.load19 = load <128 x i8>, ptr %b, align 1 + %1 = zext <128 x i8> %wide.load19 to <128 x i32> + %2 = mul nuw nsw <128 x i32> %1, %0 + store <128 x i32> %2, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy4 +; CHECK: v{{[0-9:]+}}.h = vmpy(v{{[0-9]+}}.b,v{{[0-9]+}}.b) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy4(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = sext <128 x i8> %wide.load to <128 x i32> + %wide.load19 = load <128 x i8>, ptr %b, align 1 + %1 = sext <128 x i8> %wide.load19 to <128 x i32> + %2 = mul nuw nsw <128 x i32> %1, %0 + store <128 x i32> %2, ptr %r, align 4 + ret void +} + +; CHECK-LABEL: test_vmpy7 +; CHECK: v{{[0-9:]+}}.h = vmpy(v{{[0-9]+}}.ub,v{{[0-9]+}}.b) + +; Function Attrs: norecurse nounwind +define dso_local void @test_vmpy7(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %r) local_unnamed_addr { +entry: + %wide.load = load <128 x i8>, ptr %a, align 1 + %0 = sext <128 x i8> %wide.load to <128 x i32> + %wide.load19 = load <128 x i8>, ptr %b, align 1 + %1 = zext <128 x i8> %wide.load19 to <128 x i32> + %2 = mul nuw nsw <128 x i32> %1, %0 + store <128 x i32> %2, ptr %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/widening-vec2.ll b/llvm/test/CodeGen/Hexagon/widening-vec2.ll new file mode 100644 index 0000000000000..3fb288cca388c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widening-vec2.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +; Test to make sure that the widening vector instructions are being generated. 
+ +; CHECK: .uh = vmpy(v{{[0-9:]+}}.ub,v{{[0-9]+}}.ub) + +define dllexport void @test1() local_unnamed_addr { + %1 = load i64, i64* poison, align 8 + %2 = trunc i64 %1 to i16 + %3 = lshr i16 %2, 7 + %4 = and i16 %3, 255 + %broadcast.splatinsert.1 = insertelement <128 x i16> poison, i16 %4, i32 0 + %broadcast.splat.1 = shufflevector <128 x i16> %broadcast.splatinsert.1, <128 x i16> poison, <128 x i32> zeroinitializer + %scevgep = getelementptr i8, i8* null, i32 128 + %lsr.iv13 = bitcast i8* %scevgep to <128 x i8>* + %wide.load.1 = load <128 x i8>, <128 x i8>* poison, align 1 + %5 = zext <128 x i8> %wide.load.1 to <128 x i16> + %6 = mul nuw <128 x i16> %broadcast.splat.1, %5 + %7 = add <128 x i16> zeroinitializer, %6 + %trun = trunc <128 x i16> %7 to <128 x i8> + store <128 x i8> %trun, <128 x i8>* %lsr.iv13, align 1 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index 9006b5c8d6fe1..5f4fccdd72b12 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -9,9 +9,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 661f67d4989c4..546ed6cec5c4a 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -17,9 +17,11 @@ ; LAXX-LABEL: Pass Arguments: ; LAXX-NEXT: Target Library Information +; LAXX-NEXT: Runtime Library Function Analysis ; LAXX-NEXT: Target Pass Configuration ; LAXX-NEXT: Machine Module Information ; LAXX-NEXT: 
Target Transform Information +; LAXX-NEXT: Library Function Lowering Analysis ; LAXX-NEXT: Assumption Cache Tracker ; LAXX-NEXT: Type-Based Alias Analysis ; LAXX-NEXT: Scoped NoAlias Alias Analysis diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll index 38b1074e55d22..ac04be436f6a1 100644 --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -6,9 +6,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index 7cbb1a1c98873..fd8fd5fa34a17 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -5,9 +5,11 @@ ; REQUIRES: asserts ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis diff --git a/llvm/test/CodeGen/PowerPC/amo-enable.ll b/llvm/test/CodeGen/PowerPC/amo-enable.ll new file mode 100644 index 0000000000000..33739cca12492 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/amo-enable.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 
-ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-BE + +define void @test_us_lwat(ptr noundef %ptr, i32 noundef %value, ptr nocapture %resp) { +; CHECK-LABEL: test_us_lwat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: lwat r6, r3, 0 +; CHECK-NEXT: stw r6, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_us_lwat: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r7, r4 +; CHECK-BE-NEXT: lwat r6, r3, 0 +; CHECK-BE-NEXT: stw r6, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.amo.lwat(ptr %ptr, i32 %value, i32 0) + store i32 %0, ptr %resp, align 4 + ret void +} + +define void @test_us_ldat(ptr noundef %ptr, i64 noundef %value, ptr nocapture %resp) { +; CHECK-LABEL: test_us_ldat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: ldat r6, r3, 3 +; CHECK-NEXT: std r6, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_us_ldat: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r7, r4 +; CHECK-BE-NEXT: ldat r6, r3, 3 +; CHECK-BE-NEXT: std r6, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call i64 @llvm.ppc.amo.ldat(ptr %ptr, i64 %value, i32 3) + store i64 %0, ptr %resp, align 8 + ret void +} + +declare i64 @llvm.ppc.amo.ldat(ptr, i64, i32 immarg) +declare i32 @llvm.ppc.amo.lwat(ptr, i32, i32 immarg) + diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 8714b286374a5..42d30fcef2a9b 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -9,9 +9,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: 
Create Garbage Collector Module Metadata ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 3e2de780524b6..85027a56a1348 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -9,9 +9,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Type-Based Alias Analysis diff --git a/llvm/test/CodeGen/SPARC/fp128-abi.ll b/llvm/test/CodeGen/SPARC/fp128-abi.ll index 341e05d80e71e..b598d1b004832 100644 --- a/llvm/test/CodeGen/SPARC/fp128-abi.ll +++ b/llvm/test/CodeGen/SPARC/fp128-abi.ll @@ -87,3 +87,78 @@ define fp128 @f128_direct_spill(i32 %o0, i32 %o1, i32 %o2, i32 %o3, i32 %o4, i32 ret fp128 %ret } declare fp128 @f128_callee_spill(i32 %o0, i32 %o1, i32 %o2, i32 %o3, i32 %o4, i32 %o5, fp128 %a) + +define inreg { fp128, fp128 } @f128_complex(fp128 %num) nounwind { +; SPARC32-LABEL: f128_complex: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -192, %sp +; SPARC32-NEXT: ldd [%i0], %f0 +; SPARC32-NEXT: ldd [%i0+8], %f4 +; SPARC32-NEXT: std %f4, [%fp+-24] +; SPARC32-NEXT: std %f0, [%fp+-32] +; SPARC32-NEXT: std %f4, [%fp+-8] +; SPARC32-NEXT: add %fp, -16, %o0 +; SPARC32-NEXT: add %fp, -32, %o1 +; SPARC32-NEXT: call f128_complex_callee +; SPARC32-NEXT: std %f0, [%fp+-16] +; SPARC32-NEXT: sethi %hi(.LCPI2_0), %i0 +; SPARC32-NEXT: ldd [%i0+%lo(.LCPI2_0)], %f8 +; SPARC32-NEXT: add %i0, %lo(.LCPI2_0), %i0 +; SPARC32-NEXT: ldd [%i0+8], %f12 +; SPARC32-NEXT: std %f4, [%fp+-96] +; SPARC32-NEXT: std %f6, [%fp+-88] ! 
16-byte Folded Spill +; SPARC32-NEXT: std %f8, [%fp+-80] +; SPARC32-NEXT: std %f12, [%fp+-72] +; SPARC32-NEXT: std %f2, [%fp+-56] +; SPARC32-NEXT: std %f0, [%fp+-64] +; SPARC32-NEXT: add %fp, -48, %i0 +; SPARC32-NEXT: add %fp, -64, %o0 +; SPARC32-NEXT: add %fp, -80, %o1 +; SPARC32-NEXT: call _Q_add +; SPARC32-NEXT: st %i0, [%sp+64] +; SPARC32-NEXT: unimp 16 +; SPARC32-NEXT: ldd [%fp+-48], %f0 +; SPARC32-NEXT: ldd [%fp+-40], %f2 +; SPARC32-NEXT: ldd [%fp+-96], %f4 +; SPARC32-NEXT: ldd [%fp+-88], %f6 ! 16-byte Folded Reload +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC64-LABEL: f128_complex: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: save %sp, -240, %sp +; SPARC64-NEXT: fmovd %f0, %f4 +; SPARC64-NEXT: fmovd %f2, %f6 +; SPARC64-NEXT: call f128_complex_callee +; SPARC64-NEXT: nop +; SPARC64-NEXT: std %f4, [%fp+1983] +; SPARC64-NEXT: std %f6, [%fp+1991] ! 16-byte Folded Spill +; SPARC64-NEXT: sethi %h44(.LCPI2_0), %i0 +; SPARC64-NEXT: add %i0, %m44(.LCPI2_0), %i0 +; SPARC64-NEXT: sllx %i0, 12, %i0 +; SPARC64-NEXT: ldd [%i0+%l44(.LCPI2_0)], %f4 +; SPARC64-NEXT: add %i0, %l44(.LCPI2_0), %i0 +; SPARC64-NEXT: ldd [%i0+8], %f8 +; SPARC64-NEXT: std %f2, [%fp+2023] +; SPARC64-NEXT: std %f0, [%fp+2015] +; SPARC64-NEXT: std %f4, [%fp+1999] +; SPARC64-NEXT: std %f8, [%fp+2007] +; SPARC64-NEXT: add %fp, 2031, %o0 +; SPARC64-NEXT: add %fp, 2015, %o1 +; SPARC64-NEXT: call _Qp_add +; SPARC64-NEXT: add %fp, 1999, %o2 +; SPARC64-NEXT: ldd [%fp+2031], %f0 +; SPARC64-NEXT: ldd [%fp+2039], %f2 +; SPARC64-NEXT: ldd [%fp+1983], %f4 +; SPARC64-NEXT: ldd [%fp+1991], %f6 ! 
16-byte Folded Reload +; SPARC64-NEXT: ret +; SPARC64-NEXT: restore + %call = call inreg { fp128, fp128 } @f128_complex_callee(fp128 %num, fp128 %num) + %real = extractvalue { fp128, fp128 } %call, 0 + %imag = extractvalue { fp128, fp128 } %call, 1 + %add = fadd fp128 %real, 0xL00000000000000003FFF000000000000 + %tmp = insertvalue { fp128, fp128 } poison, fp128 %add, 0 + %ret = insertvalue { fp128, fp128 } %tmp, fp128 %imag, 1 + ret { fp128, fp128 } %ret +} +declare inreg { fp128, fp128 } @f128_complex_callee(fp128 %a, fp128 %b) diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll index 6db375445e4a3..3a1d0f7b5d218 100644 --- a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll +++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll @@ -11,9 +11,11 @@ ; REQUIRES:asserts ; SPIRV-O0:Target Library Information +; SPIRV-O0-NEXT:Runtime Library Function Analysis ; SPIRV-O0-NEXT:Target Pass Configuration ; SPIRV-O0-NEXT:Machine Module Information ; SPIRV-O0-NEXT:Target Transform Information +; SPIRV-O0-NEXT:Library Function Lowering Analysis ; SPIRV-O0-NEXT:Create Garbage Collector Module Metadata ; SPIRV-O0-NEXT:Assumption Cache Tracker ; SPIRV-O0-NEXT:Profile summary info @@ -83,9 +85,11 @@ ; SPIRV-O0-NEXT: Free MachineFunction ; SPIRV-Opt:Target Library Information +; SPIRV-Opt-NEXT:Runtime Library Function Analysis ; SPIRV-Opt-NEXT:Target Pass Configuration ; SPIRV-Opt-NEXT:Machine Module Information ; SPIRV-Opt-NEXT:Target Transform Information +; SPIRV-Opt-NEXT:Library Function Lowering Analysis ; SPIRV-Opt-NEXT:Assumption Cache Tracker ; SPIRV-Opt-NEXT:Type-Based Alias Analysis ; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 78a02b11b17bb..9223348abbcb9 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -7,9 +7,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: 
Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 276232e27c000..9f08658e067ab 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -13,9 +13,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll index f70f0d25f172d..54ece8d52f08a 100644 --- a/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK %s -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s ; Check the handling of potentially infinite numerators in the frem ; expansion at different optimization levels and with different diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll index 4c0f9db147c96..5cd6f1e8a6086 100644 --- 
a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -o - | FileCheck %s define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ; CHECK-LABEL: define amdgpu_kernel void @frem_f16( diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll new file mode 100644 index 0000000000000..2d5f2a7223e3a --- /dev/null +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll @@ -0,0 +1,6 @@ +; RUN: not opt -mtriple=amdgcn -passes=expand-fp -disable-output %s 2>&1 | FileCheck %s + +; CHECK: 'LibcallLoweringModuleAnalysis' analysis required +define void @empty() { + ret void +} diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll index 03cafd4ff1160..794d5805291b0 100644 --- a/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll @@ -1,18 +1,18 @@ -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output -; RUN: not opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=TOO-LARGE %s +; RUN: not opt 
-mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output 2>&1 | FileCheck --check-prefix=TOO-LARGE %s ; TOO-LARGE: {{.*}}invalid optimization level for expand-fp pass: 4 -; RUN: not opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=NON-NUMERIC %s +; RUN: not opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NON-NUMERIC %s ; NON-NUMERIC: {{.*}}invalid expand-fp pass parameter -; RUN: not opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=NEGATIVE %s +; RUN: not opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NEGATIVE %s ; NEGATIVE: {{.*}}invalid expand-fp pass parameter 'O-1' -; RUN: not opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=NO-O-PREFIX %s +; RUN: not opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NO-O-PREFIX %s ; NO-O-PREFIX: {{.*}}invalid expand-fp pass parameter 'foo' define void @empty() { diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll index f5bf8bb61a16e..0cf8829aec037 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s -; RUN: opt -S -mtriple=x86_64-- -passes=expand-fp < %s | FileCheck %s +; RUN: opt -S -mtriple=x86_64-- -passes='require,expand-fp' < %s | FileCheck %s define i129 @halftosi129(half %a) { ; CHECK-LABEL: @halftosi129( diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll 
b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll index 94ed32abe46f8..055e3e0dc261d 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s -; RUN: opt -S -mtriple=x86_64-- -passes=expand-fp < %s | FileCheck %s +; RUN: opt -S -mtriple=x86_64-- -passes='require,expand-fp' < %s | FileCheck %s define i129 @halftoui129(half %a) { ; CHECK-LABEL: @halftoui129( diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll index 8820b873f3818..af053e82a62a4 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s -; RUN: opt -S -mtriple=x86_64-- -passes=expand-fp < %s | FileCheck %s +; RUN: opt -S -mtriple=x86_64-- -passes='require,expand-fp' < %s | FileCheck %s define half @si129tohalf(i129 %a) { ; CHECK-LABEL: @si129tohalf( diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll index b58d88bc02c79..ede9b2a4cd049 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck 
%s -; RUN: opt -S -mtriple=x86_64-- -passes=expand-fp < %s | FileCheck %s +; RUN: opt -S -mtriple=x86_64-- -passes='require,expand-fp' < %s | FileCheck %s define half @ui129tohalf(i129 %a) { ; CHECK-LABEL: @ui129tohalf( diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll index 78bc0006fda23..e78eaeb70fbf1 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-optnone.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s -; RUN: opt -S -mtriple=x86_64-- -passes=expand-fp < %s | FileCheck %s +; RUN: opt -S -mtriple=x86_64-- -passes='require,expand-fp' < %s | FileCheck %s ; expand-fp must also run with optnone diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll index 2e4fc55a8f16d..e3dabfaedbdef 100644 --- a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll @@ -3,6 +3,7 @@ ; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s ; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s ; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s +; RUN: opt -p loop-unroll -mcpu=apple-a17 -S %s | FileCheck --check-prefix=APPLE-A17 %s ; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" @@ -20,56 +21,56 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) { ; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] ; APPLE-NEXT: br label %[[LOOP:.*]] ; APPLE: 
[[LOOP]]: -; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[LOOP]] ] -; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL]], [[SCALE]] -; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]] -; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4 -; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL]] -; APPLE-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4 -; APPLE-NEXT: [[IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[IV_EPIL]], 1 -; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT_EPIL]], [[SCALE]] +; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]] +; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]] +; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] +; APPLE-NEXT: store float [[L]], ptr [[GEP_DST]], align 4 +; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]] ; APPLE-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4 -; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_EPIL]] +; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]] ; APPLE-NEXT: store float [[L_1]], ptr [[GEP_DST_1]], align 4 -; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_EPIL]], 2 +; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; APPLE-NEXT: [[SCALED_IV_2:%.*]] = mul nuw nsw i64 [[IV_NEXT_1]], [[SCALE]] 
; APPLE-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_2]] ; APPLE-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; APPLE-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_1]] ; APPLE-NEXT: store float [[L_2]], ptr [[GEP_DST_2]], align 4 -; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_EPIL]], 3 +; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 ; APPLE-NEXT: [[SCALED_IV_3:%.*]] = mul nuw nsw i64 [[IV_NEXT_2]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_3]] ; APPLE-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_SRC_3]], align 4 ; APPLE-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_2]] ; APPLE-NEXT: store float [[L_3]], ptr [[GEP_DST_3]], align 4 -; APPLE-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_EPIL]], 4 +; APPLE-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4 ; APPLE-NEXT: [[SCALED_IV_4:%.*]] = mul nuw nsw i64 [[IV_NEXT_3]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_4]] ; APPLE-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_SRC_4]], align 4 ; APPLE-NEXT: [[GEP_DST_4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_3]] ; APPLE-NEXT: store float [[L_4]], ptr [[GEP_DST_4]], align 4 -; APPLE-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV_EPIL]], 5 +; APPLE-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5 ; APPLE-NEXT: [[SCALED_IV_5:%.*]] = mul nuw nsw i64 [[IV_NEXT_4]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_5]] ; APPLE-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_SRC_5]], align 4 ; APPLE-NEXT: [[GEP_DST_5:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_4]] ; APPLE-NEXT: store float [[L_5]], ptr [[GEP_DST_5]], align 4 -; APPLE-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV_EPIL]], 6 +; APPLE-NEXT: [[IV_NEXT_5:%.*]] = 
add nuw nsw i64 [[IV]], 6 ; APPLE-NEXT: [[SCALED_IV_6:%.*]] = mul nuw nsw i64 [[IV_NEXT_5]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_6:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_6]] ; APPLE-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_SRC_6]], align 4 ; APPLE-NEXT: [[GEP_DST_6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_5]] ; APPLE-NEXT: store float [[L_6]], ptr [[GEP_DST_6]], align 4 -; APPLE-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV_EPIL]], 7 +; APPLE-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7 ; APPLE-NEXT: [[SCALED_IV_7:%.*]] = mul nuw nsw i64 [[IV_NEXT_6]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_7:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_7]] ; APPLE-NEXT: [[L_7:%.*]] = load float, ptr [[GEP_SRC_7]], align 4 ; APPLE-NEXT: [[GEP_DST_7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_6]] ; APPLE-NEXT: store float [[L_7]], ptr [[GEP_DST_7]], align 4 -; APPLE-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV_EPIL]], 8 +; APPLE-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 ; APPLE-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 ; APPLE-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] ; APPLE-NEXT: br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]] @@ -83,15 +84,15 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) { ; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]]) ; APPLE-NEXT: br label %[[LOOP_EPIL:.*]] ; APPLE: [[LOOP_EPIL]]: -; APPLE-NEXT: [[IV_EPIL1:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_EPIL]] ] +; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] ; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ] -; APPLE-NEXT: [[SCALED_IV_EPIL1:%.*]] = mul nuw nsw i64 [[IV_EPIL1]], [[SCALE]] -; APPLE-NEXT: 
[[GEP_SRC_EPIL1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL1]] -; APPLE-NEXT: [[L_EPIL1:%.*]] = load float, ptr [[GEP_SRC_EPIL1]], align 4 -; APPLE-NEXT: [[GEP_DST_EPIL1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL1]] -; APPLE-NEXT: store float [[L_EPIL1]], ptr [[GEP_DST_EPIL1]], align 4 -; APPLE-NEXT: [[IV_NEXT_EPIL1]] = add nuw nsw i64 [[IV_EPIL1]], 1 -; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL1]], [[N]] +; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL]], [[SCALE]] +; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]] +; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4 +; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL]] +; APPLE-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4 +; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 +; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]] ; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] ; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]] @@ -100,6 +101,23 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) { ; APPLE: [[EXIT]]: ; APPLE-NEXT: ret void ; +; APPLE-A17-LABEL: define void @small_load_store_loop( +; APPLE-A17-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]] +; APPLE-A17-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]] +; APPLE-A17-NEXT: [[L:%.*]] = load 
float, ptr [[GEP_SRC]], align 4 +; APPLE-A17-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] +; APPLE-A17-NEXT: store float [[L]], ptr [[GEP_DST]], align 4 +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: ret void +; ; OTHER-LABEL: define void @small_load_store_loop( ; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -129,19 +147,19 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) { ; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]] ; OTHER: [[EXIT_UNR_LCSSA]]: -; OTHER-NEXT: [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] ; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] ; OTHER: [[LOOP_EPIL_PREHEADER]]: -; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ] +; OTHER-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] ; OTHER-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; OTHER-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]]) ; OTHER-NEXT: br label %[[LOOP_EPIL:.*]] ; OTHER: [[LOOP_EPIL]]: -; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]] +; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL_INIT]], [[SCALE]] ; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]] ; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4 -; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr 
inbounds float, ptr [[DST]], i64 [[IV_UNR]] +; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL_INIT]] ; OTHER-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4 ; OTHER-NEXT: br label %[[EXIT]] ; OTHER: [[EXIT]]: @@ -197,25 +215,43 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k ; APPLE-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; APPLE-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]] ; APPLE: [[EXIT_UNR_LCSSA]]: -; APPLE-NEXT: [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] ; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] ; APPLE: [[LOOP_EPIL_PREHEADER]]: -; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ] +; APPLE-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] ; APPLE-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]]) ; APPLE-NEXT: br label %[[LOOP_EPIL:.*]] ; APPLE: [[LOOP_EPIL]]: -; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]] +; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL_INIT]], [[SCALE]] ; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]] ; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4 ; APPLE-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]] -; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]] +; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL_INIT]] ; APPLE-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4 ; APPLE-NEXT: br label %[[EXIT]] ; APPLE: [[EXIT]]: ; APPLE-NEXT: ret 
void ; +; APPLE-A17-LABEL: define void @load_op_store_loop( +; APPLE-A17-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]] +; APPLE-A17-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]] +; APPLE-A17-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; APPLE-A17-NEXT: [[O:%.*]] = fadd float [[L]], [[K]] +; APPLE-A17-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] +; APPLE-A17-NEXT: store float [[O]], ptr [[GEP_DST]], align 4 +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: ret void +; ; OTHER-LABEL: define void @load_op_store_loop( ; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -247,20 +283,20 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k ; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]] ; OTHER: [[EXIT_UNR_LCSSA]]: -; OTHER-NEXT: [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] ; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] ; OTHER: [[LOOP_EPIL_PREHEADER]]: -; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ] +; OTHER-NEXT: [[IV_EPIL_INIT:%.*]] = 
phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] ; OTHER-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; OTHER-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]]) ; OTHER-NEXT: br label %[[LOOP_EPIL:.*]] ; OTHER: [[LOOP_EPIL]]: -; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]] +; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL_INIT]], [[SCALE]] ; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]] ; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4 ; OTHER-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]] -; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]] +; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL_INIT]] ; OTHER-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4 ; OTHER-NEXT: br label %[[EXIT]] ; OTHER: [[EXIT]]: @@ -312,6 +348,32 @@ define void @load_op_store_loop_multiblock(ptr %src, ptr %dst, i64 %N, i64 %scal ; APPLE: [[EXIT]]: ; APPLE-NEXT: ret void ; +; APPLE-A17-LABEL: define void @load_op_store_loop_multiblock( +; APPLE-A17-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOPCONT:.*]] ] +; APPLE-A17-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]] +; APPLE-A17-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]] +; APPLE-A17-NEXT: [[L1:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; APPLE-A17-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 +; APPLE-A17-NEXT: [[ODD:%.*]] = icmp eq i64 [[AND]], 1 +; APPLE-A17-NEXT: br i1 [[ODD]], label %[[LOOPODD:.*]], label %[[LOOPCONT]] +; APPLE-A17: [[LOOPCONT]]: +; APPLE-A17-NEXT: [[D:%.*]] = phi float [ [[L2:%.*]], 
%[[LOOPODD]] ], [ [[L1]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[O:%.*]] = fadd float [[D]], [[K]] +; APPLE-A17-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] +; APPLE-A17-NEXT: store float [[O]], ptr [[GEP_DST]], align 4 +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[LOOPODD]]: +; APPLE-A17-NEXT: [[L2]] = fneg float [[L1]] +; APPLE-A17-NEXT: br label %[[LOOPCONT]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: ret void +; ; OTHER-LABEL: define void @load_op_store_loop_multiblock( ; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -380,58 +442,58 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]] ; APPLE-NEXT: br label %[[LOOP_HEADER:.*]] ; APPLE: [[LOOP_HEADER]]: -; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 1, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ] +; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ] ; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP_LATCH_3]] ] -; APPLE-NEXT: [[GEP_EPIL:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_EPIL]] -; APPLE-NEXT: [[L_1_EPIL:%.*]] = load i32, ptr [[GEP_EPIL]], align 4 -; APPLE-NEXT: [[CMP6_NOT_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_1]] -; APPLE-NEXT: br i1 [[CMP6_NOT_EPIL]], label %[[THEN:.*]], label %[[LOOP_LATCH:.*]] +; APPLE-NEXT: [[GEP:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV]] +; APPLE-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP]], align 4 +; APPLE-NEXT: [[C_1:%.*]] = icmp sgt i32 [[L_1]], [[T_1]] +; APPLE-NEXT: br i1 [[C_1]], label %[[THEN:.*]], label %[[LOOP_LATCH:.*]] ; APPLE: 
[[THEN]]: -; APPLE-NEXT: [[GEP_4_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 4 -; APPLE-NEXT: [[L_2_EPIL:%.*]] = load i8, ptr [[GEP_4_EPIL]], align 4 -; APPLE-NEXT: [[OR_COND_EPIL:%.*]] = icmp ugt i8 [[L_2_EPIL]], 7 -; APPLE-NEXT: br i1 [[OR_COND_EPIL]], label %[[MERGE:.*]], label %[[ELSE:.*]] +; APPLE-NEXT: [[GEP_4:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4 +; APPLE-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_4]], align 4 +; APPLE-NEXT: [[C_2:%.*]] = icmp ugt i8 [[L_2]], 7 +; APPLE-NEXT: br i1 [[C_2]], label %[[MERGE:.*]], label %[[ELSE:.*]] ; APPLE: [[ELSE]]: -; APPLE-NEXT: [[CONV_I_EPIL:%.*]] = zext nneg i8 [[L_2_EPIL]] to i64 -; APPLE-NEXT: [[ARRAYIDX_I_EPIL:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_EPIL]] -; APPLE-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX_I_EPIL]], align 1 -; APPLE-NEXT: [[IDXPROM_I_EPIL:%.*]] = sext i8 [[TMP27]] to i64 -; APPLE-NEXT: [[ARRAYIDX_I37_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_EPIL]] -; APPLE-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX_I37_EPIL]], align 4 -; APPLE-NEXT: [[ARRAYIDX_I42_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_EPIL]] -; APPLE-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX_I42_EPIL]], align 4 +; APPLE-NEXT: [[CONV_I:%.*]] = zext nneg i8 [[L_2]] to i64 +; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I]] +; APPLE-NEXT: [[L_3:%.*]] = load i8, ptr [[GEP_A]], align 1 +; APPLE-NEXT: [[IDXPROM_I:%.*]] = sext i8 [[L_3]] to i64 +; APPLE-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I]] +; APPLE-NEXT: [[L_4:%.*]] = load i32, ptr [[GEP_B]], align 4 +; APPLE-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I]] +; APPLE-NEXT: [[L_5:%.*]] = load i32, ptr [[GEP_C]], align 4 ; APPLE-NEXT: br label %[[MERGE]] ; APPLE: [[MERGE]]: -; APPLE-NEXT: [[RETVAL_0_I3851_EPIL:%.*]] = phi i32 
[ 0, %[[THEN]] ], [ [[TMP28]], %[[ELSE]] ] -; APPLE-NEXT: [[RETVAL_0_I43_EPIL:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP29]], %[[ELSE]] ] -; APPLE-NEXT: [[ADD14_EPIL:%.*]] = add nsw i32 [[RETVAL_0_I43_EPIL]], [[X]] -; APPLE-NEXT: [[MUL15_EPIL:%.*]] = mul nsw i32 [[ADD14_EPIL]], [[WIDTH]] -; APPLE-NEXT: [[TMP30:%.*]] = trunc nuw nsw i64 [[IV_EPIL]] to i32 -; APPLE-NEXT: [[ADD16_EPIL:%.*]] = add nsw i32 [[RETVAL_0_I3851_EPIL]], [[TMP30]] -; APPLE-NEXT: [[ADD17_EPIL:%.*]] = add nsw i32 [[ADD16_EPIL]], [[MUL15_EPIL]] -; APPLE-NEXT: [[IDXPROM18_EPIL:%.*]] = sext i32 [[ADD17_EPIL]] to i64 -; APPLE-NEXT: [[ARRAYIDX19_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_EPIL]] -; APPLE-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX19_EPIL]], align 4 -; APPLE-NEXT: [[SUB_EPIL:%.*]] = sub nsw i32 [[X]], [[RETVAL_0_I43_EPIL]] -; APPLE-NEXT: [[MUL21_EPIL:%.*]] = mul nsw i32 [[SUB_EPIL]], [[WIDTH]] -; APPLE-NEXT: [[SUB22_EPIL:%.*]] = sub i32 [[TMP30]], [[RETVAL_0_I3851_EPIL]] -; APPLE-NEXT: [[ADD23_EPIL:%.*]] = add nsw i32 [[SUB22_EPIL]], [[MUL21_EPIL]] -; APPLE-NEXT: [[IDXPROM24_EPIL:%.*]] = sext i32 [[ADD23_EPIL]] to i64 -; APPLE-NEXT: [[ARRAYIDX25_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_EPIL]] -; APPLE-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX25_EPIL]], align 4 -; APPLE-NEXT: [[CMP27_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[TMP31]] -; APPLE-NEXT: [[CMP28_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[TMP32]] -; APPLE-NEXT: [[AND34_EPIL:%.*]] = and i1 [[CMP27_EPIL]], [[CMP28_EPIL]] -; APPLE-NEXT: br i1 [[AND34_EPIL]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]] +; APPLE-NEXT: [[MERGE_1:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_4]], %[[ELSE]] ] +; APPLE-NEXT: [[MERGE_2:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_5]], %[[ELSE]] ] +; APPLE-NEXT: [[ADD14:%.*]] = add nsw i32 [[MERGE_2]], [[X]] +; APPLE-NEXT: [[MUL15:%.*]] = mul nsw i32 [[ADD14]], [[WIDTH]] +; APPLE-NEXT: [[TMP3:%.*]] = trunc 
nuw nsw i64 [[IV]] to i32 +; APPLE-NEXT: [[ADD16:%.*]] = add nsw i32 [[MERGE_1]], [[TMP3]] +; APPLE-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD16]], [[MUL15]] +; APPLE-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64 +; APPLE-NEXT: [[GEP_P_2:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18]] +; APPLE-NEXT: [[L_6:%.*]] = load i32, ptr [[GEP_P_2]], align 4 +; APPLE-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[MERGE_2]] +; APPLE-NEXT: [[MUL21:%.*]] = mul nsw i32 [[SUB]], [[WIDTH]] +; APPLE-NEXT: [[SUB22:%.*]] = sub i32 [[TMP3]], [[MERGE_1]] +; APPLE-NEXT: [[ADD23:%.*]] = add nsw i32 [[SUB22]], [[MUL21]] +; APPLE-NEXT: [[IDXPROM24:%.*]] = sext i32 [[ADD23]] to i64 +; APPLE-NEXT: [[GEP_P2_1:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24]] +; APPLE-NEXT: [[L_7:%.*]] = load i32, ptr [[GEP_P2_1]], align 4 +; APPLE-NEXT: [[C_3:%.*]] = icmp sgt i32 [[L_1]], [[L_6]] +; APPLE-NEXT: [[C_4:%.*]] = icmp sgt i32 [[L_1]], [[L_7]] +; APPLE-NEXT: [[AND34:%.*]] = and i1 [[C_3]], [[C_4]] +; APPLE-NEXT: br i1 [[AND34]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]] ; APPLE: [[STORE_RES]]: -; APPLE-NEXT: [[CMP32_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_2]] -; APPLE-NEXT: [[GEP_5_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 5 -; APPLE-NEXT: [[RES_EPIL:%.*]] = select i1 [[CMP32_EPIL]], i8 1, i8 2 -; APPLE-NEXT: store i8 [[RES_EPIL]], ptr [[GEP_5_EPIL]], align 1 +; APPLE-NEXT: [[C_5:%.*]] = icmp sgt i32 [[L_1]], [[T_2]] +; APPLE-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 5 +; APPLE-NEXT: [[RES:%.*]] = select i1 [[C_5]], i8 1, i8 2 +; APPLE-NEXT: store i8 [[RES]], ptr [[GEP_5]], align 1 ; APPLE-NEXT: br label %[[LOOP_LATCH]] ; APPLE: [[LOOP_LATCH]]: -; APPLE-NEXT: [[IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[IV_EPIL]], 1 -; APPLE-NEXT: [[GEP_1:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_NEXT_EPIL]] +; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 
[[IV]], 1 +; APPLE-NEXT: [[GEP_1:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_NEXT]] ; APPLE-NEXT: [[L_1_1:%.*]] = load i32, ptr [[GEP_1]], align 4 ; APPLE-NEXT: [[C_1_1:%.*]] = icmp sgt i32 [[L_1_1]], [[T_1]] ; APPLE-NEXT: br i1 [[C_1_1]], label %[[THEN_1:.*]], label %[[LOOP_LATCH_1:.*]] @@ -455,7 +517,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE-NEXT: [[MERGE_2_1:%.*]] = phi i32 [ 0, %[[THEN_1]] ], [ [[L_5_1]], %[[ELSE_1]] ] ; APPLE-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[MERGE_2_1]], [[X]] ; APPLE-NEXT: [[MUL15_1:%.*]] = mul nsw i32 [[ADD14_1]], [[WIDTH]] -; APPLE-NEXT: [[TMP4:%.*]] = trunc nuw nsw i64 [[IV_NEXT_EPIL]] to i32 +; APPLE-NEXT: [[TMP4:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32 ; APPLE-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[MERGE_1_1]], [[TMP4]] ; APPLE-NEXT: [[ADD17_1:%.*]] = add nsw i32 [[ADD16_1]], [[MUL15_1]] ; APPLE-NEXT: [[IDXPROM18_1:%.*]] = sext i32 [[ADD17_1]] to i64 @@ -479,7 +541,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE-NEXT: store i8 [[RES_1]], ptr [[GEP_5_1]], align 1 ; APPLE-NEXT: br label %[[LOOP_LATCH_1]] ; APPLE: [[LOOP_LATCH_1]]: -; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_EPIL]], 2 +; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; APPLE-NEXT: [[GEP_2:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_NEXT_1]] ; APPLE-NEXT: [[L_1_2:%.*]] = load i32, ptr [[GEP_2]], align 4 ; APPLE-NEXT: [[C_1_2:%.*]] = icmp sgt i32 [[L_1_2]], [[T_1]] @@ -528,7 +590,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE-NEXT: store i8 [[RES_2]], ptr [[GEP_5_2]], align 1 ; APPLE-NEXT: br label %[[LOOP_LATCH_2]] ; APPLE: [[LOOP_LATCH_2]]: -; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_EPIL]], 3 +; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 ; APPLE-NEXT: [[GEP_3:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], 
i64 [[IV_NEXT_2]] ; APPLE-NEXT: [[L_1_3:%.*]] = load i32, ptr [[GEP_3]], align 4 ; APPLE-NEXT: [[C_1_3:%.*]] = icmp sgt i32 [[L_1_3]], [[T_1]] @@ -577,7 +639,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE-NEXT: store i8 [[RES_3]], ptr [[GEP_5_3]], align 1 ; APPLE-NEXT: br label %[[LOOP_LATCH_3]] ; APPLE: [[LOOP_LATCH_3]]: -; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_EPIL]], 4 +; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 ; APPLE-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4 ; APPLE-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]] ; APPLE-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]] @@ -591,58 +653,58 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]]) ; APPLE-NEXT: br label %[[LOOP_HEADER_EPIL:.*]] ; APPLE: [[LOOP_HEADER_EPIL]]: -; APPLE-NEXT: [[IV_EPIL1:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_LATCH_EPIL:.*]] ] +; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_LATCH_EPIL:.*]] ] ; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_LATCH_EPIL]] ] -; APPLE-NEXT: [[GEP_EPIL1:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_EPIL1]] -; APPLE-NEXT: [[L_1_EPIL1:%.*]] = load i32, ptr [[GEP_EPIL1]], align 4 -; APPLE-NEXT: [[C_1_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL1]], [[T_1]] +; APPLE-NEXT: [[GEP_EPIL:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_EPIL]] +; APPLE-NEXT: [[L_1_EPIL:%.*]] = load i32, ptr [[GEP_EPIL]], align 4 +; APPLE-NEXT: [[C_1_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_1]] ; APPLE-NEXT: br i1 [[C_1_EPIL]], label %[[THEN_EPIL:.*]], label %[[LOOP_LATCH_EPIL]] ; APPLE: [[THEN_EPIL]]: -; APPLE-NEXT: 
[[GEP_4_EPIL1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL1]], i64 4 -; APPLE-NEXT: [[L_2_EPIL1:%.*]] = load i8, ptr [[GEP_4_EPIL1]], align 4 -; APPLE-NEXT: [[C_2_EPIL:%.*]] = icmp ugt i8 [[L_2_EPIL1]], 7 +; APPLE-NEXT: [[GEP_4_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 4 +; APPLE-NEXT: [[L_2_EPIL:%.*]] = load i8, ptr [[GEP_4_EPIL]], align 4 +; APPLE-NEXT: [[C_2_EPIL:%.*]] = icmp ugt i8 [[L_2_EPIL]], 7 ; APPLE-NEXT: br i1 [[C_2_EPIL]], label %[[MERGE_EPIL:.*]], label %[[ELSE_EPIL:.*]] ; APPLE: [[ELSE_EPIL]]: -; APPLE-NEXT: [[CONV_I_EPIL1:%.*]] = zext nneg i8 [[L_2_EPIL1]] to i64 -; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_EPIL1]] +; APPLE-NEXT: [[CONV_I_EPIL:%.*]] = zext nneg i8 [[L_2_EPIL]] to i64 +; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_EPIL]] ; APPLE-NEXT: [[L_3_EPIL:%.*]] = load i8, ptr [[GEP_A_EPIL]], align 1 -; APPLE-NEXT: [[IDXPROM_I_EPIL1:%.*]] = sext i8 [[L_3_EPIL]] to i64 -; APPLE-NEXT: [[GEP_B_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_EPIL1]] +; APPLE-NEXT: [[IDXPROM_I_EPIL:%.*]] = sext i8 [[L_3_EPIL]] to i64 +; APPLE-NEXT: [[GEP_B_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_EPIL]] ; APPLE-NEXT: [[L_4_EPIL:%.*]] = load i32, ptr [[GEP_B_EPIL]], align 4 -; APPLE-NEXT: [[GEP_C_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_EPIL1]] +; APPLE-NEXT: [[GEP_C_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_EPIL]] ; APPLE-NEXT: [[L_5_EPIL:%.*]] = load i32, ptr [[GEP_C_EPIL]], align 4 ; APPLE-NEXT: br label %[[MERGE_EPIL]] ; APPLE: [[MERGE_EPIL]]: ; APPLE-NEXT: [[MERGE_1_EPIL:%.*]] = phi i32 [ 0, %[[THEN_EPIL]] ], [ [[L_4_EPIL]], %[[ELSE_EPIL]] ] ; APPLE-NEXT: [[MERGE_2_EPIL:%.*]] = phi i32 [ 0, %[[THEN_EPIL]] ], [ [[L_5_EPIL]], %[[ELSE_EPIL]] ] -; APPLE-NEXT: [[ADD14_EPIL1:%.*]] = add nsw i32 
[[MERGE_2_EPIL]], [[X]] -; APPLE-NEXT: [[MUL15_EPIL1:%.*]] = mul nsw i32 [[ADD14_EPIL1]], [[WIDTH]] -; APPLE-NEXT: [[TMP7:%.*]] = trunc nuw nsw i64 [[IV_EPIL1]] to i32 -; APPLE-NEXT: [[ADD16_EPIL1:%.*]] = add nsw i32 [[MERGE_1_EPIL]], [[TMP7]] -; APPLE-NEXT: [[ADD17_EPIL1:%.*]] = add nsw i32 [[ADD16_EPIL1]], [[MUL15_EPIL1]] -; APPLE-NEXT: [[IDXPROM18_EPIL1:%.*]] = sext i32 [[ADD17_EPIL1]] to i64 -; APPLE-NEXT: [[GEP_P_2_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_EPIL1]] +; APPLE-NEXT: [[ADD14_EPIL:%.*]] = add nsw i32 [[MERGE_2_EPIL]], [[X]] +; APPLE-NEXT: [[MUL15_EPIL:%.*]] = mul nsw i32 [[ADD14_EPIL]], [[WIDTH]] +; APPLE-NEXT: [[TMP7:%.*]] = trunc nuw nsw i64 [[IV_EPIL]] to i32 +; APPLE-NEXT: [[ADD16_EPIL:%.*]] = add nsw i32 [[MERGE_1_EPIL]], [[TMP7]] +; APPLE-NEXT: [[ADD17_EPIL:%.*]] = add nsw i32 [[ADD16_EPIL]], [[MUL15_EPIL]] +; APPLE-NEXT: [[IDXPROM18_EPIL:%.*]] = sext i32 [[ADD17_EPIL]] to i64 +; APPLE-NEXT: [[GEP_P_2_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_EPIL]] ; APPLE-NEXT: [[L_6_EPIL:%.*]] = load i32, ptr [[GEP_P_2_EPIL]], align 4 -; APPLE-NEXT: [[SUB_EPIL1:%.*]] = sub nsw i32 [[X]], [[MERGE_2_EPIL]] -; APPLE-NEXT: [[MUL21_EPIL1:%.*]] = mul nsw i32 [[SUB_EPIL1]], [[WIDTH]] -; APPLE-NEXT: [[SUB22_EPIL1:%.*]] = sub i32 [[TMP7]], [[MERGE_1_EPIL]] -; APPLE-NEXT: [[ADD23_EPIL1:%.*]] = add nsw i32 [[SUB22_EPIL1]], [[MUL21_EPIL1]] -; APPLE-NEXT: [[IDXPROM24_EPIL1:%.*]] = sext i32 [[ADD23_EPIL1]] to i64 -; APPLE-NEXT: [[GEP_P2_1_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_EPIL1]] +; APPLE-NEXT: [[SUB_EPIL:%.*]] = sub nsw i32 [[X]], [[MERGE_2_EPIL]] +; APPLE-NEXT: [[MUL21_EPIL:%.*]] = mul nsw i32 [[SUB_EPIL]], [[WIDTH]] +; APPLE-NEXT: [[SUB22_EPIL:%.*]] = sub i32 [[TMP7]], [[MERGE_1_EPIL]] +; APPLE-NEXT: [[ADD23_EPIL:%.*]] = add nsw i32 [[SUB22_EPIL]], [[MUL21_EPIL]] +; APPLE-NEXT: [[IDXPROM24_EPIL:%.*]] = sext i32 
[[ADD23_EPIL]] to i64 +; APPLE-NEXT: [[GEP_P2_1_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_EPIL]] ; APPLE-NEXT: [[L_7_EPIL:%.*]] = load i32, ptr [[GEP_P2_1_EPIL]], align 4 -; APPLE-NEXT: [[C_3_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL1]], [[L_6_EPIL]] -; APPLE-NEXT: [[C_4_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL1]], [[L_7_EPIL]] -; APPLE-NEXT: [[AND34_EPIL1:%.*]] = and i1 [[C_3_EPIL]], [[C_4_EPIL]] -; APPLE-NEXT: br i1 [[AND34_EPIL1]], label %[[STORE_RES_EPIL:.*]], label %[[LOOP_LATCH_EPIL]] +; APPLE-NEXT: [[C_3_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[L_6_EPIL]] +; APPLE-NEXT: [[C_4_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[L_7_EPIL]] +; APPLE-NEXT: [[AND34_EPIL:%.*]] = and i1 [[C_3_EPIL]], [[C_4_EPIL]] +; APPLE-NEXT: br i1 [[AND34_EPIL]], label %[[STORE_RES_EPIL:.*]], label %[[LOOP_LATCH_EPIL]] ; APPLE: [[STORE_RES_EPIL]]: -; APPLE-NEXT: [[C_5_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL1]], [[T_2]] -; APPLE-NEXT: [[GEP_5_EPIL1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL1]], i64 5 -; APPLE-NEXT: [[RES_EPIL1:%.*]] = select i1 [[C_5_EPIL]], i8 1, i8 2 -; APPLE-NEXT: store i8 [[RES_EPIL1]], ptr [[GEP_5_EPIL1]], align 1 +; APPLE-NEXT: [[C_5_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_2]] +; APPLE-NEXT: [[GEP_5_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 5 +; APPLE-NEXT: [[RES_EPIL:%.*]] = select i1 [[C_5_EPIL]], i8 1, i8 2 +; APPLE-NEXT: store i8 [[RES_EPIL]], ptr [[GEP_5_EPIL]], align 1 ; APPLE-NEXT: br label %[[LOOP_LATCH_EPIL]] ; APPLE: [[LOOP_LATCH_EPIL]]: -; APPLE-NEXT: [[IV_NEXT_EPIL1]] = add nuw nsw i64 [[IV_EPIL1]], 1 -; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL1]], [[N]] +; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 +; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]] ; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] ; APPLE-NEXT: br i1 
[[EPIL_ITER_CMP]], label %[[LOOP_HEADER_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]] @@ -651,6 +713,66 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x ; APPLE: [[EXIT]]: ; APPLE-NEXT: ret void ; +; APPLE-A17-LABEL: define void @early_continue_dep_on_load_large( +; APPLE-A17-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP_HEADER:.*]] +; APPLE-A17: [[LOOP_HEADER]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; APPLE-A17-NEXT: [[GEP:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV]] +; APPLE-A17-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP]], align 4 +; APPLE-A17-NEXT: [[C_1:%.*]] = icmp sgt i32 [[L_1]], [[T_1]] +; APPLE-A17-NEXT: br i1 [[C_1]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; APPLE-A17: [[THEN]]: +; APPLE-A17-NEXT: [[GEP_4:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4 +; APPLE-A17-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_4]], align 4 +; APPLE-A17-NEXT: [[C_2:%.*]] = icmp ugt i8 [[L_2]], 7 +; APPLE-A17-NEXT: br i1 [[C_2]], label %[[MERGE:.*]], label %[[ELSE:.*]] +; APPLE-A17: [[ELSE]]: +; APPLE-A17-NEXT: [[CONV_I:%.*]] = zext nneg i8 [[L_2]] to i64 +; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I]] +; APPLE-A17-NEXT: [[L_3:%.*]] = load i8, ptr [[GEP_A]], align 1 +; APPLE-A17-NEXT: [[IDXPROM_I:%.*]] = sext i8 [[L_3]] to i64 +; APPLE-A17-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I]] +; APPLE-A17-NEXT: [[L_4:%.*]] = load i32, ptr [[GEP_B]], align 4 +; APPLE-A17-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I]] +; APPLE-A17-NEXT: [[L_5:%.*]] = load i32, ptr [[GEP_C]], align 4 +; APPLE-A17-NEXT: br label %[[MERGE]] +; APPLE-A17: 
[[MERGE]]: +; APPLE-A17-NEXT: [[MERGE_1:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_4]], %[[ELSE]] ] +; APPLE-A17-NEXT: [[MERGE_2:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_5]], %[[ELSE]] ] +; APPLE-A17-NEXT: [[ADD14:%.*]] = add nsw i32 [[MERGE_2]], [[X]] +; APPLE-A17-NEXT: [[MUL15:%.*]] = mul nsw i32 [[ADD14]], [[WIDTH]] +; APPLE-A17-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[IV]] to i32 +; APPLE-A17-NEXT: [[ADD16:%.*]] = add nsw i32 [[MERGE_1]], [[TMP0]] +; APPLE-A17-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD16]], [[MUL15]] +; APPLE-A17-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64 +; APPLE-A17-NEXT: [[GEP_P_2:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18]] +; APPLE-A17-NEXT: [[L_6:%.*]] = load i32, ptr [[GEP_P_2]], align 4 +; APPLE-A17-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[MERGE_2]] +; APPLE-A17-NEXT: [[MUL21:%.*]] = mul nsw i32 [[SUB]], [[WIDTH]] +; APPLE-A17-NEXT: [[SUB22:%.*]] = sub i32 [[TMP0]], [[MERGE_1]] +; APPLE-A17-NEXT: [[ADD23:%.*]] = add nsw i32 [[SUB22]], [[MUL21]] +; APPLE-A17-NEXT: [[IDXPROM24:%.*]] = sext i32 [[ADD23]] to i64 +; APPLE-A17-NEXT: [[GEP_P2_1:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24]] +; APPLE-A17-NEXT: [[L_7:%.*]] = load i32, ptr [[GEP_P2_1]], align 4 +; APPLE-A17-NEXT: [[C_3:%.*]] = icmp sgt i32 [[L_1]], [[L_6]] +; APPLE-A17-NEXT: [[C_4:%.*]] = icmp sgt i32 [[L_1]], [[L_7]] +; APPLE-A17-NEXT: [[AND34:%.*]] = and i1 [[C_3]], [[C_4]] +; APPLE-A17-NEXT: br i1 [[AND34]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]] +; APPLE-A17: [[STORE_RES]]: +; APPLE-A17-NEXT: [[C_5:%.*]] = icmp sgt i32 [[L_1]], [[T_2]] +; APPLE-A17-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 5 +; APPLE-A17-NEXT: [[RES:%.*]] = select i1 [[C_5]], i8 1, i8 2 +; APPLE-A17-NEXT: store i8 [[RES]], ptr [[GEP_5]], align 1 +; APPLE-A17-NEXT: br label %[[LOOP_LATCH]] +; APPLE-A17: [[LOOP_LATCH]]: +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; 
APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: ret void +; ; OTHER-LABEL: define void @early_continue_dep_on_load_large( ; OTHER-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -813,6 +935,23 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) { ; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]] ; APPLE-NEXT: ret i32 [[BIN_RDX2]] ; +; APPLE-A17-LABEL: define i32 @test_add_reduction_unroll_partial( +; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; APPLE-A17-NEXT: ret i32 [[RES]] +; ; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial( ; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -826,11 +965,11 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) { ; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] ; OTHER-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2 -; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]] +; OTHER-NEXT: [[RDX_NEXT_1:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]] ; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] ; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2 -; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP2]] +; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_NEXT_1]], [[TMP2]] ; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 ; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]] ; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2 @@ -839,8 +978,8 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) { ; OTHER-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024 ; OTHER-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]] ; OTHER: [[EXIT]]: -; OTHER-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] -; OTHER-NEXT: ret i32 [[BIN_RDX2]] +; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; OTHER-NEXT: ret i32 [[RES]] ; entry: br label %loop @@ -886,6 +1025,29 @@ define i32 @test_add_reduction_multi_block(ptr %a, i64 noundef %n) { ; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ] ; APPLE-NEXT: ret i32 [[RES]] ; +; APPLE-A17-LABEL: define i32 @test_add_reduction_multi_block( +; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ] +; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-A17-NEXT: [[TMP0:%.*]] = load 
i32, ptr [[GEP_A]], align 2 +; APPLE-A17-NEXT: [[C:%.*]] = call i1 @cond() +; APPLE-A17-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; APPLE-A17: [[THEN]]: +; APPLE-A17-NEXT: store i32 0, ptr [[GEP_A]], align 4 +; APPLE-A17-NEXT: br label %[[LOOP_LATCH]] +; APPLE-A17: [[LOOP_LATCH]]: +; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ] +; APPLE-A17-NEXT: ret i32 [[RES]] +; ; OTHER-LABEL: define i32 @test_add_reduction_multi_block( ; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -942,19 +1104,19 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) { ; APPLE-NEXT: br label %[[LOOP:.*]] ; APPLE: [[LOOP]]: ; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] -; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_21:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] -; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RES_2:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] ; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 -; APPLE-NEXT: [[RES_2]] = add i32 [[RDX]], [[TMP0]] +; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]] ; APPLE-NEXT: 
[[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]] ; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] ; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2 -; APPLE-NEXT: [[BIN_RDX3]] = add i32 [[RDX_1]], [[TMP1]] +; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]] ; APPLE-NEXT: [[RDX_2_NEXT_1:%.*]] = mul i32 [[RDX_2_NEXT]], [[TMP1]] ; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] @@ -971,12 +1133,33 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) { ; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]] ; APPLE: [[EXIT]]: ; APPLE-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] -; APPLE-NEXT: [[RES_3:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ] +; APPLE-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ] +; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] +; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] +; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]] ; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]] -; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[SUM]] -; APPLE-NEXT: [[BIN_RDX4:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]] -; APPLE-NEXT: [[SUM1:%.*]] = add i32 [[BIN_RDX4]], [[RES_3]] -; APPLE-NEXT: ret i32 [[SUM1]] +; APPLE-NEXT: ret i32 [[SUM]] +; +; APPLE-A17-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial( +; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[RDX_2:%.*]] = 
phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-A17-NEXT: [[RDX_2_NEXT]] = mul i32 [[RDX_2]], [[TMP0]] +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[SUM:%.*]] = add i32 [[RES_1]], [[RES_2]] +; APPLE-A17-NEXT: ret i32 [[SUM]] ; ; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial( ; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { @@ -999,9 +1182,9 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) { ; OTHER-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT_1]], 1024 ; OTHER-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP]] ; OTHER: [[EXIT]]: -; OTHER-NEXT: [[BIN_RDX:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] +; OTHER-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] ; OTHER-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_1]], %[[LOOP]] ] -; OTHER-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX]], [[RES_2]] +; OTHER-NEXT: [[SUM:%.*]] = add i32 [[RES_1]], [[RES_2]] ; OTHER-NEXT: ret i32 [[SUM]] ; entry: @@ -1039,28 +1222,28 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) { ; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] ; APPLE-NEXT: br label %[[LOOP:.*]] ; APPLE: [[LOOP]]: -; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, 
%[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] ; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ] -; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]] -; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2 -; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP6]] -; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV_EPIL]], 1 +; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP2]] +; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] ; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2 ; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]] -; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_EPIL]], 2 +; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] ; APPLE-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2 ; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP4]] -; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_EPIL]], 3 +; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 ; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]] ; APPLE-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2 ; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP5]] -; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_EPIL]], 4 +; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 ; 
APPLE-NEXT: [[NITER_NEXT_3]] = add nuw i64 [[NITER]], 4 ; APPLE-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]] ; APPLE-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]] @@ -1069,24 +1252,24 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) { ; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ] ; APPLE-NEXT: [[RDX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] ; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] -; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] -; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]] +; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] +; APPLE-NEXT: [[BIN_RDX4:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX3]] ; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] ; APPLE: [[LOOP_EPIL_PREHEADER]]: ; APPLE-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] -; APPLE-NEXT: [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA]] ] +; APPLE-NEXT: [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX4]], %[[EXIT_UNR_LCSSA]] ] ; APPLE-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 ; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]]) ; APPLE-NEXT: br label %[[LOOP_EPIL:.*]] ; APPLE: [[LOOP_EPIL]]: -; APPLE-NEXT: [[IV_EPIL1:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] +; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] ; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] ; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ] -; APPLE-NEXT: 
[[GEP_A_EPIL1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL1]] -; APPLE-NEXT: [[TMP7:%.*]] = load i32, ptr [[GEP_A_EPIL1]], align 2 -; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP7]] -; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL1]], 1 +; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]] +; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2 +; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]] +; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 ; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]] ; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 ; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] @@ -1095,9 +1278,26 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) { ; APPLE-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] ; APPLE-NEXT: br label %[[EXIT]] ; APPLE: [[EXIT]]: -; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ] +; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[BIN_RDX4]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ] ; APPLE-NEXT: ret i32 [[RES]] ; +; APPLE-A17-LABEL: define i32 @test_add_reduction_runtime( +; APPLE-A17-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-A17-NEXT: [[ENTRY:.*]]: +; APPLE-A17-NEXT: br label %[[LOOP:.*]] +; APPLE-A17: [[LOOP]]: +; APPLE-A17-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; APPLE-A17-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-A17-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-A17-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-A17-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-A17-NEXT: 
[[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; APPLE-A17-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE-A17: [[EXIT]]: +; APPLE-A17-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; APPLE-A17-NEXT: ret i32 [[RES]] +; ; OTHER-LABEL: define i32 @test_add_reduction_runtime( ; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { ; OTHER-NEXT: [[ENTRY:.*]]: @@ -1118,11 +1318,11 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) { ; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] ; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2 -; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]] +; OTHER-NEXT: [[RDX_NEXT_1:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]] ; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] ; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2 -; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP4]] +; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_NEXT_1]], [[TMP4]] ; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 ; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]] ; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll index b612bfb88198e..02c0b676374f4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll @@ -48,4 +48,37 @@ exit: ret void } +define i64 @sdiv_arg_outer_iv(ptr noalias %dst, ptr %src) { +; CHECK: 'sdiv_arg_outer_iv' +; CHECK: Cost of 0 for VF 2: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8> +; CHECK: Cost of 0 for VF 4: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8> +; CHECK: 
Cost of 0 for VF 8: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8> +; CHECK: Cost of 0 for VF 16: REPLICATE ir<%div> = sdiv ir<%add.offset>, ir<8> +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %offset = shl nsw i32 %outer.iv, 7 + br label %loop + +loop: + %iv = phi i64 [ 0, %outer.header ], [ %iv.next, %loop ] + %iv.trunc = trunc i64 %iv to i32 + %add.offset = add i32 %offset, %iv.trunc + %div = sdiv i32 %add.offset, 8 + %div.ext = sext i32 %div to i64 + %gep.src = getelementptr i8, ptr %src, i64 %div.ext + %l = load i8, ptr %gep.src, align 1 + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store i8 %l, ptr %gep.dst, align 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 64 + br i1 %ec, label %outer.latch, label %loop + +outer.latch: + %outer.iv.next = add nsw i32 %outer.iv, 1 + br label %outer.header +} + attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" } diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-first-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-first-index.ll index bf5362f2e740f..49d6ac548c330 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-first-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-first-index.ll @@ -9,15 +9,15 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, 
ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: @@ -216,3 +216,47 @@ exit: %res = phi i32 [ %min.idx.next, %loop ] ret i32 %res } + +define i64 @test_vectorize_select_smin_idx_iv_start_different(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_smin_idx_iv_start_different( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 20, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret 
i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 20, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %cmp = icmp sgt i64 %min.val, %l + %min.val.next = tail call i64 @llvm.smin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + + diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll index 192740fcf6dec..ce6c23225b13f 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll @@ -11,7 +11,7 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -30,7 +30,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l 
@@ -53,7 +53,7 @@ define i64 @test_vectorize_select_umin_idx_signed_sentinel_possible(ptr %src, i6 ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -2, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -72,7 +72,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -2, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -95,7 +95,7 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[L]], [[MIN_VAL]] @@ -114,7 +114,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = 
getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ult i64 %l, %min.val @@ -137,7 +137,7 @@ define i64 @test_vectorize_select_umin_idx_select_ops_flipped(ptr %src, i64 %n) ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[L]], [[MIN_VAL]] @@ -156,7 +156,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ult i64 %l, %min.val @@ -179,7 +179,7 @@ define i64 @test_vectorize_select_umin_via_select_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -198,7 +198,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ 
%min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -221,7 +221,7 @@ define i64 @test_vectorize_select_umin_idx_all_exit_inst(ptr %src, ptr %umin, i6 ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -20, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -242,7 +242,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -20, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -267,7 +267,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -286,7 +286,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -309,7 +309,7 @@ define i64 @test_not_vectorize_select_no_min_reduction(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RED_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[RED_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[RED_VAL]], [[L]] @@ -328,7 +328,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %red.val = phi i64 [ 0, %entry ], [ %red.val.next, %loop ] + %red.val = phi i64 [ -1, %entry ], [ %red.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %red.val, %l @@ -351,7 +351,7 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], 
[[X]] @@ -370,7 +370,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %x @@ -393,7 +393,7 @@ define i32 @test_vectorize_select_umin_idx_with_trunc(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -413,7 +413,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i32 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -437,7 +437,7 @@ define i32 @test_vectorize_select_umin_idx_with_trunc_valid(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = 
load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -457,7 +457,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i32 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -481,7 +481,7 @@ define ptr @test_with_ptr_index(ptr %start, ptr %end) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[IV]], align 4 ; CHECK-NEXT: [[CMP7_US:%.*]] = icmp ult i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) @@ -499,7 +499,7 @@ entry: loop: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] %min.idx = phi ptr [ null, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %l = load i64, ptr %iv %cmp7.us = icmp ult i64 %l, %min.val %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) @@ -521,7 +521,7 @@ define i64 @test_no_vectorize_select_iv_decrement(ptr %src) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1000, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ 
[[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -540,7 +540,7 @@ entry: loop: %iv = phi i64 [ 1000, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -563,7 +563,7 @@ define i64 @test_no_vectorize_select_iv_sub(ptr %src) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1000, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -582,7 +582,7 @@ entry: loop: %iv = phi i64 [ 1000, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -605,7 +605,7 @@ define i64 @test_no_vectorize_select_iv_mul(ptr %src) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; 
CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -624,7 +624,7 @@ entry: loop: %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -648,7 +648,7 @@ define i64 @test_vectorize_select_umin_idx_wraps(ptr %src, i64 %n, i64 %start) { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -669,7 +669,7 @@ loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %idx = phi i64 [ %start, %entry ], [ %idx.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -685,6 +685,47 @@ exit: ret i64 %res } +define i64 @test_vectorize_select_umin_idx_iv_start_different(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 
@test_vectorize_select_umin_idx_iv_start_different( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 10, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10000 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 10, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ -1, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %cmp = icmp ugt i64 %min.val, %l + %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 10000 + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} declare i64 @llvm.umin.i64(i64, i64) declare i16 @llvm.umin.i16(i16, i16) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s 
b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s index e23a3e09ca897..c0fe5b94c9256 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s @@ -1418,181 +1418,181 @@ vfslide1up.vf v8, v16, ft0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu # CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV_S_X vmv.s.x v8, s0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, 
zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, 
v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV1R_V vmv1r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV1R_V vmv1r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 
1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu -# 
CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV2R_V vmv2r.v v8, v8 +# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VMV2R_V vmv2r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 
SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA 
VSETVLI vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU 
VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV4R_V vmv4r.v v8, v8 +# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VMV4R_V vmv4r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 
SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # 
CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VMV8R_V vmv8r.v v8, v8 +# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VMV8R_V vmv8r.v v8, v8 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu # CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VIOTA_M viota.m v8, v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu @@ -2354,7 +2354,7 @@ vfslide1up.vf v8, v16, ft0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] -# CHECK-NEXT: - 572.00 - - - 303.00 5585.00 - +# CHECK-NEXT: - 572.00 - - - 303.00 6201.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: @@ -2579,181 +2579,181 @@ vfslide1up.vf v8, v16, ft0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, 
mu # CHECK-NEXT: - - - - - - 4.00 - vmv.s.x v8, s0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - 
- - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv1r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv1r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - 
vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # 
CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv2r.v v8, v8 +# CHECK-NEXT: - - - - - - 4.00 - vmv2r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - 
- - - - - vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli 
t3, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv4r.v v8, v8 +# CHECK-NEXT: - - - - - - 8.00 - vmv4r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, 
m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e16, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e32, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m1, 
tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e64, m8, tu, mu -# CHECK-NEXT: - - - - - - 1.00 - vmv8r.v v8, v8 +# CHECK-NEXT: - - - - - - 16.00 - vmv8r.v v8, v8 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf2, tu, mu # CHECK-NEXT: - - - - - - 1.00 - viota.m v8, v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli t3, zero, e8, mf4, tu, mu diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vle-vse-vlm.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vle-vse-vlm.s index 5209c897698a9..e68b13e47c6ac 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vle-vse-vlm.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vle-vse-vlm.s @@ -202,165 +202,165 @@ vle64ff.v v8, (a0) # CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# 
CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8_V vle8.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLE8_V vle8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16_V vle16.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE16_V vle16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16_V vle16.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE16_V vle16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16_V vle16.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE16_V vle16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16_V vle16.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE16_V vle16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16_V vle16.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLE16_V vle16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16_V vle16.v v8, (a0) +# CHECK-NEXT: 1 16 
16.00 * 16 SMX60_VLS[16] VLE16_V vle16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32_V vle32.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE32_V vle32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32_V vle32.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE32_V vle32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32_V vle32.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE32_V vle32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32_V vle32.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLE32_V vle32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32_V vle32.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLE32_V vle32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64_V vle64.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VLE64_V vle64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64_V vle64.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE64_V vle64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64_V vle64.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLE64_V vle64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64_V vle64.v v8, (a0) +# CHECK-NEXT: 1 16 
16.00 * 16 SMX60_VLS[16] VLE64_V vle64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE8_V vse8.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VSE8_V vse8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE16_V vse16.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSE16_V vse16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE16_V vse16.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSE16_V 
vse16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE16_V vse16.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VSE16_V vse16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE16_V vse16.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSE16_V vse16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE16_V vse16.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSE16_V vse16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE16_V vse16.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VSE16_V vse16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE32_V vse32.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSE32_V vse32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE32_V vse32.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VSE32_V vse32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE32_V vse32.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSE32_V vse32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE32_V vse32.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSE32_V vse32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE32_V vse32.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VSE32_V 
vse32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE64_V vse64.v v8, (a0) +# CHECK-NEXT: 1 3 3.00 * 3 SMX60_VLS[3] VSE64_V vse64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE64_V vse64.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSE64_V vse64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE64_V vse64.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSE64_V vse64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSE64_V vse64.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VSE64_V vse64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLM_V vlm.v v8, (a0) +# CHECK-NEXT: 1 1 2.00 * 1 SMX60_VLS[2] VLM_V vlm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSM_V vsm.v v8, (a0) +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSM_V vsm.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 
* 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 5 5.00 * 5 SMX60_VLS[5] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 7 7.00 * 7 SMX60_VLS[7] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 11 11.00 * 11 SMX60_VLS[11] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE8FF_V vle8ff.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VLE8FF_V vle8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16FF_V vle16ff.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE16FF_V vle16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16FF_V vle16ff.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE16FF_V vle16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, 
e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16FF_V vle16ff.v v8, (a0) +# CHECK-NEXT: 1 5 5.00 * 5 SMX60_VLS[5] VLE16FF_V vle16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16FF_V vle16ff.v v8, (a0) +# CHECK-NEXT: 1 7 7.00 * 7 SMX60_VLS[7] VLE16FF_V vle16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16FF_V vle16ff.v v8, (a0) +# CHECK-NEXT: 1 11 11.00 * 11 SMX60_VLS[11] VLE16FF_V vle16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE16FF_V vle16ff.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VLE16FF_V vle16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32FF_V vle32ff.v v8, (a0) +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLE32FF_V vle32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32FF_V vle32ff.v v8, (a0) +# CHECK-NEXT: 1 5 5.00 * 5 SMX60_VLS[5] VLE32FF_V vle32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32FF_V vle32ff.v v8, (a0) +# CHECK-NEXT: 1 7 7.00 * 7 SMX60_VLS[7] VLE32FF_V vle32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32FF_V vle32ff.v v8, (a0) +# CHECK-NEXT: 1 11 11.00 * 11 SMX60_VLS[11] VLE32FF_V vle32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE32FF_V vle32ff.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VLE32FF_V vle32ff.v v8, 
(a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64FF_V vle64ff.v v8, (a0) +# CHECK-NEXT: 1 5 5.00 * 5 SMX60_VLS[5] VLE64FF_V vle64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64FF_V vle64ff.v v8, (a0) +# CHECK-NEXT: 1 7 7.00 * 7 SMX60_VLS[7] VLE64FF_V vle64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64FF_V vle64ff.v v8, (a0) +# CHECK-NEXT: 1 11 11.00 * 11 SMX60_VLS[11] VLE64FF_V vle64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLE64FF_V vle64ff.v v8, (a0) +# CHECK-NEXT: 1 19 19.00 * 19 SMX60_VLS[19] VLE64FF_V vle64ff.v v8, (a0) # CHECK: Resources: # CHECK-NEXT: [0] - SMX60_FP @@ -374,167 +374,167 @@ vle64ff.v v8, (a0) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] -# CHECK-NEXT: - 80.00 - - - - - 80.00 +# CHECK-NEXT: - 80.00 - - - - - 510.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - 
- - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vle8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vle16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vle16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32.v v8, (a0) +# CHECK-NEXT: - - - - - - 
- 4.00 vle32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vle32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vle32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vle64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vle64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vle64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 
vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vse8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vse16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vse16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vse16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vse16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vse16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vse16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vse32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vse32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vse32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vse32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, 
zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vse32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 3.00 vse64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vse64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vse64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vse64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vse64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vlm.v v8, (a0) # CHECK-NEXT: - 1.00 - - 
- - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsm.v v8, (a0) +# CHECK-NEXT: - - - - - - - 2.00 vsm.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 5.00 vle8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 7.00 vle8ff.v v8, 
(a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 11.00 vle8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vle8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 5.00 vle16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 7.00 vle16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 11.00 vle16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vle16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 4.00 vle32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 5.00 vle32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 7.00 vle32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma 
-# CHECK-NEXT: - - - - - - - 1.00 vle32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 11.00 vle32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vle32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 5.00 vle64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 7.00 vle64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 11.00 vle64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vle64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 19.00 vle64ff.v v8, (a0) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlse-vsse.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlse-vsse.s index dfd7da53f0b7d..8d97cd252abae 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlse-vsse.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlse-vsse.s @@ -120,93 +120,93 @@ vsse64.v v8, (a0), t0 # CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# 
CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE8_V vlse8.v v8, (a0), t0 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSE8_V vlse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE16_V vlse16.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLSE16_V vlse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE16_V vlse16.v v8, (a0), t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSE16_V vlse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE16_V vlse16.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSE16_V vlse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE16_V vlse16.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSE16_V vlse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA 
VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE16_V vlse16.v v8, (a0), t0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSE16_V vlse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE16_V vlse16.v v8, (a0), t0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSE16_V vlse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE32_V vlse32.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLSE32_V vlse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE32_V vlse32.v v8, (a0), t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSE32_V vlse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE32_V vlse32.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSE32_V vlse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE32_V vlse32.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSE32_V vlse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE32_V vlse32.v v8, (a0), t0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSE32_V vlse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE64_V vlse64.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLSE64_V vlse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE64_V vlse64.v v8, (a0), 
t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSE64_V vlse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE64_V vlse64.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSE64_V vlse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSE64_V vlse64.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSE64_V vlse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI 
vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE8_V vsse8.v v8, (a0), t0 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSSE8_V vsse8.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE16_V vsse16.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSSE16_V vsse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE16_V vsse16.v v8, (a0), t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSE16_V vsse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE16_V vsse16.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSE16_V vsse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE16_V vsse16.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSE16_V vsse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE16_V vsse16.v v8, (a0), t0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSE16_V vsse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE16_V vsse16.v v8, (a0), t0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSE16_V vsse16.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE32_V vsse32.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSSE32_V vsse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE32_V vsse32.v v8, (a0), t0 +# 
CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSE32_V vsse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE32_V vsse32.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSE32_V vsse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE32_V vsse32.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSE32_V vsse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE32_V vsse32.v v8, (a0), t0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSE32_V vsse32.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE64_V vsse64.v v8, (a0), t0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSSE64_V vsse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE64_V vsse64.v v8, (a0), t0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSE64_V vsse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE64_V vsse64.v v8, (a0), t0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSE64_V vsse64.v v8, (a0), t0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSE64_V vsse64.v v8, (a0), t0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSE64_V vsse64.v v8, (a0), t0 # CHECK: Resources: # CHECK-NEXT: [0] - SMX60_FP @@ -220,95 +220,95 @@ vsse64.v v8, (a0), t0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] -# CHECK-NEXT: - 44.00 - - - - - 44.00 +# CHECK-NEXT: - 44.00 - - - - - 1888.00 # CHECK: 
Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 64.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 128.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 256.00 vlse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vlse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vlse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vlse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 
1.00 vlse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vlse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 64.00 vlse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 128.00 vlse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vlse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vlse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vlse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vlse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 64.00 vlse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vlse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vlse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vlse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vlse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 
1.00 vlse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vlse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 64.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 128.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse8.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 256.00 vsse8.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vsse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vsse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vsse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse16.v 
v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vsse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 64.00 vsse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse16.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 128.00 vsse16.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vsse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vsse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vsse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vsse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse32.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 64.00 vsse32.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 4.00 vsse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 8.00 vsse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse64.v v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 16.00 vsse64.v v8, (a0), t0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsse64.v 
v8, (a0), t0 +# CHECK-NEXT: - - - - - - - 32.00 vsse64.v v8, (a0), t0 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlseg-vsseg.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlseg-vsseg.s index 6ad505e3c741d..c660b94353be2 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlseg-vsseg.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlseg-vsseg.s @@ -1627,1545 +1627,1545 @@ vsoxseg8ei64.v v8, (a0), v16 # CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8_V vlseg2e8.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E8_V vlseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8_V vlseg2e8.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E8_V vlseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8_V vlseg2e8.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E8_V vlseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8_V vlseg2e8.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG2E8_V vlseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8_V vlseg2e8.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG2E8_V vlseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8_V vlseg2e8.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSEG2E8_V vlseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu 
-# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16_V vlseg2e16.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E16_V vlseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16_V vlseg2e16.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E16_V vlseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16_V vlseg2e16.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E16_V vlseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16_V vlseg2e16.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG2E16_V vlseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16_V vlseg2e16.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG2E16_V vlseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32_V vlseg2e32.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E32_V vlseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32_V vlseg2e32.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E32_V vlseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32_V vlseg2e32.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E32_V vlseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32_V vlseg2e32.v v8, (a0) +# 
CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG2E32_V vlseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E64_V vlseg2e64.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E64_V vlseg2e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E64_V vlseg2e64.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E64_V vlseg2e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E64_V vlseg2e64.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E64_V vlseg2e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8_V vlseg3e8.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E8_V vlseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8_V vlseg3e8.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E8_V vlseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8_V vlseg3e8.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG3E8_V vlseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8_V vlseg3e8.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG3E8_V vlseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8_V vlseg3e8.v v8, (a0) +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLSEG3E8_V vlseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 
1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16_V vlseg3e16.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E16_V vlseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16_V vlseg3e16.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E16_V vlseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16_V vlseg3e16.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG3E16_V vlseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16_V vlseg3e16.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG3E16_V vlseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E32_V vlseg3e32.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E32_V vlseg3e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E32_V vlseg3e32.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E32_V vlseg3e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E32_V vlseg3e32.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG3E32_V vlseg3e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E64_V vlseg3e64.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E64_V vlseg3e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, 
tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E64_V vlseg3e64.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E64_V vlseg3e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8_V vlseg4e8.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E8_V vlseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8_V vlseg4e8.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E8_V vlseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8_V vlseg4e8.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG4E8_V vlseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8_V vlseg4e8.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG4E8_V vlseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8_V vlseg4e8.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSEG4E8_V vlseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16_V vlseg4e16.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E16_V vlseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16_V vlseg4e16.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E16_V vlseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16_V vlseg4e16.v v8, (a0) +# 
CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG4E16_V vlseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16_V vlseg4e16.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG4E16_V vlseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E32_V vlseg4e32.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E32_V vlseg4e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E32_V vlseg4e32.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E32_V vlseg4e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E32_V vlseg4e32.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG4E32_V vlseg4e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E64_V vlseg4e64.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E64_V vlseg4e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E64_V vlseg4e64.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E64_V vlseg4e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8_V vlseg5e8.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E8_V vlseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8_V vlseg5e8.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSEG5E8_V vlseg5e8.v v8, (a0) 
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8_V vlseg5e8.v v8, (a0) +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLSEG5E8_V vlseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8_V vlseg5e8.v v8, (a0) +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VLSEG5E8_V vlseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E16_V vlseg5e16.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E16_V vlseg5e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E16_V vlseg5e16.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSEG5E16_V vlseg5e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E16_V vlseg5e16.v v8, (a0) +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLSEG5E16_V vlseg5e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E32_V vlseg5e32.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E32_V vlseg5e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E32_V vlseg5e32.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSEG5E32_V vlseg5e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E64_V vlseg5e64.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E64_V vlseg5e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, 
zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8_V vlseg6e8.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E8_V vlseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8_V vlseg6e8.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG6E8_V vlseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8_V vlseg6e8.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG6E8_V vlseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8_V vlseg6e8.v v8, (a0) +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLSEG6E8_V vlseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E16_V vlseg6e16.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E16_V vlseg6e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E16_V vlseg6e16.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG6E16_V vlseg6e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E16_V vlseg6e16.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG6E16_V vlseg6e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E32_V vlseg6e32.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E32_V vlseg6e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E32_V 
vlseg6e32.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG6E32_V vlseg6e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E64_V vlseg6e64.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E64_V vlseg6e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8_V vlseg7e8.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E8_V vlseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8_V vlseg7e8.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSEG7E8_V vlseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8_V vlseg7e8.v v8, (a0) +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLSEG7E8_V vlseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8_V vlseg7e8.v v8, (a0) +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VLSEG7E8_V vlseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E16_V vlseg7e16.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E16_V vlseg7e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E16_V vlseg7e16.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSEG7E16_V vlseg7e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E16_V vlseg7e16.v v8, (a0) +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] 
VLSEG7E16_V vlseg7e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E32_V vlseg7e32.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E32_V vlseg7e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E32_V vlseg7e32.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSEG7E32_V vlseg7e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E64_V vlseg7e64.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E64_V vlseg7e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8_V vlseg8e8.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E8_V vlseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8_V vlseg8e8.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG8E8_V vlseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8_V vlseg8e8.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG8E8_V vlseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8_V vlseg8e8.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSEG8E8_V vlseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E16_V vlseg8e16.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E16_V vlseg8e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E16_V vlseg8e16.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG8E16_V vlseg8e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E16_V vlseg8e16.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG8E16_V vlseg8e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E32_V vlseg8e32.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E32_V vlseg8e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E32_V vlseg8e32.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG8E32_V vlseg8e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E64_V vlseg8e64.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E64_V vlseg8e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E8_V vsseg2e8.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSEG2E8_V vsseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E8_V vsseg2e8.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG2E8_V vsseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E8_V vsseg2e8.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG2E8_V vsseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 
1 1.00 * 1 SMX60_VLS VSSEG2E8_V vsseg2e8.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG2E8_V vsseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E8_V vsseg2e8.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSEG2E8_V vsseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E8_V vsseg2e8.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSSEG2E8_V vsseg2e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E16_V vsseg2e16.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSEG2E16_V vsseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E16_V vsseg2e16.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG2E16_V vsseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E16_V vsseg2e16.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG2E16_V vsseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E16_V vsseg2e16.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG2E16_V vsseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E16_V vsseg2e16.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSEG2E16_V vsseg2e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E32_V vsseg2e32.v v8, (a0) +# CHECK-NEXT: 1 8 
8.00 * 8 SMX60_VLS[8] VSSEG2E32_V vsseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E32_V vsseg2e32.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG2E32_V vsseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E32_V vsseg2e32.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG2E32_V vsseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E32_V vsseg2e32.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG2E32_V vsseg2e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E64_V vsseg2e64.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSEG2E64_V vsseg2e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E64_V vsseg2e64.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG2E64_V vsseg2e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG2E64_V vsseg2e64.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG2E64_V vsseg2e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E8_V vsseg3e8.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSEG3E8_V vsseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E8_V vsseg3e8.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG3E8_V vsseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E8_V vsseg3e8.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSEG3E8_V vsseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E8_V vsseg3e8.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSEG3E8_V vsseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E8_V vsseg3e8.v v8, (a0) +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSSEG3E8_V vsseg3e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E16_V vsseg3e16.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSEG3E16_V vsseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E16_V vsseg3e16.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG3E16_V vsseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E16_V vsseg3e16.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSEG3E16_V vsseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E16_V vsseg3e16.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSEG3E16_V vsseg3e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E32_V vsseg3e32.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSEG3E32_V vsseg3e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# 
CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E32_V vsseg3e32.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG3E32_V vsseg3e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E32_V vsseg3e32.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSEG3E32_V vsseg3e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E64_V vsseg3e64.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSEG3E64_V vsseg3e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG3E64_V vsseg3e64.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG3E64_V vsseg3e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E8_V vsseg4e8.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG4E8_V vsseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E8_V vsseg4e8.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG4E8_V vsseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E8_V vsseg4e8.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG4E8_V vsseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E8_V vsseg4e8.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSEG4E8_V vsseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E8_V vsseg4e8.v v8, (a0) +# CHECK-NEXT: 1 
256 256.00 * 256 SMX60_VLS[256] VSSEG4E8_V vsseg4e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E16_V vsseg4e16.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG4E16_V vsseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E16_V vsseg4e16.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG4E16_V vsseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E16_V vsseg4e16.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG4E16_V vsseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E16_V vsseg4e16.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSEG4E16_V vsseg4e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E32_V vsseg4e32.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG4E32_V vsseg4e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E32_V vsseg4e32.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG4E32_V vsseg4e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E32_V vsseg4e32.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG4E32_V vsseg4e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E64_V vsseg4e64.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSEG4E64_V vsseg4e64.v v8, (a0) # 
CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG4E64_V vsseg4e64.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG4E64_V vsseg4e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E8_V vsseg5e8.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSEG5E8_V vsseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E8_V vsseg5e8.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSSEG5E8_V vsseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E8_V vsseg5e8.v v8, (a0) +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSSEG5E8_V vsseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E8_V vsseg5e8.v v8, (a0) +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VSSEG5E8_V vsseg5e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E16_V vsseg5e16.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSEG5E16_V vsseg5e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E16_V vsseg5e16.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSSEG5E16_V vsseg5e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E16_V vsseg5e16.v v8, (a0) +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSSEG5E16_V vsseg5e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, 
mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E32_V vsseg5e32.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSEG5E32_V vsseg5e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E32_V vsseg5e32.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSSEG5E32_V vsseg5e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG5E64_V vsseg5e64.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSEG5E64_V vsseg5e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E8_V vsseg6e8.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG6E8_V vsseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E8_V vsseg6e8.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSEG6E8_V vsseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E8_V vsseg6e8.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSEG6E8_V vsseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E8_V vsseg6e8.v v8, (a0) +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSSEG6E8_V vsseg6e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E16_V vsseg6e16.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG6E16_V vsseg6e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E16_V vsseg6e16.v v8, (a0) 
+# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSEG6E16_V vsseg6e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E16_V vsseg6e16.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSEG6E16_V vsseg6e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E32_V vsseg6e32.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG6E32_V vsseg6e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E32_V vsseg6e32.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSEG6E32_V vsseg6e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG6E64_V vsseg6e64.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSEG6E64_V vsseg6e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E8_V vsseg7e8.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSEG7E8_V vsseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E8_V vsseg7e8.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSSEG7E8_V vsseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E8_V vsseg7e8.v v8, (a0) +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSSEG7E8_V vsseg7e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E8_V vsseg7e8.v v8, (a0) +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VSSEG7E8_V vsseg7e8.v v8, (a0) # 
CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E16_V vsseg7e16.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSEG7E16_V vsseg7e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E16_V vsseg7e16.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSSEG7E16_V vsseg7e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E16_V vsseg7e16.v v8, (a0) +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSSEG7E16_V vsseg7e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E32_V vsseg7e32.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSEG7E32_V vsseg7e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E32_V vsseg7e32.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSSEG7E32_V vsseg7e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG7E64_V vsseg7e64.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSEG7E64_V vsseg7e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E8_V vsseg8e8.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG8E8_V vsseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E8_V vsseg8e8.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG8E8_V vsseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, 
zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E8_V vsseg8e8.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSEG8E8_V vsseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E8_V vsseg8e8.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSSEG8E8_V vsseg8e8.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E16_V vsseg8e16.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG8E16_V vsseg8e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E16_V vsseg8e16.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG8E16_V vsseg8e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E16_V vsseg8e16.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSEG8E16_V vsseg8e16.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E32_V vsseg8e32.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG8E32_V vsseg8e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E32_V vsseg8e32.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSEG8E32_V vsseg8e32.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSEG8E64_V vsseg8e64.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSEG8E64_V vsseg8e64.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS 
VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSSEG2E8_V vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS 
VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSSEG2E16_V vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG2E32_V vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E64_V vlsseg2e64.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSSEG2E64_V vlsseg2e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 
1 1.00 * 1 SMX60_VLS VLSSEG2E64_V vlsseg2e64.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG2E64_V vlsseg2e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG2E64_V vlsseg2e64.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG2E64_V vlsseg2e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLSSEG3E8_V vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# 
CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSSEG3E16_V vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E32_V vlsseg3e32.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSSEG3E32_V vlsseg3e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E32_V vlsseg3e32.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG3E32_V vlsseg3e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E32_V vlsseg3e32.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSSEG3E32_V vlsseg3e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E64_V vlsseg3e64.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSSEG3E64_V vlsseg3e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG3E64_V vlsseg3e64.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG3E64_V vlsseg3e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, 
zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSSEG4E8_V vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI 
vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSSEG4E16_V vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E32_V vlsseg4e32.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG4E32_V vlsseg4e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E32_V vlsseg4e32.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG4E32_V vlsseg4e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E32_V vlsseg4e32.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG4E32_V vlsseg4e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E64_V vlsseg4e64.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSSEG4E64_V vlsseg4e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG4E64_V vlsseg4e64.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG4E64_V vlsseg4e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VLSSEG5E8_V vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E16_V vlsseg5e16.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSSEG5E16_V vlsseg5e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E16_V vlsseg5e16.v v8, (a0), a1 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSSEG5E16_V vlsseg5e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E16_V vlsseg5e16.v v8, (a0), a1 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLSSEG5E16_V vlsseg5e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E32_V vlsseg5e32.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSSEG5E32_V vlsseg5e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E32_V vlsseg5e32.v v8, (a0), a1 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSSEG5E32_V vlsseg5e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG5E64_V vlsseg5e64.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSSEG5E64_V vlsseg5e64.v v8, (a0), a1 # 
CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLSSEG6E8_V vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E16_V vlsseg6e16.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG6E16_V vlsseg6e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E16_V vlsseg6e16.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSSEG6E16_V vlsseg6e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E16_V vlsseg6e16.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSSEG6E16_V vlsseg6e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E32_V vlsseg6e32.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG6E32_V vlsseg6e32.v 
v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E32_V vlsseg6e32.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSSEG6E32_V vlsseg6e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG6E64_V vlsseg6e64.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSSEG6E64_V vlsseg6e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VLSSEG7E8_V vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E16_V vlsseg7e16.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSSEG7E16_V vlsseg7e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E16_V vlsseg7e16.v v8, (a0), a1 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] 
VLSSEG7E16_V vlsseg7e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E16_V vlsseg7e16.v v8, (a0), a1 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLSSEG7E16_V vlsseg7e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E32_V vlsseg7e32.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSSEG7E32_V vlsseg7e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E32_V vlsseg7e32.v v8, (a0), a1 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSSEG7E32_V vlsseg7e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG7E64_V vlsseg7e64.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSSEG7E64_V vlsseg7e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 256 
256.00 * 256 SMX60_VLS[256] VLSSEG8E8_V vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E16_V vlsseg8e16.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG8E16_V vlsseg8e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E16_V vlsseg8e16.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG8E16_V vlsseg8e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E16_V vlsseg8e16.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSSEG8E16_V vlsseg8e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E32_V vlsseg8e32.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG8E32_V vlsseg8e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E32_V vlsseg8e32.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSSEG8E32_V vlsseg8e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSSEG8E64_V vlsseg8e64.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSSEG8E64_V vlsseg8e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E8_V vssseg2e8.v v8, (a0), 
a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSSSEG2E8_V vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E16_V vssseg2e16.v 
v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSSEG2E16_V vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG2E32_V vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E64_V vssseg2e64.v v8, (a0), a1 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSSSEG2E64_V vssseg2e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG2E64_V vssseg2e64.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG2E64_V vssseg2e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS 
VSSSEG2E64_V vssseg2e64.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG2E64_V vssseg2e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSSSEG3E8_V vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 
SMX60_VLS VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSSEG3E16_V vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E32_V vssseg3e32.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSSEG3E32_V vssseg3e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E32_V vssseg3e32.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG3E32_V vssseg3e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E32_V vssseg3e32.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSSEG3E32_V vssseg3e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E64_V vssseg3e64.v v8, (a0), a1 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSSSEG3E64_V vssseg3e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG3E64_V vssseg3e64.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG3E64_V vssseg3e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# 
CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSSSEG4E8_V vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSSEG4E16_V vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, 
zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E32_V vssseg4e32.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG4E32_V vssseg4e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E32_V vssseg4e32.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG4E32_V vssseg4e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E32_V vssseg4e32.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG4E32_V vssseg4e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E64_V vssseg4e64.v v8, (a0), a1 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSSSEG4E64_V vssseg4e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG4E64_V vssseg4e64.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG4E64_V vssseg4e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI 
vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VSSSEG5E8_V vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E16_V vssseg5e16.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSSEG5E16_V vssseg5e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E16_V vssseg5e16.v v8, (a0), a1 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSSSEG5E16_V vssseg5e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E16_V vssseg5e16.v v8, (a0), a1 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSSSEG5E16_V vssseg5e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E32_V vssseg5e32.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSSEG5E32_V vssseg5e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E32_V vssseg5e32.v v8, (a0), a1 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSSSEG5E32_V vssseg5e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG5E64_V vssseg5e64.v v8, (a0), a1 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSSSEG5E64_V vssseg5e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSSSEG6E8_V vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E16_V vssseg6e16.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG6E16_V vssseg6e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E16_V vssseg6e16.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSSEG6E16_V vssseg6e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E16_V vssseg6e16.v v8, (a0), a1 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSSSEG6E16_V vssseg6e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E32_V vssseg6e32.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG6E32_V vssseg6e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E32_V vssseg6e32.v v8, (a0), a1 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSSSEG6E32_V vssseg6e32.v v8, (a0), a1 # 
CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG6E64_V vssseg6e64.v v8, (a0), a1 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSSSEG6E64_V vssseg6e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VSSSEG7E8_V vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E16_V vssseg7e16.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSSEG7E16_V vssseg7e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E16_V vssseg7e16.v v8, (a0), a1 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSSSEG7E16_V vssseg7e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E16_V vssseg7e16.v v8, (a0), a1 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSSSEG7E16_V 
vssseg7e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E32_V vssseg7e32.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSSEG7E32_V vssseg7e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E32_V vssseg7e32.v v8, (a0), a1 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSSSEG7E32_V vssseg7e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG7E64_V vssseg7e64.v v8, (a0), a1 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSSSEG7E64_V vssseg7e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSSSEG8E8_V vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E16_V vssseg8e16.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 
SMX60_VLS[32] VSSSEG8E16_V vssseg8e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E16_V vssseg8e16.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG8E16_V vssseg8e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E16_V vssseg8e16.v v8, (a0), a1 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSSSEG8E16_V vssseg8e16.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E32_V vssseg8e32.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG8E32_V vssseg8e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E32_V vssseg8e32.v v8, (a0), a1 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSSSEG8E32_V vssseg8e32.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSSSEG8E64_V vssseg8e64.v v8, (a0), a1 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSSSEG8E64_V vssseg8e64.v v8, (a0), a1 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: 1 32 
32.00 * 32 SMX60_VLS[32] VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSEG2E8FF_V vlseg2e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 
SMX60_VLS[128] VLSEG2E16FF_V vlseg2e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG2E32FF_V vlseg2e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E64FF_V vlseg2e64ff.v v8, (a0) +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLSEG2E64FF_V vlseg2e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E64FF_V vlseg2e64ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG2E64FF_V vlseg2e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG2E64FF_V vlseg2e64ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG2E64FF_V vlseg2e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] 
VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLSEG3E8FF_V vlseg3e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E16FF_V vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG3E16FF_V 
vlseg3e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E32FF_V vlseg3e32ff.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E32FF_V vlseg3e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E32FF_V vlseg3e32ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E32FF_V vlseg3e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E32FF_V vlseg3e32ff.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG3E32FF_V vlseg3e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E64FF_V vlseg3e64ff.v v8, (a0) +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLSEG3E64FF_V vlseg3e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG3E64FF_V vlseg3e64ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG3E64FF_V vlseg3e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG4E8FF_V vlseg4e8ff.v v8, 
(a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSEG4E8FF_V vlseg4e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG4E16FF_V vlseg4e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E32FF_V vlseg4e32ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E32FF_V vlseg4e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E32FF_V vlseg4e32ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E32FF_V vlseg4e32ff.v v8, 
(a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E32FF_V vlseg4e32ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG4E32FF_V vlseg4e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E64FF_V vlseg4e64ff.v v8, (a0) +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLSEG4E64FF_V vlseg4e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG4E64FF_V vlseg4e64ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG4E64FF_V vlseg4e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VLSEG5E8FF_V vlseg5e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E16FF_V vlseg5e16ff.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E16FF_V vlseg5e16ff.v v8, (a0) # CHECK-NEXT: 
1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E16FF_V vlseg5e16ff.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSEG5E16FF_V vlseg5e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E16FF_V vlseg5e16ff.v v8, (a0) +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLSEG5E16FF_V vlseg5e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E32FF_V vlseg5e32ff.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E32FF_V vlseg5e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E32FF_V vlseg5e32ff.v v8, (a0) +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLSEG5E32FF_V vlseg5e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG5E64FF_V vlseg5e64ff.v v8, (a0) +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLSEG5E64FF_V vlseg5e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLSEG6E8FF_V vlseg6e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E16FF_V vlseg6e16ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E16FF_V vlseg6e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E16FF_V vlseg6e16ff.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG6E16FF_V vlseg6e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E16FF_V vlseg6e16ff.v v8, (a0) +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLSEG6E16FF_V vlseg6e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E32FF_V vlseg6e32ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E32FF_V vlseg6e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E32FF_V vlseg6e32ff.v v8, (a0) +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLSEG6E32FF_V vlseg6e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG6E64FF_V vlseg6e64ff.v v8, (a0) +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLSEG6E64FF_V vlseg6e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VLSEG7E8FF_V vlseg7e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E16FF_V vlseg7e16ff.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E16FF_V vlseg7e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E16FF_V vlseg7e16ff.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSEG7E16FF_V vlseg7e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E16FF_V vlseg7e16ff.v v8, (a0) +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLSEG7E16FF_V vlseg7e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E32FF_V vlseg7e32ff.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E32FF_V vlseg7e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E32FF_V vlseg7e32ff.v v8, (a0) +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLSEG7E32FF_V vlseg7e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG7E64FF_V vlseg7e64ff.v v8, (a0) +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLSEG7E64FF_V vlseg7e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLSEG8E8FF_V vlseg8e8ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E16FF_V vlseg8e16ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E16FF_V vlseg8e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E16FF_V vlseg8e16ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG8E16FF_V vlseg8e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E16FF_V vlseg8e16ff.v v8, (a0) +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLSEG8E16FF_V vlseg8e16ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E32FF_V vlseg8e32ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E32FF_V vlseg8e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E32FF_V vlseg8e32ff.v v8, (a0) +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLSEG8E32FF_V vlseg8e32ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLSEG8E64FF_V vlseg8e64ff.v v8, (a0) +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLSEG8E64FF_V vlseg8e64ff.v v8, (a0) # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXSEG2EI8_V vluxseg2ei8.v 
v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLUXSEG2EI8_V vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXSEG2EI16_V vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI32_V 
vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG2EI32_V vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI64_V vluxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXSEG2EI64_V vluxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI64_V vluxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG2EI64_V vluxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG2EI64_V vluxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG2EI64_V vluxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA 
VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLUXSEG3EI8_V vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLUXSEG3EI16_V vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI32_V vluxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 
SMX60_VLS[12] VLUXSEG3EI32_V vluxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI32_V vluxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG3EI32_V vluxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI32_V vluxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLUXSEG3EI32_V vluxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI64_V vluxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLUXSEG3EI64_V vluxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG3EI64_V vluxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG3EI64_V vluxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 
1.00 * 1 SMX60_VLS VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLUXSEG4EI8_V vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXSEG4EI16_V vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI32_V vluxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG4EI32_V vluxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI32_V vluxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG4EI32_V vluxseg4ei32.v v8, (a0), v16 
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI32_V vluxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG4EI32_V vluxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI64_V vluxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXSEG4EI64_V vluxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG4EI64_V vluxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG4EI64_V vluxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VLUXSEG5EI8_V vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI16_V vluxseg5ei16.v v8, (a0), 
v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLUXSEG5EI16_V vluxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI16_V vluxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLUXSEG5EI16_V vluxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI16_V vluxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLUXSEG5EI16_V vluxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI32_V vluxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLUXSEG5EI32_V vluxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI32_V vluxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLUXSEG5EI32_V vluxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG5EI64_V vluxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLUXSEG5EI64_V vluxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, 
zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLUXSEG6EI8_V vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI16_V vluxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG6EI16_V vluxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI16_V vluxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLUXSEG6EI16_V vluxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI16_V vluxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLUXSEG6EI16_V vluxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI32_V vluxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLUXSEG6EI32_V vluxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI32_V vluxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLUXSEG6EI32_V vluxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG6EI64_V vluxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] 
VLUXSEG6EI64_V vluxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VLUXSEG7EI8_V vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI16_V vluxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLUXSEG7EI16_V vluxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI16_V vluxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLUXSEG7EI16_V vluxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI16_V vluxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLUXSEG7EI16_V vluxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 
* 1 SMX60_VLS VLUXSEG7EI32_V vluxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLUXSEG7EI32_V vluxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI32_V vluxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLUXSEG7EI32_V vluxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG7EI64_V vluxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLUXSEG7EI64_V vluxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLUXSEG8EI8_V vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI16_V vluxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG8EI16_V vluxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 
1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI16_V vluxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG8EI16_V vluxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI16_V vluxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXSEG8EI16_V vluxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI32_V vluxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG8EI32_V vluxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI32_V vluxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXSEG8EI32_V vluxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXSEG8EI64_V vluxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXSEG8EI64_V vluxseg8ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 +# 
CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLOXSEG2EI8_V vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, 
m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLOXSEG2EI16_V vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG2EI32_V vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI64_V vloxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXSEG2EI64_V vloxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI64_V vloxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG2EI64_V vloxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG2EI64_V vloxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG2EI64_V 
vloxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLOXSEG3EI8_V vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI16_V 
vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLOXSEG3EI16_V vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI32_V vloxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLOXSEG3EI32_V vloxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI32_V vloxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG3EI32_V vloxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI32_V vloxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLOXSEG3EI32_V vloxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI64_V vloxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VLOXSEG3EI64_V vloxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG3EI64_V vloxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG3EI64_V vloxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLOXSEG4EI8_V vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 
128 128.00 * 128 SMX60_VLS[128] VLOXSEG4EI16_V vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI32_V vloxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG4EI32_V vloxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI32_V vloxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG4EI32_V vloxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI32_V vloxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG4EI32_V vloxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI64_V vloxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXSEG4EI64_V vloxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG4EI64_V vloxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG4EI64_V vloxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, 
mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VLOXSEG5EI8_V vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI16_V vloxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLOXSEG5EI16_V vloxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI16_V vloxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLOXSEG5EI16_V vloxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI16_V vloxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VLOXSEG5EI16_V vloxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI32_V vloxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLOXSEG5EI32_V vloxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI32_V vloxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VLOXSEG5EI32_V vloxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG5EI64_V vloxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VLOXSEG5EI64_V vloxseg5ei64.v 
v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VLOXSEG6EI8_V vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI16_V vloxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG6EI16_V vloxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI16_V vloxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLOXSEG6EI16_V vloxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI16_V vloxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VLOXSEG6EI16_V vloxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI32_V 
vloxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG6EI32_V vloxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI32_V vloxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VLOXSEG6EI32_V vloxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG6EI64_V vloxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VLOXSEG6EI64_V vloxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VLOXSEG7EI8_V vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI16_V vloxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLOXSEG7EI16_V vloxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA 
VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI16_V vloxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLOXSEG7EI16_V vloxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI16_V vloxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VLOXSEG7EI16_V vloxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI32_V vloxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLOXSEG7EI32_V vloxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI32_V vloxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VLOXSEG7EI32_V vloxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG7EI64_V vloxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VLOXSEG7EI64_V vloxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 
SMX60_VLS[128] VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VLOXSEG8EI8_V vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI16_V vloxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG8EI16_V vloxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI16_V vloxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG8EI16_V vloxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI16_V vloxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLOXSEG8EI16_V vloxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI32_V vloxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG8EI32_V vloxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI32_V vloxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXSEG8EI32_V vloxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXSEG8EI64_V vloxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXSEG8EI64_V vloxseg8ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# 
CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSUXSEG2EI8_V vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 
1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG2EI16_V vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI32_V vsuxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXSEG2EI32_V vsuxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI32_V vsuxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG2EI32_V vsuxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI32_V vsuxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG2EI32_V vsuxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI64_V vsuxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXSEG2EI64_V vsuxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG2EI64_V vsuxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG2EI64_V vsuxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 +# 
CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSUXSEG3EI8_V vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSUXSEG3EI16_V vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, 
mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI32_V vsuxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSUXSEG3EI32_V vsuxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI32_V vsuxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG3EI32_V vsuxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI32_V vsuxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSUXSEG3EI32_V vsuxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI64_V vsuxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSUXSEG3EI64_V vsuxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG3EI64_V vsuxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG3EI64_V vsuxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG4EI8_V vsuxseg4ei8.v 
v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSUXSEG4EI8_V vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSUXSEG4EI16_V vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI32_V vsuxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG4EI32_V vsuxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS 
VSUXSEG4EI32_V vsuxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG4EI32_V vsuxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI32_V vsuxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG4EI32_V vsuxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI64_V vsuxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXSEG4EI64_V vsuxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG4EI64_V vsuxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG4EI64_V vsuxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VSUXSEG5EI8_V vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI16_V vsuxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSUXSEG5EI16_V vsuxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI16_V vsuxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSUXSEG5EI16_V vsuxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI16_V vsuxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSUXSEG5EI16_V vsuxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI32_V vsuxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSUXSEG5EI32_V vsuxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI32_V vsuxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSUXSEG5EI32_V vsuxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG5EI64_V vsuxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSUXSEG5EI64_V vsuxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 
1 48 48.00 * 48 SMX60_VLS[48] VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSUXSEG6EI8_V vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI16_V vsuxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG6EI16_V vsuxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI16_V vsuxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSUXSEG6EI16_V vsuxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI16_V vsuxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSUXSEG6EI16_V vsuxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI32_V vsuxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG6EI32_V vsuxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI32_V vsuxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSUXSEG6EI32_V vsuxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, 
mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG6EI64_V vsuxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSUXSEG6EI64_V vsuxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VSUXSEG7EI8_V vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI16_V vsuxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSUXSEG7EI16_V vsuxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI16_V vsuxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSUXSEG7EI16_V vsuxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI16_V vsuxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSUXSEG7EI16_V vsuxseg7ei16.v 
v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI32_V vsuxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSUXSEG7EI32_V vsuxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI32_V vsuxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSUXSEG7EI32_V vsuxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG7EI64_V vsuxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSUXSEG7EI64_V vsuxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSUXSEG8EI8_V vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI16_V 
vsuxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG8EI16_V vsuxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI16_V vsuxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG8EI16_V vsuxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI16_V vsuxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSUXSEG8EI16_V vsuxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI32_V vsuxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG8EI32_V vsuxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI32_V vsuxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXSEG8EI32_V vsuxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXSEG8EI64_V vsuxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXSEG8EI64_V vsuxseg8ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 
SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSOXSEG2EI8_V vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 
64.00 * 64 SMX60_VLS[64] VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXSEG2EI16_V vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG2EI32_V vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI64_V vsoxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXSEG2EI64_V vsoxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI64_V vsoxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG2EI64_V vsoxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, tu, mu 
-# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG2EI64_V vsoxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG2EI64_V vsoxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSOXSEG3EI8_V vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 # 
CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSOXSEG3EI16_V vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI32_V vsoxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSOXSEG3EI32_V vsoxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI32_V vsoxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG3EI32_V vsoxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI32_V vsoxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSOXSEG3EI32_V vsoxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI64_V vsoxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 12 12.00 * 12 SMX60_VLS[12] VSOXSEG3EI64_V vsoxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG3EI64_V vsoxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG3EI64_V vsoxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI8_V vsoxseg4ei8.v 
v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSOXSEG4EI8_V vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli 
zero, zero, e16, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXSEG4EI16_V vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI32_V vsoxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG4EI32_V vsoxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI32_V vsoxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG4EI32_V vsoxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI32_V vsoxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG4EI32_V vsoxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI64_V vsoxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXSEG4EI64_V vsoxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG4EI64_V vsoxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG4EI64_V vsoxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] 
VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 160 160.00 * 160 SMX60_VLS[160] VSOXSEG5EI8_V vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI16_V vsoxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSOXSEG5EI16_V vsoxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI16_V vsoxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSOXSEG5EI16_V vsoxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI16_V vsoxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 80 80.00 * 80 SMX60_VLS[80] VSOXSEG5EI16_V vsoxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI32_V vsoxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSOXSEG5EI32_V vsoxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG5EI32_V vsoxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 40 40.00 * 40 SMX60_VLS[40] VSOXSEG5EI32_V vsoxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 
1 SMX60_VLS VSOXSEG5EI64_V vsoxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 20 20.00 * 20 SMX60_VLS[20] VSOXSEG5EI64_V vsoxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 192 192.00 * 192 SMX60_VLS[192] VSOXSEG6EI8_V vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI16_V vsoxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG6EI16_V vsoxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI16_V vsoxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSOXSEG6EI16_V vsoxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI16_V vsoxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 96 96.00 * 96 SMX60_VLS[96] VSOXSEG6EI16_V vsoxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 
1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI32_V vsoxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG6EI32_V vsoxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI32_V vsoxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 48 48.00 * 48 SMX60_VLS[48] VSOXSEG6EI32_V vsoxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG6EI64_V vsoxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 24 24.00 * 24 SMX60_VLS[24] VSOXSEG6EI64_V vsoxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 224 224.00 * 224 SMX60_VLS[224] VSOXSEG7EI8_V vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI16_V vsoxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 
28 28.00 * 28 SMX60_VLS[28] VSOXSEG7EI16_V vsoxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI16_V vsoxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSOXSEG7EI16_V vsoxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI16_V vsoxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 112 112.00 * 112 SMX60_VLS[112] VSOXSEG7EI16_V vsoxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI32_V vsoxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSOXSEG7EI32_V vsoxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI32_V vsoxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 56 56.00 * 56 SMX60_VLS[56] VSOXSEG7EI32_V vsoxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG7EI64_V vsoxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 28 28.00 * 28 SMX60_VLS[28] VSOXSEG7EI64_V vsoxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, tu, 
mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: 1 256 256.00 * 256 SMX60_VLS[256] VSOXSEG8EI8_V vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI16_V vsoxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG8EI16_V vsoxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI16_V vsoxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG8EI16_V vsoxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI16_V vsoxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXSEG8EI16_V vsoxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI32_V vsoxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG8EI32_V vsoxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI32_V vsoxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXSEG8EI32_V vsoxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXSEG8EI64_V vsoxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXSEG8EI64_V 
vsoxseg8ei64.v v8, (a0), v16 # CHECK: Resources: # CHECK-NEXT: [0] - SMX60_FP @@ -3179,1547 +3179,1547 @@ vsoxseg8ei64.v v8, (a0), v16 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] -# CHECK-NEXT: - 770.00 - - - - - 770.00 +# CHECK-NEXT: - 770.00 - - - - - 46320.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vlseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 
32.00 vlseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e8.v v8, 
(a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 192.00 vlseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg3e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e64.v v8, (a0) # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vlseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli 
zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg4e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vlseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 80.00 vlseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 160.00 vlseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vlseg5e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 80.00 vlseg5e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, 
mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vlseg5e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 192.00 vlseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg6e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg6e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - 
- - - - 1.00 vlseg6e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg6e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vlseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 112.00 vlseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 224.00 vlseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vlseg7e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 112.00 vlseg7e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vlseg7e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 
vlseg7e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vlseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg8e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg8e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg8e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e8.v v8, (a0) +# 
CHECK-NEXT: - - - - - - - 8.00 vsseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vsseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vsseg2e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vsseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vsseg2e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 
8.00 vsseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg2e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vsseg2e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg2e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg2e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg2e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vsseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vsseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vsseg3e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 192.00 vsseg3e8.v v8, (a0) # 
CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vsseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vsseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vsseg3e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vsseg3e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg3e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vsseg3e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vsseg3e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg3e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg3e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - 
- - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vsseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vsseg4e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vsseg4e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg4e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg4e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg4e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vsseg4e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli 
zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg4e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg4e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vsseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vsseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 80.00 vsseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 160.00 vsseg5e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vsseg5e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vsseg5e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 80.00 vsseg5e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vsseg5e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vsseg5e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg5e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vsseg5e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, 
mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vsseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vsseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 192.00 vsseg6e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg6e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vsseg6e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vsseg6e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg6e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vsseg6e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg6e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vsseg6e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vsseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - 
- - - 1.00 vsseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vsseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 112.00 vsseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 224.00 vsseg7e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vsseg7e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vsseg7e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 112.00 vsseg7e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vsseg7e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vsseg7e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg7e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vsseg7e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e8.v 
v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vsseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e8.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vsseg8e8.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg8e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg8e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e16.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vsseg8e16.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg8e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e32.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vsseg8e32.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsseg8e64.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vsseg8e64.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 
vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 256.00 vlsseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vlsseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vlsseg2e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg2e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg2e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg2e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 192.00 vlsseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e16.v v8, (a0), 
a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vlsseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vlsseg3e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg3e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vlsseg3e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vlsseg3e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg3e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg3e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli 
zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 256.00 vlsseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vlsseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg4e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg4e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg4e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vlsseg4e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg4e64.v v8, (a0), a1 +# 
CHECK-NEXT: - - - - - - - 32.00 vlsseg4e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 40.00 vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 80.00 vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 160.00 vlsseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vlsseg5e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 40.00 vlsseg5e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 80.00 vlsseg5e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vlsseg5e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 40.00 vlsseg5e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg5e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vlsseg5e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, 
zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 192.00 vlsseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg6e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vlsseg6e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vlsseg6e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg6e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vlsseg6e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg6e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vlsseg6e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - 
- - - 28.00 vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 56.00 vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 112.00 vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 224.00 vlsseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vlsseg7e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 56.00 vlsseg7e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 112.00 vlsseg7e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vlsseg7e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 56.00 vlsseg7e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg7e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vlsseg7e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, 
mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 256.00 vlsseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg8e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg8e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vlsseg8e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg8e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vlsseg8e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlsseg8e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vlsseg8e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 
vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 256.00 vssseg2e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vssseg2e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# 
CHECK-NEXT: - - - - - - - 1.00 vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg2e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 8.00 vssseg2e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg2e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg2e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg2e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vssseg3e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 192.00 vssseg3e8.v v8, 
(a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vssseg3e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vssseg3e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg3e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vssseg3e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 12.00 vssseg3e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg3e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg3e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - 
- - - - 1.00 vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 256.00 vssseg4e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vssseg4e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg4e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg4e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg4e32.v v8, (a0), a1 # 
CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 16.00 vssseg4e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg4e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg4e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 40.00 vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 80.00 vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 160.00 vssseg5e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vssseg5e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 40.00 vssseg5e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 80.00 vssseg5e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vssseg5e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 
vssseg5e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 40.00 vssseg5e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg5e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 20.00 vssseg5e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 192.00 vssseg6e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg6e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vssseg6e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 96.00 vssseg6e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg6e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 48.00 vssseg6e32.v v8, (a0), a1 # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg6e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 24.00 vssseg6e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 56.00 vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 112.00 vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 224.00 vssseg7e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vssseg7e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 56.00 vssseg7e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 112.00 vssseg7e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vssseg7e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 56.00 vssseg7e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg7e64.v 
v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 28.00 vssseg7e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e8.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 256.00 vssseg8e8.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg8e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg8e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e16.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 128.00 vssseg8e16.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg8e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e32.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 64.00 vssseg8e32.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vssseg8e64.v v8, (a0), a1 +# CHECK-NEXT: - - - - - - - 32.00 vssseg8e64.v v8, (a0), a1 # CHECK-NEXT: - 1.00 - - - - 
- - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg2e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg2e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vlseg2e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg2e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg2e16ff.v v8, (a0) # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg2e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 8.00 vlseg2e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg2e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg2e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg2e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg3e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg3e8ff.v v8, (a0) # 
CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 192.00 vlseg3e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg3e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg3e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg3e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 12.00 vlseg3e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg3e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg3e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 
vlseg4e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg4e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg4e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vlseg4e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg4e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg4e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e32ff.v v8, (a0) +# CHECK-NEXT: 
- - - - - - - 64.00 vlseg4e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 16.00 vlseg4e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg4e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg4e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vlseg5e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 80.00 vlseg5e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 160.00 vlseg5e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 40.00 vlseg5e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 80.00 vlseg5e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e32ff.v v8, 
(a0) +# CHECK-NEXT: - - - - - - - 40.00 vlseg5e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg5e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 20.00 vlseg5e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg6e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg6e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 192.00 vlseg6e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg6e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 96.00 vlseg6e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg6e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 48.00 vlseg6e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 
1.00 vlseg6e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 24.00 vlseg6e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vlseg7e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 112.00 vlseg7e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 224.00 vlseg7e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vlseg7e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 112.00 vlseg7e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 56.00 vlseg7e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg7e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 28.00 vlseg7e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# 
CHECK-NEXT: - - - - - - - 1.00 vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg8e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg8e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e8ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 256.00 vlseg8e8ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg8e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e16ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 128.00 vlseg8e16ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e32ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 64.00 vlseg8e32ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vlseg8e64ff.v v8, (a0) +# CHECK-NEXT: - - - - - - - 32.00 vlseg8e64ff.v v8, (a0) # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli 
zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vluxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vluxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - 
- - - - - 1.00 vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vluxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei8.v v8, (a0), v16 +# 
CHECK-NEXT: - - - - - - - 96.00 vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vluxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vluxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vluxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vluxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vluxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 
vluxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vluxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vluxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vluxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 160.00 vluxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vluxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vluxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, 
tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vluxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vluxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vluxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vluxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vluxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vluxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 
vluxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vluxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vluxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vluxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 224.00 vluxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vluxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vluxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: 
- - - - - - - 112.00 vluxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vluxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vluxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vluxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vluxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vluxseg8ei16.v v8, 
(a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vluxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vluxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vluxseg8ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vloxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli 
zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vloxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vloxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# 
CHECK-NEXT: - - - - - - - 1.00 vloxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vloxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vloxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei32.v 
v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vloxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vloxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vloxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vloxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 
vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vloxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vloxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vloxseg5ei8.v v8, (a0), v16 # 
CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 160.00 vloxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vloxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vloxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vloxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vloxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vloxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vloxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, 
zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vloxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vloxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vloxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vloxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vloxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - 
- - - 1.00 vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 224.00 vloxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vloxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vloxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vloxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vloxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vloxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vloxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei8.v v8, (a0), v16 +# 
CHECK-NEXT: - - - - - - - 128.00 vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vloxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vloxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vloxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vloxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vloxseg8ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 
vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsuxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsuxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsuxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vsuxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsuxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, 
tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsuxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsuxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsuxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vsuxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 
vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsuxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsuxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei8.v v8, (a0), v16 +# 
CHECK-NEXT: - - - - - - - 40.00 vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 160.00 vsuxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsuxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vsuxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vsuxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsuxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vsuxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsuxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 
vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vsuxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsuxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsuxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsuxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsuxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 224.00 vsuxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsuxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vsuxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vsuxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsuxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vsuxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsuxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, 
mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vsuxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsuxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsuxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsuxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsuxseg8ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 
1.00 vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vsoxseg2ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsoxseg2ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei32.v v8, (a0), v16 +# 
CHECK-NEXT: - - - - - - - 16.00 vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg2ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 8.00 vsoxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg2ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg2ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsoxseg3ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vsoxseg3ei8.v v8, 
(a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsoxseg3ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsoxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsoxseg3ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 12.00 vsoxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg3ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg3ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - 
- - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vsoxseg4ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsoxseg4ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, tu, mu -# 
CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg4ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 16.00 vsoxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg4ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg4ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 160.00 vsoxseg5ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsoxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vsoxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 80.00 vsoxseg5ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 
vsoxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsoxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 40.00 vsoxseg5ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg5ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 20.00 vsoxseg5ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 192.00 vsoxseg6ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsoxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 96.00 vsoxseg6ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: 
- - - - - - - 24.00 vsoxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 48.00 vsoxseg6ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg6ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 24.00 vsoxseg6ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 224.00 vsoxseg7ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsoxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vsoxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 112.00 vsoxseg7ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsoxseg7ei32.v v8, 
(a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 56.00 vsoxseg7ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg7ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 28.00 vsoxseg7ei64.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei8.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 256.00 vsoxseg8ei8.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei16.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 128.00 vsoxseg8ei16.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - 
vsetvli zero, zero, e32, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei32.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 64.00 vsoxseg8ei32.v v8, (a0), v16 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, tu, mu -# CHECK-NEXT: - - - - - - - 1.00 vsoxseg8ei64.v v8, (a0), v16 +# CHECK-NEXT: - - - - - - - 32.00 vsoxseg8ei64.v v8, (a0), v16 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlxe-vsxe.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlxe-vsxe.s index 8a8503ea5941a..ad7904c55c501 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlxe-vsxe.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/vlxe-vsxe.s @@ -216,181 +216,181 @@ vsoxei64.v v8, (a0), v0 # CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA 
VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI8_V vluxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLUXEI8_V vluxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI16_V vluxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLUXEI16_V vluxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI16_V vluxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLUXEI16_V vluxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI16_V vluxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXEI16_V vluxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI16_V vluxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXEI16_V vluxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI16_V vluxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXEI16_V vluxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI16_V vluxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLUXEI16_V vluxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# 
CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI32_V vluxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLUXEI32_V vluxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI32_V vluxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLUXEI32_V vluxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI32_V vluxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXEI32_V vluxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI32_V vluxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXEI32_V vluxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI32_V vluxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLUXEI32_V vluxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI64_V vluxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLUXEI64_V vluxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI64_V vluxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLUXEI64_V vluxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI64_V vluxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLUXEI64_V vluxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLUXEI64_V vluxei64.v v8, 
(a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLUXEI64_V vluxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI8_V vloxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VLOXEI8_V vloxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI16_V vloxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLOXEI16_V vloxei16.v v8, 
(a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI16_V vloxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLOXEI16_V vloxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI16_V vloxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXEI16_V vloxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI16_V vloxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXEI16_V vloxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI16_V vloxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXEI16_V vloxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI16_V vloxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VLOXEI16_V vloxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI32_V vloxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLOXEI32_V vloxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI32_V vloxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLOXEI32_V vloxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI32_V vloxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXEI32_V vloxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA 
VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI32_V vloxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXEI32_V vloxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI32_V vloxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VLOXEI32_V vloxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI64_V vloxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VLOXEI64_V vloxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI64_V vloxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VLOXEI64_V vloxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI64_V vloxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VLOXEI64_V vloxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VLOXEI64_V vloxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VLOXEI64_V vloxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 
* 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI8_V vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSUXEI8_V vsuxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI16_V vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSUXEI16_V vsuxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI16_V vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSUXEI16_V vsuxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI16_V vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXEI16_V vsuxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI16_V vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 16 
16.00 * 16 SMX60_VLS[16] VSUXEI16_V vsuxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI16_V vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXEI16_V vsuxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI16_V vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSUXEI16_V vsuxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI32_V vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSUXEI32_V vsuxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI32_V vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSUXEI32_V vsuxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI32_V vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXEI32_V vsuxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI32_V vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXEI32_V vsuxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI32_V vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSUXEI32_V vsuxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI64_V vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSUXEI64_V vsuxei64.v v8, 
(a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI64_V vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSUXEI64_V vsuxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI64_V vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSUXEI64_V vsuxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSUXEI64_V vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSUXEI64_V vsuxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, 
zero, e8, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI8_V vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: 1 128 128.00 * 128 SMX60_VLS[128] VSOXEI8_V vsoxei8.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI16_V vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSOXEI16_V vsoxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI16_V vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSOXEI16_V vsoxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI16_V vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXEI16_V vsoxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI16_V vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXEI16_V vsoxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI16_V vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXEI16_V vsoxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI16_V vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: 1 64 64.00 * 64 SMX60_VLS[64] VSOXEI16_V vsoxei16.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 
SMX60_VLS VSOXEI32_V vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSOXEI32_V vsoxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI32_V vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSOXEI32_V vsoxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI32_V vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXEI32_V vsoxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI32_V vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 16 16.00 * 16 SMX60_VLS[16] VSOXEI32_V vsoxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI32_V vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: 1 32 32.00 * 32 SMX60_VLS[32] VSOXEI32_V vsoxei32.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI64_V vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 2 2.00 * 2 SMX60_VLS[2] VSOXEI64_V vsoxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI64_V vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 4 4.00 * 4 SMX60_VLS[4] VSOXEI64_V vsoxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI64_V vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 8 8.00 * 8 SMX60_VLS[8] VSOXEI64_V vsoxei64.v v8, (a0), v0 # CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: 1 1 1.00 * 1 SMX60_VLS VSOXEI64_V vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: 1 
16 16.00 * 16 SMX60_VLS[16] VSOXEI64_V vsoxei64.v v8, (a0), v0 # CHECK: Resources: # CHECK-NEXT: [0] - SMX60_FP @@ -404,183 +404,183 @@ vsoxei64.v v8, (a0), v0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] -# CHECK-NEXT: - 88.00 - - - - - 88.00 +# CHECK-NEXT: - 88.00 - - - - - 1888.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 128.00 vluxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vluxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - 
- - - 1.00 vluxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vluxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vluxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vluxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vluxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vluxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vluxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vluxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vluxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vluxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vluxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vluxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, 
zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vluxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vluxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vluxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vluxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 128.00 vloxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vloxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - 
- - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vloxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vloxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vloxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vloxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vloxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vloxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vloxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vloxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vloxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vloxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 
vloxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vloxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vloxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vloxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vloxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 128.00 vsuxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei16.v v8, (a0), v0 +# 
CHECK-NEXT: - - - - - - - 2.00 vsuxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsuxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsuxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsuxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vsuxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vsuxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsuxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsuxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsuxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsuxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vsuxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - 
- - - - - 1.00 vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsuxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsuxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsuxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsuxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsuxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e8, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei8.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 128.00 vsoxei8.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf4, 
ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsoxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsoxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsoxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsoxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vsoxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e16, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei16.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 64.00 vsoxei16.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, mf2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsoxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsoxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsoxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsoxei32.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e32, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei32.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 32.00 vsoxei32.v v8, (a0), v0 # CHECK-NEXT: - 
1.00 - - - - - - vsetvli zero, zero, e64, m1, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 2.00 vsoxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m2, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 4.00 vsoxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m4, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 8.00 vsoxei64.v v8, (a0), v0 # CHECK-NEXT: - 1.00 - - - - - - vsetvli zero, zero, e64, m8, ta, ma -# CHECK-NEXT: - - - - - - - 1.00 vsoxei64.v v8, (a0), v0 +# CHECK-NEXT: - - - - - - - 16.00 vsoxei64.v v8, (a0), v0 diff --git a/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe new file mode 100755 index 0000000000000..c4b8af0bf1f2a Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe differ diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test new file mode 100644 index 0000000000000..b96ae9018dae1 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test @@ -0,0 +1,37 @@ +; RUN: rm -rf %t +; RUN: mkdir -p %t +; RUN: cd %t + +; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof + +; Test --load-function-from-symbol=0 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB + +; CHECK-NO-LOAD-SYMTAB: warning: Loading of DWARF info completed, but no binary functions have been retrieved. 
+ +; Test --load-function-from-symbol=1 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB + +; CHECK-LOAD-SYMTAB: main:2:1 +; CHECK-LOAD-SYMTAB-NEXT: 1: 1 +; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1 +; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951 +; CHECK-LOAD-SYMTAB-NEXT: foo:0:0 +; CHECK-LOAD-SYMTAB-NEXT: 1: 0 +; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295 + +; Build instructions: +; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c +; missing-dwarf.exe: clang -fdebug-compilation-dir=. missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g + +; Source code: + +int foo() { + return 1; +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp index 7ba17e5b82095..6d4989e278fc1 100644 --- a/llvm/tools/llc/NewPMDriver.cpp +++ b/llvm/tools/llc/NewPMDriver.cpp @@ -14,8 +14,10 @@ #include "NewPMDriver.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/CommandFlags.h" +#include "llvm/CodeGen/LibcallLoweringInfo.h" #include "llvm/CodeGen/MIRParser/MIRParser.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -136,6 +138,16 @@ int llvm::compileModuleWithNewPM( SI.registerCallbacks(PIC, &MAM); FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); }); + + MAM.registerPass([&] { + const TargetOptions &Options = Target->Options; + return RuntimeLibraryAnalysis( + M->getTargetTriple(), Target->Options.ExceptionModel, + Target->Options.FloatABIType, 
Target->Options.EABIVersion, + Options.MCOptions.ABIName, Target->Options.VecLib); + }); + MAM.registerPass([&] { return LibcallLoweringModuleAnalysis(); }); + MAM.registerPass([&] { return MachineModuleAnalysis(MMI); }); ModulePassManager MPM; diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index ad31a0ec63387..613780ecbfb40 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/CodeGen/LinkAllAsmWriterComponents.h" @@ -727,6 +728,10 @@ static int compileModule(char **argv, LLVMContext &Context, // Build up all of the passes that we want to do to the module. legacy::PassManager PM; PM.add(new TargetLibraryInfoWrapperPass(TLII)); + PM.add(new RuntimeLibraryInfoWrapper( + M->getTargetTriple(), Target->Options.ExceptionModel, + Target->Options.FloatABIType, Target->Options.EABIVersion, + Options.MCOptions.ABIName, Target->Options.VecLib)); { raw_pwrite_stream *OS = &Out->os(); diff --git a/llvm/tools/llvm-profgen/Options.h b/llvm/tools/llvm-profgen/Options.h index f94cf9118c06a..b2c941fb01945 100644 --- a/llvm/tools/llvm-profgen/Options.h +++ b/llvm/tools/llvm-profgen/Options.h @@ -22,6 +22,7 @@ extern cl::opt ShowDetailedWarning; extern cl::opt InferMissingFrames; extern cl::opt EnableCSPreInliner; extern cl::opt UseContextCostForPreInliner; +extern cl::opt LoadFunctionFromSymbol; } // end namespace llvm diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 183b248a72320..1dc59321fd91f 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -1284,6 +1284,7 @@ void PerfScriptReader::warnInvalidRange() { uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; uint64_t UnmatchedRange = 0; + uint64_t 
RecoveredRange = 0; uint64_t RangeCrossFunc = 0; uint64_t BogusRange = 0; @@ -1309,6 +1310,9 @@ void PerfScriptReader::warnInvalidRange() { continue; } + if (FRange->Func->NameStatus != DwarfNameStatus::Matched) + RecoveredRange += I.second; + if (EndAddress >= FRange->EndAddress) { RangeCrossFunc += I.second; WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg); @@ -1328,6 +1332,9 @@ void PerfScriptReader::warnInvalidRange() { emitWarningSummary( UnmatchedRange, TotalRangeNum, "of samples are from ranges that do not belong to any functions."); + emitWarningSummary(RecoveredRange, TotalRangeNum, + "of samples are from ranges that belong to functions " + "recovered from symbol table."); emitWarningSummary( RangeCrossFunc, TotalRangeNum, "of samples are from ranges that do cross function boundaries."); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 3b875c5de3c09..33931f3ef9934 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -503,8 +503,11 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) { void ProfileGenerator::generateProfile() { collectProfiledFunctions(); - if (Binary->usePseudoProbes()) + if (Binary->usePseudoProbes()) { Binary->decodePseudoProbe(); + if (LoadFunctionFromSymbol) + Binary->loadSymbolsFromPseudoProbe(); + } if (SampleCounters) { if (Binary->usePseudoProbes()) { @@ -732,6 +735,14 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { if (!FRange || !FRange->IsFuncEntry) return StringRef(); + // DWARF and symbol table may have mismatching function names. Instead, we'll + // try to use its pseudo probe name first. 
+ if (Binary->usePseudoProbes()) { + auto FuncName = Binary->findPseudoProbeName(FRange->Func); + if (FuncName.size()) + return FunctionSamples::getCanonicalFnName(FuncName); + } + return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); } @@ -919,6 +930,8 @@ void CSProfileGenerator::generateProfile() { Binary->decodePseudoProbe(); if (InferMissingFrames) initializeMissingFrameInferrer(); + if (LoadFunctionFromSymbol) + Binary->loadSymbolsFromPseudoProbe(); } if (SampleCounters) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 96db6a714572a..5d8d1ce186b90 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -37,6 +37,14 @@ cl::opt ShowSourceLocations("show-source-locations", cl::desc("Print source locations."), cl::cat(ProfGenCategory)); +cl::opt LoadFunctionFromSymbol( + "load-function-from-symbol", cl::init(true), + cl::desc( + "Gather additional binary function info from symbols (e.g. .symtab) in " + "case dwarf info is incomplete. Only support binaries in ELF format " + "with pseudo probe, for other formats, this flag will be a no-op."), + cl::cat(ProfGenCategory)); + static cl::opt ShowCanonicalFnName("show-canonical-fname", cl::desc("Print canonical function name."), @@ -257,6 +265,9 @@ void ProfiledBinary::load() { if (ShowDisassemblyOnly) decodePseudoProbe(Obj); + if (LoadFunctionFromSymbol && UsePseudoProbes) + loadSymbolsFromSymtab(Obj); + // Disassemble the text sections. disassemble(Obj); @@ -461,6 +472,13 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) { } else { for (auto *F : ProfiledFunctions) { GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName)); + // DWARF name might be broken when a DWARF32 .debug_str.dwo section + // exceeds 4GB. We expect symbol table to contain the correct function + // names that match the pseudo probe. Adding back all the GUIDs if + // possible.
+ auto AltGUIDs = AlternativeFunctionGUIDs.equal_range(F); + for (const auto &[_, Func] : make_range(AltGUIDs)) + GuidFilter.insert(Func); for (auto &Range : F->Ranges) { auto GUIDs = StartAddrToSymMap.equal_range(Range.first); for (const auto &[StartAddr, Func] : make_range(GUIDs)) @@ -522,7 +540,9 @@ void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange, // Set IsFuncEntry to ture if there is only one range in the function or the // RangeSymName from ELF is equal to its DWARF-based function name. if (FuncRange->Func->Ranges.size() == 1 || - (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName)) + (!FuncRange->IsFuncEntry && + (FuncRange->getFuncName() == RangeSymName || + FuncRange->Func->NameStatus != DwarfNameStatus::Matched))) FuncRange->IsFuncEntry = true; } @@ -604,13 +624,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, // Record potential call targets for tail frame inference later-on. if (InferMissingFrames && FRange) { uint64_t Target = 0; - MIA->evaluateBranch(Inst, Address, Size, Target); + bool Err = MIA->evaluateBranch(Inst, Address, Size, Target); if (MCDesc.isCall()) { // Indirect call targets are unknown at this point. Recording the // unknown target (zero) for further LBR-based refinement. MissingContextInferrer->CallEdges[Address].insert(Target); } else if (MCDesc.isUnconditionalBranch()) { - assert(Target && + assert(Err && "target should be known for unconditional direct branch"); // Any inter-function unconditional jump is considered tail call at // this point. This is not 100% accurate and could further be @@ -820,6 +840,100 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { } } +void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) { + // Load binary functions from symbol table when Debug info is incomplete. + // Strip the internal suffixes which are not reflected in the DWARF info. 
+ const SmallVector Suffixes( + {// Internal suffixes from CoroSplit pass + ".cleanup", ".destroy", ".resume", + // Internal suffixes from Bolt + ".cold", ".warm", + // Compiler/LTO internal + ".llvm.", ".part.", ".isra.", ".constprop.", ".lto_priv."}); + StringRef FileName = Obj->getFileName(); + // Only apply this to ELF binary. e.g. COFF file format doesn't have `size` + // field in the symbol table. + bool IsELFObject = isa(Obj); + if (!IsELFObject) + return; + for (const SymbolRef &Symbol : Obj->symbols()) { + const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); + const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName); + const StringRef Name = unwrapOrError(Symbol.getName(), FileName); + uint64_t Size = 0; + if (LLVM_LIKELY(IsELFObject)) { + ELFSymbolRef ElfSymbol(Symbol); + Size = ElfSymbol.getSize(); + } + + if (Size == 0 || Type != SymbolRef::ST_Function) + continue; + + const uint64_t EndAddr = StartAddr + Size; + const StringRef SymName = + FunctionSamples::getCanonicalFnName(Name, Suffixes); + assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress() && + "Function range is invalid."); + + auto Range = findFuncRange(StartAddr); + if (!Range) { + assert(findFuncRange(EndAddr - 1) == nullptr && + "Function range overlaps with existing functions."); + // Function from symbol table not found previously in DWARF, store ranges. 
+ auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); + auto &Func = Ret.first->second; + if (Ret.second) { + Func.FuncName = Ret.first->first; + HashBinaryFunctions[Function::getGUIDAssumingExternalLinkage(SymName)] = + &Func; + } + + Func.NameStatus = DwarfNameStatus::Missing; + Func.Ranges.emplace_back(StartAddr, EndAddr); + + auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange()); + FuncRange &FRange = R.first->second; + + FRange.Func = &Func; + FRange.StartAddress = StartAddr; + FRange.EndAddress = EndAddr; + + } else if (SymName != Range->getFuncName()) { + // Function range already found from DWARF, but the symbol name from + // symbol table is inconsistent with debug info. Log this discrepancy and + // the alternative function GUID. + if (ShowDetailedWarning) + WithColor::warning() + << "Conflicting name for symbol " << Name << " with range (" + << format("%8" PRIx64, StartAddr) << ", " + << format("%8" PRIx64, EndAddr) << ")" + << ", but the DWARF symbol " << Range->getFuncName() + << " indicates an overlapping range (" + << format("%8" PRIx64, Range->StartAddress) << ", " + << format("%8" PRIx64, Range->EndAddress) << ")\n"; + + assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress && + "Mismatched function range"); + + Range->Func->NameStatus = DwarfNameStatus::Mismatch; + AlternativeFunctionGUIDs.emplace( + Range->Func, Function::getGUIDAssumingExternalLinkage(SymName)); + + } else if (StartAddr != Range->StartAddress && + EndAddr != Range->EndAddress) { + // Function already found in DWARF, but the address range from symbol + // table conflicts/overlaps with the debug info. 
+ WithColor::warning() << "Conflicting range for symbol " << Name + << " with range (" << format("%8" PRIx64, StartAddr) + << ", " << format("%8" PRIx64, EndAddr) << ")" + << ", but the DWARF symbol " << Range->getFuncName() + << " indicates another range (" + << format("%8" PRIx64, Range->StartAddress) << ", " + << format("%8" PRIx64, Range->EndAddress) << ")\n"; + } + } +} + void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { for (const auto &DieInfo : CompilationUnit.dies()) { llvm::DWARFDie Die(&CompilationUnit, &DieInfo); @@ -1034,6 +1148,58 @@ void ProfiledBinary::computeInlinedContextSizeForFunc( } } +void ProfiledBinary::loadSymbolsFromPseudoProbe() { + if (!UsePseudoProbes) + return; + + const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap(); + for (auto *Func : ProfiledFunctions) { + if (Func->NameStatus != DwarfNameStatus::Mismatch) + continue; + for (auto &[StartAddr, EndAddr] : Func->Ranges) { + auto Range = findFuncRangeForStartAddr(StartAddr); + if (!Range->IsFuncEntry) + continue; + const auto &Probe = Address2ProbesMap.find(StartAddr, EndAddr); + if (Probe.begin() != Probe.end()) { + const MCDecodedPseudoProbeInlineTree *InlineTreeNode = + Probe.begin()->get().getInlineTreeNode(); + while (!InlineTreeNode->isTopLevelFunc()) + InlineTreeNode = static_cast( + InlineTreeNode->Parent); + + auto TopLevelProbes = InlineTreeNode->getProbes(); + auto TopProbe = TopLevelProbes.begin(); + assert(TopProbe != TopLevelProbes.end() && + TopProbe->getAddress() >= StartAddr && + TopProbe->getAddress() < EndAddr && + "Top level pseudo probe does not match function range"); + + const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid); + auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName); + if (!Ret.second && Ret.first->second != ProbeDesc->FuncName && + ShowDetailedWarning) + WithColor::warning() + << "Mismatched pseudo probe names in function " << Func->FuncName + << " at range: (" << format("%8" PRIx64, 
StartAddr) << ", " + << format("%8" PRIx64, EndAddr) << "). " + << "The previously found pseudo probe name is " + << Ret.first->second << " but it conflicts with name " + << ProbeDesc->FuncName + << " This likely indicates a DWARF error that produces " + "conflicting symbols at the same starting address.\n"; + } + } + } +} + +StringRef ProfiledBinary::findPseudoProbeName(const BinaryFunction *Func) { + auto ProbeName = PseudoProbeNames.find(Func); + if (ProbeName == PseudoProbeNames.end()) + return StringRef(); + return ProbeName->second; +} + void ProfiledBinary::inferMissingFrames( const SmallVectorImpl &Context, SmallVectorImpl &NewContext) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 5a814b7dbd52d..1a83f8221df11 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -72,10 +72,22 @@ enum SpecialFrameAddr { using RangesTy = std::vector>; +enum DwarfNameStatus { + // Dwarf name matches with the symbol table (or symbol table just doesn't have + // this entry) + Matched = 0, + // Dwarf name is missing, but we fixed it with the name from symbol table + Missing = 1, + // Symbol table has different names on this. Log these GUIDs in + // AlternativeFunctionGUIDs + Mismatch = 2, +}; + struct BinaryFunction { StringRef FuncName; // End of range is an exclusive bound. RangesTy Ranges; + DwarfNameStatus NameStatus = DwarfNameStatus::Matched; uint64_t getFuncSize() { uint64_t Sum = 0; @@ -231,6 +243,14 @@ class ProfiledBinary { // GUID to symbol start address map DenseMap SymbolStartAddrs; + // Binary function to GUID mapping that stores the alternative names in symbol + // table, despite the original name from DWARF info + std::unordered_multimap + AlternativeFunctionGUIDs; + + // Mapping of profiled binary function to its pseudo probe name + std::unordered_map PseudoProbeNames; + // These maps are for temporary use of warning diagnosis. 
DenseSet AddrsWithMultipleSymbols; DenseSet> AddrsWithInvalidInstruction; @@ -356,6 +376,9 @@ class ProfiledBinary { // Create symbol to its start address mapping. void populateSymbolAddressList(const object::ObjectFile *O); + // Load functions from its symbol table (when DWARF info is missing). + void loadSymbolsFromSymtab(const object::ObjectFile *O); + // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start a function range is the real entry of the // function and also set false to the non-function label. @@ -599,6 +622,10 @@ class ProfiledBinary { void computeInlinedContextSizeForFunc(const BinaryFunction *Func); + void loadSymbolsFromPseudoProbe(); + + StringRef findPseudoProbeName(const BinaryFunction *Func); + const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const { return ProbeDecoder.getCallProbeForAddr(Address); } diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 01d7ac8e3f959..3209b652b44b4 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/CodeGen/LibcallLoweringInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" @@ -352,9 +353,9 @@ static void registerEPCallbacks(PassBuilder &PB) { bool llvm::runPassPipeline( StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, - RTLIB::RuntimeLibcallsInfo &RTLCI, ToolOutputFile *Out, - ToolOutputFile *ThinLTOLinkOut, ToolOutputFile *OptRemarkFile, - StringRef PassPipeline, ArrayRef PassPlugins, + ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut, + ToolOutputFile *OptRemarkFile, StringRef PassPipeline, + ArrayRef PassPlugins, ArrayRef> PassBuilderCallbacks, OutputKind OK, VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool 
ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, @@ -410,14 +411,24 @@ bool llvm::runPassPipeline( P->CSAction = PGOOptions::CSIRUse; } } - if (TM) - TM->setPGOOption(P); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; ModuleAnalysisManager MAM; - MAM.registerPass([&] { return RuntimeLibraryAnalysis(std::move(RTLCI)); }); + + if (TM) { + TM->setPGOOption(P); + + MAM.registerPass([&] { + const TargetOptions &Options = TM->Options; + return RuntimeLibraryAnalysis(M.getTargetTriple(), Options.ExceptionModel, + Options.FloatABIType, Options.EABIVersion, + Options.MCOptions.ABIName, Options.VecLib); + }); + + MAM.registerPass([&] { return LibcallLoweringModuleAnalysis(); }); + } PassInstrumentationCallbacks PIC; PrintPassOptions PrintPassOpts; diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h index 31da61b9c0cae..042d5d4bbfe47 100644 --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -31,10 +31,6 @@ class TargetMachine; class ToolOutputFile; class TargetLibraryInfoImpl; -namespace RTLIB { -struct RuntimeLibcallsInfo; -} - extern cl::opt DebugifyEach; extern cl::opt DebugifyExport; @@ -71,9 +67,9 @@ void printPasses(raw_ostream &OS); /// nullptr. 
bool runPassPipeline( StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, - RTLIB::RuntimeLibcallsInfo &RTLCI, ToolOutputFile *Out, - ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile, - StringRef PassPipeline, ArrayRef PassPlugins, + ToolOutputFile *Out, ToolOutputFile *ThinLinkOut, + ToolOutputFile *OptRemarkFile, StringRef PassPipeline, + ArrayRef PassPlugins, ArrayRef> PassBuilderCallbacks, opt_tool::OutputKind OK, opt_tool::VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index f8be9f16aada6..ac318e6bc1eb4 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -657,6 +657,13 @@ optMain(int argc, char **argv, return 1; } + TargetOptions CodeGenFlagsOptions; + const TargetOptions *Options = TM ? &TM->Options : &CodeGenFlagsOptions; + if (!TM) { + CodeGenFlagsOptions = + codegen::InitTargetOptionsFromCodeGenFlags(ModuleTriple); + } + // Override function attributes based on CPUStr, FeaturesStr, and command line // flags. codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M); @@ -674,13 +681,8 @@ optMain(int argc, char **argv, M->addModuleFlag(Module::Error, "UnifiedLTO", 1); } - VectorLibrary VecLib = codegen::getVectorLibrary(); // Add an appropriate TargetLibraryInfo pass for the module's triple. - TargetLibraryInfoImpl TLII(ModuleTriple, VecLib); - - RTLIB::RuntimeLibcallsInfo RTLCI(ModuleTriple, codegen::getExceptionModel(), - codegen::getFloatABIForCalls(), - codegen::getEABIVersion(), ABIName, VecLib); + TargetLibraryInfoImpl TLII(ModuleTriple, Options->VecLib); // The -disable-simplify-libcalls flag actually disables all builtin optzns. if (DisableSimplifyLibCalls) @@ -756,7 +758,7 @@ optMain(int argc, char **argv, // string. Hand off the rest of the functionality to the new code for that // layer. 
if (!runPassPipeline( - argv[0], *M, TM.get(), &TLII, RTLCI, Out.get(), ThinLinkOut.get(), + argv[0], *M, TM.get(), &TLII, Out.get(), ThinLinkOut.get(), RemarksFile.get(), Pipeline, PluginList, PassBuilderCallbacks, OK, VK, /* ShouldPreserveAssemblyUseListOrder */ false, /* ShouldPreserveBitcodeUseListOrder */ true, EmitSummaryIndex, @@ -804,6 +806,9 @@ optMain(int argc, char **argv, (VerifyDebugInfoPreserve && !VerifyEachDebugInfoPreserve); Passes.add(new TargetLibraryInfoWrapperPass(TLII)); + Passes.add(new RuntimeLibraryInfoWrapper( + ModuleTriple, Options->ExceptionModel, Options->FloatABIType, + Options->EABIVersion, Options->MCOptions.ABIName, Options->VecLib)); // Add internal analysis passes from the target machine. Passes.add(createTargetTransformInfoWrapperPass(TM ? TM->getTargetIRAnalysis() diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h index 20a95dd2f6aa6..e3139c931ddd6 100644 --- a/llvm/unittests/CAS/CASTestConfig.h +++ b/llvm/unittests/CAS/CASTestConfig.h @@ -74,7 +74,8 @@ class CASTest void SetUp() override { #ifdef _WIN32 - if (llvm::GetWindowsOSVersion() < llvm::VersionTuple(10, 0, 0, 17763)) + // Temporarily disable CAS tests on pre windows 11 OS. 
+ if (llvm::GetWindowsOSVersion() < llvm::VersionTuple(10, 0, 0, 22000)) GTEST_SKIP() << "CAS tests skipped on older windows version"; #endif NextCASIndex = 0; diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 03e5294b03860..37ee18194bb55 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -131,6 +131,7 @@ copy("Headers") { "altivec.h", "amdgpuintrin.h", "ammintrin.h", + "amo.h", "amxavx512intrin.h", "amxcomplexintrin.h", "amxfp16intrin.h", diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index bc4047cb3be52..97780c27fa44c 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -97,6 +97,7 @@ static_library("Support") { "FormattedStream.cpp", "GlobPattern.cpp", "GraphWriter.cpp", + "Hash.cpp", "HexagonAttributeParser.cpp", "HexagonAttributes.cpp", "InitLLVM.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn index 065d33dd7a3bc..b8974ddbaa6e4 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn @@ -61,6 +61,8 @@ static_library("LLVMHexagonCodeGen") { "HexagonGenMemAbsolute.cpp", "HexagonGenMux.cpp", "HexagonGenPredicate.cpp", + "HexagonGenWideningVecFloatInstr.cpp", + "HexagonGenWideningVecInstr.cpp", "HexagonHardwareLoops.cpp", "HexagonHazardRecognizer.cpp", "HexagonISelDAGToDAG.cpp", @@ -77,6 +79,7 @@ static_library("LLVMHexagonCodeGen") { "HexagonMask.cpp", "HexagonNewValueJump.cpp", "HexagonOptAddrMode.cpp", + "HexagonOptShuffleVector.cpp", "HexagonOptimizeSZextends.cpp", "HexagonPeephole.cpp", "HexagonQFPOptimizer.cpp", diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td 
index 77d1a6f8d53b5..fcfe959709f09 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2002,8 +2002,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", corresponding `device_type` attributes must be modified as well. }]; - let arguments = (ins - Variadic:$asyncOperands, + let arguments = (ins Variadic:$asyncOperands, OptionalAttr:$asyncOperandsDeviceType, OptionalAttr:$asyncOnly, Variadic:$waitOperands, @@ -2018,12 +2017,12 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", OptionalAttr:$numWorkersDeviceType, Variadic:$vectorLength, OptionalAttr:$vectorLengthDeviceType, - Optional:$ifCond, - Optional:$selfCond, - UnitAttr:$selfAttr, + Optional:$ifCond, Optional:$selfCond, UnitAttr:$selfAttr, + Variadic:$reductionOperands, + Variadic:$privateOperands, + Variadic:$firstprivateOperands, Variadic:$dataClauseOperands, - OptionalAttr:$defaultAttr, - UnitAttr:$combined); + OptionalAttr:$defaultAttr, UnitAttr:$combined); let regions = (region AnyRegion:$region); @@ -2111,6 +2110,18 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", /// types. void addWaitOperands(MLIRContext *, bool hasDevnum, mlir::ValueRange, llvm::ArrayRef); + + /// Adds a private clause variable to this operation, including its recipe. + void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe); + /// Adds a firstprivate clause variable to this operation, including its + /// recipe. + void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op, + mlir::acc::FirstprivateRecipeOp recipe); + /// Adds a reduction clause variable to this operation, including its + /// recipe. 
+ void addReduction(MLIRContext *, mlir::acc::ReductionOp op, + mlir::acc::ReductionRecipeOp recipe); }]; let assemblyFormat = [{ @@ -2119,10 +2130,12 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` | `async` `` custom($asyncOperands, type($asyncOperands), $asyncOperandsDeviceType, $asyncOnly) + | `firstprivate` `(` $firstprivateOperands `:` type($firstprivateOperands) `)` | `num_gangs` `(` custom($numGangs, type($numGangs), $numGangsDeviceType, $numGangsSegments) `)` | `num_workers` `(` custom($numWorkers, type($numWorkers), $numWorkersDeviceType) `)` + | `private` `(` $privateOperands `:` type($privateOperands) `)` | `vector_length` `(` custom($vectorLength, type($vectorLength), $vectorLengthDeviceType) `)` | `wait` `` custom($waitOperands, type($waitOperands), @@ -2130,6 +2143,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", $waitOnly) | `self` `(` $selfCond `)` | `if` `(` $ifCond `)` + | `reduction` `(` $reductionOperands `:` type($reductionOperands) `)` ) $region attr-dict-with-keyword }]; diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 43ebcaa03a470..d8ed46c2820fe 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -2605,7 +2605,9 @@ def Vector_ConstantMaskOp : } def Vector_CreateMaskOp : - Vector_Op<"create_mask", [Pure]>, + Vector_Op<"create_mask", [Pure, + DeclareOpInterfaceMethods + ]>, Arguments<(ins Variadic:$operands)>, Results<(outs VectorOfAnyRankOf<[I1]>)> { let summary = "creates a vector mask"; diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 7039bbe1d11ec..9235f89b7969a 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -2675,6 +2675,27 @@ LogicalResult acc::KernelsOp::verify() { return checkDataOperands(*this, 
getDataClauseOperands()); } +void acc::KernelsOp::addPrivatization(MLIRContext *context, + mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe) { + op.setRecipeAttr(mlir::SymbolRefAttr::get(context, recipe.getSymName())); + getPrivateOperandsMutable().append(op.getResult()); +} + +void acc::KernelsOp::addFirstPrivatization( + MLIRContext *context, mlir::acc::FirstprivateOp op, + mlir::acc::FirstprivateRecipeOp recipe) { + op.setRecipeAttr(mlir::SymbolRefAttr::get(context, recipe.getSymName())); + getFirstprivateOperandsMutable().append(op.getResult()); +} + +void acc::KernelsOp::addReduction(MLIRContext *context, + mlir::acc::ReductionOp op, + mlir::acc::ReductionRecipeOp recipe) { + op.setRecipeAttr(mlir::SymbolRefAttr::get(context, recipe.getSymName())); + getReductionOperandsMutable().append(op.getResult()); +} + void acc::KernelsOp::addNumWorkersOperand( MLIRContext *context, mlir::Value newValue, llvm::ArrayRef effectiveDeviceTypes) { diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp index b60f80534bfb6..462bd8c3dc4a6 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp @@ -1003,6 +1003,97 @@ struct UnrollFromElements : OpRewritePattern { vector::UnrollVectorOptions options; }; +/// This pattern unrolls `vector.create_mask` operations into smaller mask +/// operations based on the target unroll shape. Each unrolled slice computes +/// its local mask size in each dimension (d) as: +/// min(max(originalMaskSize[d] - offset[d], 0), unrolledDimSize[d]). 
+/// Example: +/// Given a create_mask operation: +/// %0 = vector.create_mask %c6, %c10 : vector<8x16xi1> // mask first 6x10 +/// elements +/// +/// and a target unroll shape of <4x8>, the pattern produces: +/// +/// %false = arith.constant dense : vector<8x16xi1> +/// +/// Slice [0,0]: +/// mask size = min(max(6-0, 0), 4) x min(max(10-0, 0), 8) = 4x8 +/// %mask00 = vector.create_mask %c4, %c8 : vector<4x8xi1> +/// %r0 = vector.insert_strided_slice %mask00, %false [0, 0], [1, 1] +/// : vector<4x8xi1> into vector<8x16xi1> +/// Slice [0,8]: +/// mask size = min(max(6-0, 0), 4) x min(max(10-8, 0), 8) = 4x2 +/// %mask01 = vector.create_mask %c4, %c2 : vector<4x8xi1> +/// %r1 = vector.insert_strided_slice %mask01, %r0 [0, 8], [1, 1] +/// : vector<4x8xi1> into vector<8x16xi1> +/// Slice [4,0]: +/// mask size = min(max(6-4, 0), 4) x min(max(10-0, 0), 8) = 2x8 +/// %mask10 = vector.create_mask %c2, %c8 : vector<4x8xi1> +/// %r2 = vector.insert_strided_slice %mask10, %r1 [4, 0], [1, 1] +/// : vector<4x8xi1> into vector<8x16xi1> +/// Slice [4,8]: +/// mask size = min(max(6-4, 0), 4) x min(max(10-8, 0), 8) = 2x2 +/// %mask11 = vector.create_mask %c2, %c2 : vector<4x8xi1> +/// %result = vector.insert_strided_slice %mask11, %r2 [4, 8], [1, 1] +/// : vector<4x8xi1> into vector<8x16xi1> +struct UnrollCreateMaskPattern : public OpRewritePattern { + UnrollCreateMaskPattern(MLIRContext *context, + const vector::UnrollVectorOptions &options, + PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + options(options) {} + + LogicalResult matchAndRewrite(vector::CreateMaskOp createMaskOp, + PatternRewriter &rewriter) const override { + auto targetShape = getTargetShape(options, createMaskOp); + if (!targetShape) + return failure(); + + VectorType resultType = createMaskOp.getVectorType(); + SmallVector originalSize = *createMaskOp.getShapeForUnroll(); + Location loc = createMaskOp.getLoc(); + + Value result = arith::ConstantOp::create(rewriter, loc, resultType, + 
rewriter.getZeroAttr(resultType)); + VectorType targetVectorType = + VectorType::get(*targetShape, rewriter.getI1Type()); + SmallVector strides(targetShape->size(), 1); + + // In each dimension (d), each unrolled vector computes its mask size as: + // min(max(originalMaskOperands[d] - offset[d], 0), unrolledDimSize[d]). + for (SmallVector offsets : + StaticTileOffsetRange(originalSize, *targetShape)) { + SmallVector unrolledOperands; + + for (auto [i, originalMaskOperand] : + llvm::enumerate(createMaskOp.getOperands())) { + Value offsetVal = + arith::ConstantIndexOp::create(rewriter, loc, offsets[i]); + Value adjustedMaskSize = rewriter.createOrFold( + loc, originalMaskOperand, offsetVal); + Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0); + Value unrolledDimSize = + arith::ConstantIndexOp::create(rewriter, loc, (*targetShape)[i]); + Value nonNegative = + rewriter.createOrFold(loc, adjustedMaskSize, zero); + Value unrolledOperand = rewriter.createOrFold( + loc, nonNegative, unrolledDimSize); + unrolledOperands.push_back(unrolledOperand); + } + + auto unrolledMask = rewriter.createOrFold( + loc, targetVectorType, unrolledOperands); + result = rewriter.createOrFold( + loc, unrolledMask, result, offsets, strides); + } + rewriter.replaceOp(createMaskOp, result); + return success(); + } + +private: + vector::UnrollVectorOptions options; +}; + /// Checks whether extractShape is a contiguous slice of shape. 
/// For extractShape to be contiguous in shape: /// 1) All but the leading dimension of extractShape and shape must match @@ -1202,8 +1293,9 @@ void mlir::vector::populateVectorUnrollPatterns( UnrollReductionPattern, UnrollMultiReductionPattern, UnrollTransposePattern, UnrollGatherPattern, UnrollLoadPattern, UnrollStorePattern, UnrollBroadcastPattern, UnrollFromElements, - UnrollToElements, UnrollStepPattern, UnrollShapeCastPattern>( - patterns.getContext(), options, benefit); + UnrollToElements, UnrollStepPattern, UnrollShapeCastPattern, + UnrollCreateMaskPattern>(patterns.getContext(), options, + benefit); } void mlir::vector::populateVectorToElementsUnrollPatterns( diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index e004a88261c78..5a1c20bcf5a24 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -731,6 +731,59 @@ func.func @testserialop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10 // ----- +// Test acc.kernels with private and firstprivate operands, similar to acc.serial. 
+ +acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %0 = memref.alloc() : memref<10xf32> + acc.yield %0 : memref<10xf32> +} destroy { +^bb0(%arg0: memref<10xf32>): + memref.dealloc %arg0 : memref<10xf32> + acc.terminator +} + +acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init { +^bb0(%arg0: memref<10x10xf32>): + %1 = memref.alloc() : memref<10x10xf32> + acc.yield %1 : memref<10x10xf32> +} destroy { +^bb0(%arg0: memref<10x10xf32>): + memref.dealloc %arg0 : memref<10x10xf32> + acc.terminator +} + +acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %2 = memref.alloca() : memref<10xf32> + acc.yield %2 : memref<10xf32> +} copy { +^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): + memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32> + acc.terminator +} destroy { +^bb0(%arg0: memref<10xf32>): + acc.terminator +} + +func.func @testkernelspriv(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { + %priv_a = acc.private varPtr(%a : memref<10xf32>) recipe(@privatization_memref_10_f32) -> memref<10xf32> + %priv_c = acc.private varPtr(%c : memref<10x10xf32>) recipe(@privatization_memref_10_10_f32) -> memref<10x10xf32> + %firstp = acc.firstprivate varPtr(%b : memref<10xf32>) varType(tensor<10xf32>) recipe(@firstprivatization_memref_10xf32) -> memref<10xf32> + acc.kernels firstprivate(%firstp : memref<10xf32>) private(%priv_a, %priv_c : memref<10xf32>, memref<10x10xf32>) { + } + return +} + +// CHECK-LABEL: func.func @testkernelspriv( +// CHECK: %[[PRIV_A:.*]] = acc.private varPtr(%{{.*}} : memref<10xf32>) recipe(@privatization_memref_10_f32) -> memref<10xf32> +// CHECK: %[[PRIV_C:.*]] = acc.private varPtr(%{{.*}} : memref<10x10xf32>) recipe(@privatization_memref_10_10_f32) -> memref<10x10xf32> +// CHECK: %[[FIRSTP:.*]] = acc.firstprivate varPtr(%{{.*}} : memref<10xf32>) varType(tensor<10xf32>) 
recipe(@firstprivatization_memref_10xf32) -> memref<10xf32>
+// CHECK: acc.kernels firstprivate(%[[FIRSTP]] : memref<10xf32>) private(%[[PRIV_A]], %[[PRIV_C]] : memref<10xf32>, memref<10x10xf32>) {
+// CHECK-NEXT: }
+
+// -----
+
 func.func @testdataop(%a: memref, %b: memref, %c: memref) -> () {
   %ifCond = arith.constant true
@@ -1602,6 +1655,35 @@ func.func @acc_reduc_test(%a : memref) -> () {
 // -----
+acc.reduction.recipe @reduction_add_memref_i64 : memref<i64> reduction_operator <add> init {
+^bb0(%arg0: memref<i64>):
+  %c0_i64 = arith.constant 0 : i64
+  %alloca = memref.alloca() : memref<i64>
+  memref.store %c0_i64, %alloca[] : memref<i64>
+  acc.yield %alloca : memref<i64>
+} combiner {
+^bb0(%arg0: memref<i64>, %arg1: memref<i64>):
+  %0 = memref.load %arg0[] : memref<i64>
+  %1 = memref.load %arg1[] : memref<i64>
+  %2 = arith.addi %0, %1 : i64
+  memref.store %2, %arg0[] : memref<i64>
+  acc.yield %arg0 : memref<i64>
+}
+
+func.func @acc_kernels_reduc_test(%a : memref<i64>) -> () {
+  %reduction_a = acc.reduction varPtr(%a : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
+  acc.kernels reduction(%reduction_a : memref<i64>) {
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @acc_kernels_reduc_test(
+// CHECK-SAME: %[[ARG0:.*]]: memref<i64>)
+// CHECK: %[[REDUCTION_A:.*]] = acc.reduction varPtr(%[[ARG0]] : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
+// CHECK-NEXT: acc.kernels reduction(%[[REDUCTION_A]] : memref<i64>)
+
+// -----
+
 func.func @testdeclareop(%a: memref, %b: memref, %c: memref) -> () {
   %0 = acc.copyin varPtr(%a : memref) -> memref
   // copyin(zero)
diff --git a/mlir/test/Dialect/Vector/vector-unroll-options.mlir b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
index dec32e1c61a9b..805e66f133c59 100644
--- a/mlir/test/Dialect/Vector/vector-unroll-options.mlir
+++ b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
@@ -497,6 +497,61 @@ func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf3
 // CHECK-NOT: arith.addf
 // CHECK: return
+func.func @vector_create_mask(%size1: index, %size2: index) ->
vector<16x16xi1> {
+  %0 = vector.create_mask %size1, %size2 : vector<16x16xi1>
+  return %0 : vector<16x16xi1>
+}
+
+// CHECK-LABEL: func @vector_create_mask
+// CHECK-SAME: (%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) -> vector<16x16xi1>
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<16x16xi1>
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[C8:.*]] = arith.constant 8 : index
+// CHECK: %[[MAX0:.*]] = arith.maxsi %[[ARG0]], %[[C0]] : index
+// CHECK: %[[MIN0:.*]] = arith.minsi %[[MAX0]], %[[C8]] : index
+// CHECK: %[[MAX1:.*]] = arith.maxsi %[[ARG1]], %[[C0]] : index
+// CHECK: %[[MIN1:.*]] = arith.minsi %[[MAX1]], %[[C8]] : index
+// CHECK: %[[MASK00:.*]] = vector.create_mask %[[MIN0]], %[[MIN1]] : vector<8x8xi1>
+// CHECK: %[[INS00:.*]] = vector.insert_strided_slice %[[MASK00]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: %[[MAX0_2:.*]] = arith.maxsi %[[ARG0]], %[[C0]] : index
+// CHECK: %[[MIN0_2:.*]] = arith.minsi %[[MAX0_2]], %[[C8]] : index
+// CHECK: %[[SUB1:.*]] = arith.subi %[[ARG1]], %[[C8]] : index
+// CHECK: %[[MAX1_2:.*]] = arith.maxsi %[[SUB1]], %[[C0]] : index
+// CHECK: %[[MIN1_2:.*]] = arith.minsi %[[MAX1_2]], %[[C8]] : index
+// CHECK: %[[MASK01:.*]] = vector.create_mask %[[MIN0_2]], %[[MIN1_2]] : vector<8x8xi1>
+// CHECK: %[[INS01:.*]] = vector.insert_strided_slice %[[MASK01]], %[[INS00]] {offsets = [0, 8], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: %[[SUB0:.*]] = arith.subi %[[ARG0]], %[[C8]] : index
+// CHECK: %[[MAX0_3:.*]] = arith.maxsi %[[SUB0]], %[[C0]] : index
+// CHECK: %[[MIN0_3:.*]] = arith.minsi %[[MAX0_3]], %[[C8]] : index
+// CHECK: %[[MAX1_3:.*]] = arith.maxsi %[[ARG1]], %[[C0]] : index
+// CHECK: %[[MIN1_3:.*]] = arith.minsi %[[MAX1_3]], %[[C8]] : index
+// CHECK: %[[MASK10:.*]] = vector.create_mask %[[MIN0_3]], %[[MIN1_3]] : vector<8x8xi1>
+// CHECK: %[[INS10:.*]] = vector.insert_strided_slice %[[MASK10]], %[[INS01]] {offsets = [8, 0],
strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: %[[SUB0_2:.*]] = arith.subi %[[ARG0]], %[[C8]] : index
+// CHECK: %[[MAX0_4:.*]] = arith.maxsi %[[SUB0_2]], %[[C0]] : index
+// CHECK: %[[MIN0_4:.*]] = arith.minsi %[[MAX0_4]], %[[C8]] : index
+// CHECK: %[[SUB1_2:.*]] = arith.subi %[[ARG1]], %[[C8]] : index
+// CHECK: %[[MAX1_4:.*]] = arith.maxsi %[[SUB1_2]], %[[C0]] : index
+// CHECK: %[[MIN1_4:.*]] = arith.minsi %[[MAX1_4]], %[[C8]] : index
+// CHECK: %[[MASK11:.*]] = vector.create_mask %[[MIN0_4]], %[[MIN1_4]] : vector<8x8xi1>
+// CHECK: %[[INS11:.*]] = vector.insert_strided_slice %[[MASK11]], %[[INS10]] {offsets = [8, 8], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: return %[[INS11]] : vector<16x16xi1>
+
+func.func @vector_create_mask_constant_dim_sizes() -> vector<16x16xi1> {
+  %cst16 = arith.constant 16 : index
+  %0 = vector.create_mask %cst16, %cst16 : vector<16x16xi1>
+  return %0 : vector<16x16xi1>
+}
+
+// CHECK-LABEL: func @vector_create_mask_constant_dim_sizes() -> vector<16x16xi1> {
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<16x16xi1>
+// CHECK: %[[CST_0:.*]] = arith.constant dense<true> : vector<8x8xi1>
+// CHECK: %[[S0:.*]] = vector.insert_strided_slice %[[CST_0]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: %[[S1:.*]] = vector.insert_strided_slice %[[CST_0]], %[[S0]] {offsets = [0, 8], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: %[[S2:.*]] = vector.insert_strided_slice %[[CST_0]], %[[S1]] {offsets = [8, 0], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: %[[S3:.*]] = vector.insert_strided_slice %[[CST_0]], %[[S2]] {offsets = [8, 8], strides = [1, 1]} : vector<8x8xi1> into vector<16x16xi1>
+// CHECK: return %[[S3]] : vector<16x16xi1>
+
 func.func @shape_cast_1D(%v: vector<16xf32>) -> vector<2x2x4xf32> {
   %0 = vector.shape_cast %v : vector<16xf32> to vector<2x2x4xf32>
diff --git
a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index e8ea0cc02d7f6..f834d0cdd42bd 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -178,6 +178,12 @@ struct TestVectorUnrollingPatterns .setFilterConstraint([](Operation *op) { return success(isa(op)); })); + populateVectorUnrollPatterns( + patterns, UnrollVectorOptions() + .setNativeShape(ArrayRef{8, 8}) + .setFilterConstraint([](Operation *op) { + return success(isa(op)); + })); populateVectorUnrollPatterns( patterns, UnrollVectorOptions()