diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9c45751..9a2c504 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -70,6 +70,6 @@ jobs: shell: julia --project=docs --color=yes {0} run: | using Documenter: DocMeta, doctest - using CodeDifferences - DocMeta.setdocmeta!(CodeDifferences, :DocTestSetup, :(using CodeDifferences); recursive=true) - doctest(CodeDifferences) + using CodeDiffs + DocMeta.setdocmeta!(CodeDiffs, :DocTestSetup, :(using CodeDiffs); recursive=true) + doctest(CodeDiffs) diff --git a/Project.toml b/Project.toml index 6f98a56..ccd951b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,14 +1,34 @@ -name = "CodeDifferences" +name = "CodeDiffs" uuid = "0d84036a-ccd8-408b-b2b2-9a2d9429e273" authors = ["Luc Briand <34173752+Keluaa@users.noreply.github.com> and contributors"] version = "1.0.0-DEV" +[deps] +DeepDiffs = "ab62b9b5-e342-54a8-a765-a90f495de1a6" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404" +WidthLimitedIO = "b8c1c048-cf81-46c6-9da0-18c1d99e41f2" + [compat] +Aqua = "0.7" +DeepDiffs = "1" +InteractiveUtils = "1" +MacroTools = "0.5" +Markdown = "1" +ReferenceTests = "0.10" +StringDistances = "0.11" +Test = "1" +WidthLimitedIO = "1" julia = "1.6" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +DeepDiffs = "ab62b9b5-e342-54a8-a765-a90f495de1a6" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +ReferenceTests = "324d217c-45ce-50fc-942e-d289b448e8cf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "Test"] +test = ["Aqua", "InteractiveUtils", "ReferenceTests", "Test"] diff --git a/README.md b/README.md index 551bb0c..a6bf0b4 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,46 @@ -# CodeDifferences +# CodeDiffs 
-[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://Keluaa.github.io/CodeDifferences.jl/stable/) -[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://Keluaa.github.io/CodeDifferences.jl/dev/) -[![Build Status](https://github.com/Keluaa/CodeDifferences.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Keluaa/CodeDifferences.jl/actions/workflows/CI.yml?query=branch%3Amain) -[![Coverage](https://codecov.io/gh/Keluaa/CodeDifferences.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Keluaa/CodeDifferences.jl) +[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://Keluaa.github.io/CodeDiffs.jl/stable/) +[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://Keluaa.github.io/CodeDiffs.jl/dev/) +[![Build Status](https://github.com/Keluaa/CodeDiffs.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Keluaa/CodeDiffs.jl/actions/workflows/CI.yml?query=branch%3Amain) +[![Coverage](https://codecov.io/gh/Keluaa/CodeDiffs.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Keluaa/CodeDiffs.jl) [![Aqua](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) + +Compare code and display the difference in the terminal side-by-side. +Supports syntax highlighting. + +The `@code_diff` macro is the main entry point. If possible, the code type will be +detected automatically, otherwise add e.g. 
`type=:native` for native assembly comparison: +```julia +julia> f1(a) = a + 1 +f1 (generic function with 1 method) + +julia> @code_diff type=:llvm debuginfo=:none f1(Int64(1)) f1(Int8(1)) +; Function Attrs: uwtable ┃ ; Function Attrs: uwtable +define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f1(i8 signext %0) #0 { +top: ┃ top: + %1 = add i64 %0, 1 ⟪╋⟫ %2 = add nsw i64 %1, 1 + ret i64 %1 ⟪╋⟫ ret i64 %2 + ┣⟫ %1 = sext i8 %0 to i64 +} ┃ } + ┃ + +julia> f2(a) = a - 1 +f2 (generic function with 1 method) + +julia> @code_diff type=:llvm debuginfo=:none f1(1) f2(1) +; Function Attrs: uwtable ┃ ; Function Attrs: uwtable +define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f2(i64 signext %0) #0 { +top: ┃ top: + %1 = add i64 %0, 1 ⟪╋⟫ %1 = add i64 %0, -1 + ret i64 %1 ┃ ret i64 %1 +} ┃ } + ┃ +``` + +## Supported languages + + - native CPU assembly (output of `@code_native`) + - LLVM IR (output of `@code_llvm`) + - Typed Julia IR (output of `@code_typed`) + - Julia AST (any `Expr`) diff --git a/docs/Project.toml b/docs/Project.toml index 6c6aaea..aaa0666 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,3 +1,6 @@ [deps] -CodeDifferences = "0d84036a-ccd8-408b-b2b2-9a2d9429e273" +CodeDiffs = "0d84036a-ccd8-408b-b2b2-9a2d9429e273" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "1" diff --git a/docs/make.jl b/docs/make.jl index 1a135d3..923a516 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,14 +1,14 @@ -using CodeDifferences +using CodeDiffs using Documenter -DocMeta.setdocmeta!(CodeDifferences, :DocTestSetup, :(using CodeDifferences); recursive=true) +DocMeta.setdocmeta!(CodeDiffs, :DocTestSetup, :(using CodeDiffs); recursive=true) makedocs(; - modules=[CodeDifferences], + modules=[CodeDiffs], authors="Luc Briand <34173752+Keluaa@users.noreply.github.com> and contributors", - sitename="CodeDifferences.jl", + sitename="CodeDiffs.jl", format=Documenter.HTML(; - canonical="https://Keluaa.github.io/CodeDifferences.jl", + 
canonical="https://Keluaa.github.io/CodeDiffs.jl", edit_link="main", assets=String[], ), @@ -18,6 +18,6 @@ makedocs(; ) deploydocs(; - repo="github.com/Keluaa/CodeDifferences.jl", + repo="github.com/Keluaa/CodeDiffs.jl", devbranch="main", ) diff --git a/docs/src/index.md b/docs/src/index.md index 5bb4177..76b3772 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,14 +1,76 @@ ```@meta -CurrentModule = CodeDifferences +CurrentModule = CodeDiffs ``` -# CodeDifferences +# CodeDiffs -Documentation for [CodeDifferences](https://github.com/Keluaa/CodeDifferences.jl). +Compare different types of code and display it in the terminal. +For cleaner results, syntax highlighting is separated from the difference calculation. -```@index +Supports: + - native CPU assembly (output of `@code_native`, highlighted by `InteractiveUtils.print_native`) + - LLVM IR (output of `@code_llvm`, highlighted by `InteractiveUtils.print_llvm`) + - Typed Julia IR (output of `@code_typed`, highlighted through the `Base.show` method of `Core.CodeInfo`) + - Julia AST (an `Expr`), highlighting is done with: + - OhMyREPL.jl's Julia syntax highlighting in Markdown code blocks + - (Julia ≥ v1.11) [JuliaSyntaxHighlighting.jl](https://github.com/JuliaLang/JuliaSyntaxHighlighting.jl) + +The [`@code_diff`](@ref) macro is the main entry point. If possible, the code type will be +detected automatically, otherwise add e.g. 
`type=:native` for native assembly comparison: + +```jldoctest; setup=:(using CodeDiffs) +julia> f1(a) = a + 1 +f1 (generic function with 1 method) + +julia> @code_diff type=:llvm debuginfo=:none color=false f1(Int64(1)) f1(Int8(1)) +; Function Attrs: uwtable ┃ ; Function Attrs: uwtable +define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f1(i8 signext %0) #0 { +top: ┃ top: + %1 = add i64 %0, 1 ⟪╋⟫ %2 = add nsw i64 %1, 1 + ret i64 %1 ⟪╋⟫ ret i64 %2 + ┣⟫ %1 = sext i8 %0 to i64 +} ┃ } + ┃ + +julia> f2(a) = a - 1 +f2 (generic function with 1 method) + +julia> @code_diff type=:llvm debuginfo=:none color=false f1(1) f2(1) +; Function Attrs: uwtable ┃ ; Function Attrs: uwtable +define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f2(i64 signext %0) #0 { +top: ┃ top: + %1 = add i64 %0, 1 ⟪╋⟫ %1 = add i64 %0, -1 + ret i64 %1 ┃ ret i64 %1 +} ┃ } + ┃ ``` -```@autodocs -Modules = [CodeDifferences] +Setting the environment variable `"CODE_DIFFS_LINE_NUMBERS"` to `true` will display line +numbers on each side. + +# Main functions + +```@docs +CodeDiff +compare_code_native +compare_code_llvm +compare_code_typed +compare_ast +code_diff(::AbstractString, ::AbstractString) +code_diff(::Markdown.MD, ::Markdown.MD) +@code_diff +``` + +# Display functions + +```@docs +optimize_line_changes! +replace_llvm_module_name +side_by_side_diff +``` + +# Internals + +```@docs +LLVM_MODULE_NAME_REGEX ``` diff --git a/src/CodeDiff.jl b/src/CodeDiff.jl new file mode 100644 index 0000000..c344145 --- /dev/null +++ b/src/CodeDiff.jl @@ -0,0 +1,114 @@ + +""" + CodeDiff(code₁, code₂) + CodeDiff(code₁, code₂, highlighted₁, highlighted₂) + +A difference between `code₁` and `code₂`. + +`code₁` and `code₂` should have no highlighting. Only `highlighted₁` and `highlighted₂` +should have syntax highlighting. When showing the differences, their formatting will be +re-applied. + +For cleaner differences, use [`replace_llvm_module_name`](@ref) on all codes. 
+ +Use [`optimize_line_changes!`](@ref) to improve the difference. + +Fancy REPL output is done with [`side_by_side_diff`](@ref). +""" +struct CodeDiff <: DeepDiffs.DeepDiff + before::String + after::String + changed::Dict{Int, DeepDiffs.StringDiff} + ignore_added::Set{Int} + diff::DeepDiffs.VectorDiff + highlighted_before::String + highlighted_after::String +end + + +function CodeDiff( + diff::DeepDiffs.StringLineDiff, + highlighted_before::AbstractString, highlighted_after::AbstractString +) + return CodeDiff( + diff.before, diff.after, Dict(), Set(), diff.diff, + highlighted_before, highlighted_after + ) +end + +function CodeDiff(X, Y, highlighted_X, highlighted_Y) + return CodeDiff(DeepDiffs.deepdiff(X, Y), highlighted_X, highlighted_Y) +end + +CodeDiff(X, Y) = CodeDiff(X, Y, X, Y) + + +DeepDiffs.before(diff::CodeDiff) = diff.before +DeepDiffs.after(diff::CodeDiff) = diff.after +DeepDiffs.added(diff::CodeDiff) = DeepDiffs.added(diff.diff) +DeepDiffs.removed(diff::CodeDiff) = DeepDiffs.removed(diff.diff) +DeepDiffs.changed(diff::CodeDiff) = diff.changed + +issame(diff::CodeDiff) = isempty(DeepDiffs.added(diff)) && isempty(DeepDiffs.removed(diff)) + +Base.:(==)(d1::CodeDiff, d2::CodeDiff) = DeepDiffs.fieldequal(d1, d2) + +Base.show(io::IO, ::MIME"text/plain", diff::CodeDiff) = side_by_side_diff(io, diff) + +function Base.show(io::IO, diff::CodeDiff) + xlines = split(diff.before, '\n') + ylines = split(diff.after, '\n') + DeepDiffs.visitall(diff.diff) do idx, state, last + if state == :removed + printstyled(io, "- ", xlines[idx], color=:red) + elseif state == :added + printstyled(io, "+ ", ylines[idx], color=:green) + else + print(io, " ", xlines[idx]) + end + !last && println(io) + end +end + + +""" + optimize_line_changes!(diff::CodeDiff; dist=Levenshtein(), tol=0.7) + +Merges consecutive line removals+additions into single line changes in `diff`, when they +are within the `tol`erance of the normalized string `dist`ance. 
+ +This does not aim to produce an optimal `CodeDiff`, but simply improve its display. +""" +function optimize_line_changes!(diff::CodeDiff; dist=StringDistances.Levenshtein(), tol=0.7) + xlines = split(diff.before, '\n') + ylines = split(diff.after, '\n') + + empty!(diff.changed) + empty!(diff.ignore_added) + previously_removed = Vector{Int}() + removed_start = 1 + iadded = 1 + + DeepDiffs.visitall(diff.diff) do idx, state, _ + if state == :removed + # Removed lines are always iterated first, so they are compared against added lines + push!(previously_removed, idx) + elseif state == :added + iadded += 1 + for (li, removed_line) in enumerate(previously_removed[removed_start:end]) + if StringDistances.compare(xlines[removed_line], ylines[idx], dist) ≥ tol + diff.changed[removed_line] = DeepDiffs.deepdiff(xlines[removed_line], ylines[idx]) + push!(diff.ignore_added, idx) + removed_start += li # The next added lines will start from the next removed line + break + end + end + else + # Treat conserved lines as a "reset" point + empty!(previously_removed) + removed_start = 1 + end + end + + return diff +end diff --git a/src/CodeDifferences.jl b/src/CodeDifferences.jl deleted file mode 100644 index 078f1ac..0000000 --- a/src/CodeDifferences.jl +++ /dev/null @@ -1,5 +0,0 @@ -module CodeDifferences - -# Write your package code here. 
- -end diff --git a/src/CodeDiffs.jl b/src/CodeDiffs.jl new file mode 100644 index 0000000..5bd833f --- /dev/null +++ b/src/CodeDiffs.jl @@ -0,0 +1,24 @@ +module CodeDiffs + +# TODO: option to ignore differences in code comments (such as when comparing methods in different worlds) +# TODO: add `using CodeTracking: definition`, then do like `Cthulhu.jl` to retrieve the function def from its call: https://github.com/JuliaDebug/Cthulhu.jl/blob/9ba8bfc53efed453cb150c9f3e4c279521c5cb17/src/codeview.jl#L54C9-L54C33 +# TODO: GPU assembly / LLVM IR support +# TODO: explain in the docs how to interface with this package + +using DeepDiffs +using InteractiveUtils +using MacroTools +using Markdown +using StringDistances +using WidthLimitedIO + +export @code_diff + +const ANSI_REGEX = r"(?>\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~]))+" +const OhMYREPL_PKG_ID = Base.PkgId(Base.UUID("5fb14364-9ced-5910-84b2-373655c76a03"), "OhMyREPL") + +include("CodeDiff.jl") +include("compare.jl") +include("display.jl") + +end diff --git a/src/compare.jl b/src/compare.jl new file mode 100644 index 0000000..3b3b2e2 --- /dev/null +++ b/src/compare.jl @@ -0,0 +1,511 @@ + +""" + LLVM_MODULE_NAME_REGEX + +Should match the LLVM module name of any function which does not have any of `'",;-` or spaces +in it. + +It is `'get_function_name'`, in `'julia/src/codegen.cpp'` which builds the function name +for the LLVM module used to get the function code. The regex is built to match any output +from that function. +Since the `'globalUniqueGeneratedNames'` counter (the number at the end of the module name) +is incremented at each call to `'get_function_name'`, and since `code_llvm` or `code_native` +forces a compilation, it should be guaranteed that the match with the highest number at +the end is the name of our function in `code`. 
+""" +const LLVM_MODULE_NAME_REGEX = r"(?>julia|japi3|japi1)_([^\"\s,;\-']*)_(\d+)" + + +""" + replace_llvm_module_name(code::AbstractString) + +Strip in `code` the `julia_`/`japi1_`/`japi3_` prefix and the trailing number from the LLVM +module names, e.g. `"julia_f_2007" => "f"`. +This removes false differences when comparing raw code, since each call to +`code_native` (or `code_llvm`) triggers a new compilation using a unique LLVM module name, +therefore each consecutive call is different even though the actual code does not +change. + +```jldoctest; setup = :(using InteractiveUtils; import CodeDiffs: replace_llvm_module_name) +julia> f() = 1 +f (generic function with 1 method) + +julia> buf = IOBuffer(); + +julia> code_native(buf, f, Tuple{}) # Equivalent to `@code_native f()` + +julia> code₁ = String(take!(buf)); + +julia> code_native(buf, f, Tuple{}) + +julia> code₂ = String(take!(buf)); + +julia> code₁ == code₂ # Different LLVM module names... +false + +julia> replace_llvm_module_name(code₁) == replace_llvm_module_name(code₂) # ...but same code +true +``` +""" +replace_llvm_module_name(code::AbstractString) = replace(code, LLVM_MODULE_NAME_REGEX => s"\1") + + +""" + replace_llvm_module_name(code::AbstractString, function_name) + +Replace only LLVM module names for `function_name`. 
+""" +function replace_llvm_module_name(code::AbstractString, function_name) + function_name = string(function_name) + if Sys.islinux() && startswith(function_name, '@') + # See 'get_function_name' in 'julia/src/codegen.cpp' + function_name = function_name[2:end] + end + func_re = Regex("(?>julia|japi3|japi1)_\\Q$(function_name)\\E_(\\d+)") + return replace(code, func_re => function_name) +end + + +function compare_code(code₁::AbstractString, code₂::AbstractString, highlight_func; color=true) + io_buf = IOBuffer() + highlight_ctx = IOContext(io_buf, :color => true) + + code₁ = replace_llvm_module_name(code₁) + if color + highlight_func(highlight_ctx, code₁) + code₁_colored = String(take!(io_buf)) + else + code₁_colored = code₁ + end + + code₂ = replace_llvm_module_name(code₂) + if color + highlight_func(highlight_ctx, code₂) + code₂_colored = String(take!(io_buf)) + else + code₂_colored = code₂ + end + + diff = CodeDiff(code₁, code₂, code₁_colored, code₂_colored) + optimize_line_changes!(diff) + return diff +end + + +function compare_show(code₁, code₂; color=true, force_no_ansi=false) + io_buf = IOBuffer() + io_ctx = IOContext(io_buf, :color => false) + highlight_ctx = IOContext(io_buf, :color => true) + + Base.show(io_ctx, MIME"text/plain"(), code₁) + code_str₁ = String(take!(io_buf)) + force_no_ansi && (code_str₁ = replace(code_str₁, ANSI_REGEX => "")) + if !occursin('\n', code_str₁) + code_str₁ *= '\n' # Hack to make sure `deepdiff` creates a `StringLineDiff` + needed_newline = true + else + needed_newline = false + end + + if color + Base.show(highlight_ctx, MIME"text/plain"(), code₁) + code₁_colored = String(take!(io_buf)) * (needed_newline ? 
"\n" : "") + else + code₁_colored = code_str₁ + end + + Base.show(io_ctx, MIME"text/plain"(), code₂) + code_str₂ = String(take!(io_buf)) + force_no_ansi && (code_str₂ = replace(code_str₂, ANSI_REGEX => "")) + if !occursin('\n', code_str₂) + code_str₂ *= '\n' + needed_newline = true + else + needed_newline = false + end + + if color + Base.show(highlight_ctx, MIME"text/plain"(), code₂) + code₂_colored = String(take!(io_buf)) * (needed_newline ? "\n" : "") + else + code₂_colored = code_str₂ + end + + diff = CodeDiff(code_str₁, code_str₂, code₁_colored, code₂_colored) + optimize_line_changes!(diff) + return diff +end + + +""" + compare_code_native(code₁, code₂; color=true) + +Return a [`CodeDiff`](@ref) between `code₁` and `code₂`. +Codes are cleaned-up with [`replace_llvm_module_name`](@ref) beforehand. + +If `color == true`, then both codes are highlighted using `InteractiveUtils.print_native`. +""" +function compare_code_native(code₁::AbstractString, code₂::AbstractString; color=true) + return compare_code(code₁, code₂, InteractiveUtils.print_native; color) +end + + +""" + compare_code_native( + f₁::Base.Callable, types₁::Type{<:Tuple}, + f₂::Base.Callable, types₂::Type{<:Tuple}; + color=true, kwargs... + ) + +Call `InteractiveUtils.code_native(f₁, types₁)` and `InteractiveUtils.code_native(f₂, types₂)` +and return their [`CodeDiff`](@ref). `kwargs` are passed to `code_native`. +""" +function compare_code_native( + f₁::Base.Callable, types₁::Type{<:Tuple}, + f₂::Base.Callable, types₂::Type{<:Tuple}; + color=true, kwargs... +) + @nospecialize(f₁, types₁, f₂, types₂) + + io_buf = IOBuffer() + io_ctx = IOContext(io_buf, :color => false) + + InteractiveUtils.code_native(io_ctx, f₁, types₁; kwargs...) + code₁ = String(take!(io_buf)) + + InteractiveUtils.code_native(io_buf, f₂, types₂; kwargs...) 
+ code₂ = String(take!(io_buf)) + + return compare_code_native(code₁, code₂; color) +end + + +""" + compare_code_native( + f::Base.Callable, types::Type{<:Tuple}, world₁, world₂; + color=true, kwargs... + ) + +Similar to [`compare_code_native(f₁, types₁, f₂, types₂)`](@ref), but as a difference +between `f` in world ages `world₁` and `world₂`. +""" +function compare_code_native( + f::Base.Callable, types::Type{<:Tuple}, world₁::Integer, world₂::Integer; + color=true, dump_module=true, syntax=:intel, raw=false, debuginfo=:default, binary=false +) + @nospecialize(f, types) + + sig = Base.signature_type(f, types) + params = Base.CodegenParams(debug_info_kind=Cint(0), safepoint_on_entry=raw, gcstack_arg=raw) + + if debuginfo === :default + debuginfo = :source + elseif debuginfo !== :source && debuginfo !== :none + throw(ArgumentError("'debuginfo' must be either :source or :none")) + end + + # See `InteractiveUtils._dump_function` + f₁ = Base._which(sig; world=world₁) + mi_f₁ = Core.Compiler.specialize_method(f₁) + if dump_module + f₁_str = InteractiveUtils._dump_function_native_assembly(mi_f₁, world₁, false, syntax, debuginfo, binary, raw, params) + else + f₁_str = InteractiveUtils._dump_function_native_disassembly(mi_f₁, world₁, false, syntax, debuginfo, binary) + end + + f₂ = Base._which(sig; world=world₂) + mi_f₂ = Core.Compiler.specialize_method(f₂) + if dump_module + f₂_str = InteractiveUtils._dump_function_native_assembly(mi_f₂, world₂, false, syntax, debuginfo, binary, raw, params) + else + f₂_str = InteractiveUtils._dump_function_native_disassembly(mi_f₂, world₂, false, syntax, debuginfo, binary) + end + + return compare_code_native(f₁_str, f₂_str; color) +end + + +""" + compare_code_llvm(code₁, code₂; color=true) + +Return a [`CodeDiff`](@ref) between `code₁` and `code₂`. +Codes are cleaned-up with [`replace_llvm_module_name`](@ref) beforehand. + +If `color == true`, then both codes are highlighted using `InteractiveUtils.print_llvm`. 
+""" +function compare_code_llvm(code₁::AbstractString, code₂::AbstractString; color=true) + return compare_code(code₁, code₂, InteractiveUtils.print_llvm; color) +end + + +""" + compare_code_llvm( + f₁::Base.Callable, types₁::Type{<:Tuple}, + f₂::Base.Callable, types₂::Type{<:Tuple}; + color=true, kwargs... + ) + +Call `InteractiveUtils.code_llvm(f₁, types₁)` and `InteractiveUtils.code_llvm(f₂, types₂)` +and return their [`CodeDiff`](@ref). `kwargs` are passed to `code_llvm`. +""" +function compare_code_llvm( + f₁::Base.Callable, types₁::Type{<:Tuple}, + f₂::Base.Callable, types₂::Type{<:Tuple}; + color=true, kwargs... +) + @nospecialize(f₁, types₁, f₂, types₂) + + io_buf = IOBuffer() + io_ctx = IOContext(io_buf, :color => false) + + InteractiveUtils.code_llvm(io_ctx, f₁, types₁; kwargs...) + code₁ = String(take!(io_buf)) + + InteractiveUtils.code_llvm(io_buf, f₂, types₂; kwargs...) + code₂ = String(take!(io_buf)) + + return compare_code_llvm(code₁, code₂; color) +end + + +""" + compare_code_llvm( + f::Base.Callable, types::Type{<:Tuple}, world₁, world₂; + color=true, kwargs... + ) + +Similar to [`compare_code_llvm(f₁, types₁, f₂, types₂)`](@ref), but as a difference +between `f` in world ages `world₁` and `world₂`. 
+""" +function compare_code_llvm( + f::Base.Callable, types::Type{<:Tuple}, world₁::Integer, world₂::Integer; + color=true, raw=false, dump_module=false, optimize=true, debuginfo=:default +) + @nospecialize(f, types) + + sig = Base.signature_type(f, types) + params = Base.CodegenParams(debug_info_kind=Cint(0), safepoint_on_entry=raw, gcstack_arg=raw) + + if debuginfo === :default + debuginfo = :source + elseif debuginfo !== :source && debuginfo !== :none + throw(ArgumentError("'debuginfo' must be either :source or :none")) + end + + # See `InteractiveUtils._dump_function` + f₁ = Base._which(sig; world=world₁) + mi_f₁ = Core.Compiler.specialize_method(f₁) + f₁_str = InteractiveUtils._dump_function_llvm( + mi_f₁, world₁, false, !raw, dump_module, optimize, debuginfo, params + ) + + f₂ = Base._which(sig; world=world₂) + mi_f₂ = Core.Compiler.specialize_method(f₂) + f₂_str = InteractiveUtils._dump_function_llvm( + mi_f₂, world₂, false, !raw, dump_module, optimize, debuginfo, params + ) + + return compare_code_llvm(f₁_str, f₂_str; color) +end + + +""" + compare_code_typed(code_info₁::Pair, code_info₂::Pair; color=true) + compare_code_typed(code_info₁::Core.CodeInfo, code_info₂::Core.CodeInfo; color=true) + +Return a [`CodeDiff`](@ref) between `code_info₁` and `code_info₂`. + +If `color == true`, then both codes are highlighted. +""" +function compare_code_typed( + code_info₁::CI, code_info₂::CI; color=true +) where {CI <: Union{Core.CodeInfo, Pair{Core.CodeInfo, <:Type}}} + return compare_show(code_info₁, code_info₂; color) +end + + +""" + compare_code_typed( + f₁::Base.Callable, types₁::Type{<:Tuple}, + f₂::Base.Callable, types₂::Type{<:Tuple}; + color=true, kwargs... + ) + +Call `Base.code_typed(f₁, types₁)` and `Base.code_typed(f₂, types₂)` and return their +[`CodeDiff`](@ref). `kwargs` are passed to `code_typed`. + +Both function calls should only match a single method. 
+""" +function compare_code_typed( + f₁::Base.Callable, types₁::Type{<:Tuple}, + f₂::Base.Callable, types₂::Type{<:Tuple}; + color=true, kwargs... +) + @nospecialize(f₁, types₁, f₂, types₂) + + code_info₁ = Base.code_typed(f₁, types₁; kwargs...) + code_info₁ = only(code_info₁) + + code_info₂ = Base.code_typed(f₂, types₂; kwargs...) + code_info₂ = only(code_info₂) + + return compare_code_typed(code_info₁, code_info₂; color) +end + + +""" + compare_code_typed( + f::Base.Callable, types::Type{<:Tuple}, world₁, world₂; + color=true, kwargs... + ) + +Similar to [`compare_code_typed(f₁, types₁, f₂, types₂)`](@ref), but as a difference +between `f` in world ages `world₁` and `world₂`. +""" +function compare_code_typed( + f::Base.Callable, types::Type{<:Tuple}, world₁::Integer, world₂::Integer; + color=true, kwargs... +) + @nospecialize(f, types) + + code_info₁ = Base.code_typed(f, types; world=world₁, kwargs...) + code_info₁ = only(code_info₁) + + code_info₂ = Base.code_typed(f, types; world=world₂, kwargs...) + code_info₂ = only(code_info₂) + + return compare_code_typed(code_info₁, code_info₂; color) +end + + +""" + compare_ast(code₁::Expr, code₂::Expr; color=true, prettify=true, lines=false, alias=false) + +A [`CodeDiff`](@ref) between `code₁` and `code₂`, relying on the native display of Julia AST. + +If `prettify == true`, then +[`MacroTools.prettify(code; lines, alias)`](https://fluxml.ai/MacroTools.jl/stable/utilities/#MacroTools.prettify) +is used to cleanup the AST. `lines == true` will keep the `LineNumberNode`s and `alias == true` +will replace mangled names (or `gensym`s) by dummy names. + +`color == true` is special, as it places the stringified AST into a Markdown code block. +See [`compare_ast(code₁::Markdown.MD, code₂::Markdown.MD)`](@ref). 
+""" +function compare_ast(code₁::Expr, code₂::Expr; color=true, prettify=true, lines=false, alias=false) + if prettify + code₁ = MacroTools.prettify(code₁; lines, alias) + code₂ = MacroTools.prettify(code₂; lines, alias) + end + + # Placing the `Expr`s in blocks is required to have a multiline display + code₁ = MacroTools.block(code₁) + code₂ = MacroTools.block(code₂) + + if color + io_buf = IOBuffer() + + print(io_buf, code₁) + code_str₁ = String(take!(io_buf)) + code_md₁ = Markdown.MD(Markdown.julia, Markdown.Code("julia", code_str₁)) + + print(io_buf, code₂) + code_str₂ = String(take!(io_buf)) + code_md₂ = Markdown.MD(Markdown.julia, Markdown.Code("julia", code_str₂)) + + return compare_ast(code_md₁, code_md₂) + else + return compare_show(code₁, code₂; color=false) + end +end + + +""" + compare_ast(code₁::Markdown.MD, code₂::Markdown.MD; color=true) + +[`CodeDiff`](@ref) between Julia code string, in the form of Markdown code blocks. + +Relies on the Markdown code highlighting from [`OhMyREPL.jl`](https://github.com/KristofferC/OhMyREPL.jl) +to colorize Julia code. +""" +function compare_ast(code₁::Markdown.MD, code₂::Markdown.MD; color=true) + if !haskey(Base.loaded_modules, OhMYREPL_PKG_ID) + @warn "OhMyREPL.jl is not loaded, AST highlighting will not work" maxlog=1 + end + return compare_show(code₁, code₂; color, force_no_ansi=true) +end + + +""" + code_diff(code₁::Markdown.MD, code₂::Markdown.MD; kwargs...) + code_diff(code₁::Expr, code₂::Expr; kwargs...) + +Compare AST in `code₁` and `code₂`. `Expr` are placed in `Markdown` code blocks. +""" +code_diff(code₁::Markdown.MD, code₂::Markdown.MD; kwargs...) = compare_ast(code₁, code₂; kwargs...) +code_diff(code₁::Expr, code₂::Expr; kwargs...) = compare_ast(code₁, code₂; kwargs...) + +code_diff(::Val{:native}, code₁::AbstractString, code₂::AbstractString; kwargs...) = + compare_code(code₁, code₂, InteractiveUtils.print_native; kwargs...) 
+code_diff(::Val{:llvm}, code₁::AbstractString, code₂::AbstractString; kwargs...) = + compare_code(code₁, code₂, InteractiveUtils.print_llvm; kwargs...) +# `compare_code` calls `highlight_func(io, code)`: `print` is the two-argument pass-through (`identity` only accepts one argument) +code_diff(::Val{:typed}, code₁::AbstractString, code₂::AbstractString; kwargs...) = + compare_code(code₁, code₂, print; kwargs...) + +""" + code_diff(::Val{type}, f₁, types₁, f₂, types₂; kwargs...) + code_diff(::Val{type}, code₁::AbstractString, code₂::AbstractString; kwargs...) + code_diff(args...; type=:native, kwargs...) + +Dispatch to [`compare_code_native`](@ref), [`compare_code_llvm`](@ref), +[`compare_code_typed`](@ref) or [`compare_ast`](@ref) depending on `type`. +""" +code_diff(code₁::AbstractString, code₂::AbstractString; type::Symbol=:native, kwargs...) = + code_diff(Val(type), code₁, code₂; kwargs...) + +@nospecialize + +code_diff(::Val{:native}, f₁, types₁, f₂, types₂; kwargs...) = compare_code_native(f₁, types₁, f₂, types₂; kwargs...) +code_diff(::Val{:llvm}, f₁, types₁, f₂, types₂; kwargs...) = compare_code_llvm(f₁, types₁, f₂, types₂; kwargs...) +code_diff(::Val{:typed}, f₁, types₁, f₂, types₂; kwargs...) = compare_code_typed(f₁, types₁, f₂, types₂; kwargs...) + +code_diff(code₁::Tuple, code₂::Tuple; type::Symbol=:native, kwargs...) = + code_diff(Val(type), code₁..., code₂...; kwargs...) + +@specialize + + +""" + @code_diff [type=:native] [option=value...] f₁(...) f₂(...) + @code_diff [type] [option=value...] a b + +Compare the methods called by the `f₁(...)` and `f₂(...)` expressions, and return a +[`CodeDiff`](@ref). +In the other form of `@code_diff`, `a` and `b` must be either variable names (`Symbol`s) +or quoted expressions (e.g. `@code_diff :(1+2) :(2+3)`): in this case the difference type +might be inferred automatically. + +`option`s are passed as keyword arguments to [`code_diff`](@ref) and then to the +`compare_code_*` function for the given code `type`. +""" +macro code_diff(args...) 
+ length(args) < 2 && throw(ArgumentError("@code_diff takes at least 2 arguments")) + options..., code₁, code₂ = args + + options = map(options) do option + !(option isa Expr && option.head === :(=)) && + throw(ArgumentError("options must be in the form `key=value`, got: $option")) + return Expr(:kw, option.args[1], option.args[2]) + end + + code₁, code₂ = map((code₁, code₂)) do code + (!(code isa Expr) || code.head === :quote) && return code + code.head !== :call && throw(ArgumentError("expected call expression, got: $code")) + # `f(a, b)` => `(f, Base.typesof(a, b))` + f = code.args[1] + f_args = :(Base.typesof($(code.args[2:end]...))) + return :($f, $f_args) + end + + call_expr = :($code_diff($code₁, $code₂; )) + append!(call_expr.args[2].args, options) + return esc(call_expr) +end diff --git a/src/display.jl b/src/display.jl new file mode 100644 index 0000000..7dc56e1 --- /dev/null +++ b/src/display.jl @@ -0,0 +1,202 @@ + +function print_columns(io, width, left_line, sep, right_line, empty_line, tab_replacement) + left_line = replace(left_line, '\t' => tab_replacement) + right_line = replace(right_line, '\t' => tab_replacement) + + wio = TextWidthLimiter(IOBuffer(), width) + wio_ctx = IOContext(wio, io) + + printstyled(wio_ctx, left_line) + left_len = wio.width + printstyled(io, String(take!(wio))) + if left_len < width + printstyled(io, @view(empty_line[1:width - left_len])) + end + + printstyled(io, sep) + + printstyled(wio_ctx, right_line) + right_len = wio.width + printstyled(io, String(take!(wio))) + if right_len < width + # Padding needed only for line numbers + printstyled(io, @view(empty_line[1:width - right_len])) + end +end + + +function print_columns_change(io, width, line_diff, highlighted_left, sep, empty_line, tab_replacement) + wio = TextWidthLimiter(IOBuffer(), width) + wio_ctx = IOContext(wio, io) + + printstyled_code_line_diff(wio_ctx, line_diff, highlighted_left, true, tab_replacement) + left_len = wio.width + printstyled(io, 
"""
    next_ansi_sequence(str, idx)

Search `str` for the next match of `ANSI_REGEX`, starting at `idx`.

Return `(offset, sequence)` for the match found, or `(typemax(idx), "")` if there is none.
"""
function next_ansi_sequence(str, idx)
    found = match(ANSI_REGEX, str, idx)
    found === nothing && return (typemax(idx), "")
    return (found.offset, found.match)
end


"""
    printstyled_code_line_diff(io, diff, highlighted_left, removed_only, tab_replacement)

Print one side of the character-level `diff` of a changed line to `io`.

With `removed_only=true` the unchanged and removed characters are printed, otherwise the
unchanged and added ones. If `io` has the `:color` property set, removed characters get a red
background and added characters a green one.

The ANSI sequences found in `highlighted_left` are copied to the output as the characters are
visited, and whatever follows the last consumed sequence position is appended at the end.
Tabs are replaced by `tab_replacement`.
"""
function printstyled_code_line_diff(
    io::IO, diff::DeepDiffs.StringDiff, highlighted_left, removed_only::Bool,
    tab_replacement
)
    before_chars = DeepDiffs.before(diff.diff)
    after_chars = DeepDiffs.after(diff.diff)

    use_color = get(io, :color, false)
    default_bkg = use_color ? "\e[49m" : ""                        # default background
    removed_bkg = (use_color && removed_only) ? "\e[41m" : ""      # red background
    added_bkg = (use_color && !removed_only) ? "\e[42m" : ""       # green background

    next_ansi_at, ansi_seq = next_ansi_sequence(highlighted_left, 1)
    ansi_shift = 0  # accumulated length of the ANSI sequences already written

    buffer = IOBuffer()
    last_state = :same
    DeepDiffs.visitall(diff.diff) do idx, state, _
        if idx + ansi_shift ≥ next_ansi_at
            write(buffer, ansi_seq)
            # A reset sequence also drops our background: forget the current state so that
            # the background gets written again for the next character
            if last_state !== :same && occursin("\e[0m", ansi_seq)
                last_state = :same
            end
            ansi_shift += length(ansi_seq)
            next_ansi_at, ansi_seq = next_ansi_sequence(highlighted_left, idx + ansi_shift)
        end

        if state === :removed
            removed_only || return
            last_state === :removed || write(buffer, removed_bkg)
            char = before_chars[idx]
        elseif state === :added
            removed_only && return
            last_state === :added || write(buffer, added_bkg)
            char = after_chars[idx]
        else
            last_state === :same || write(buffer, default_bkg)
            char = before_chars[idx]
        end

        write(buffer, char == '\t' ? tab_replacement : char)
        last_state = state
    end

    last_state === :same || write(buffer, default_bkg)
    write(buffer, @view(highlighted_left[next_ansi_at:end]))

    printstyled(io, String(take!(buffer)))
end
"""
    side_by_side_diff([io::IO,] diff::CodeDiff; tab_width=4, width=nothing, line_numbers=nothing)

Side by side display of a [`CodeDiff`](@ref) to `io` (defaults to `stdout`).

`width` defaults to the width of the terminal. It is `80` by default for non-terminal `io`.

`tab_width` is the number of spaces tabs are replaced with.

`line_numbers=true` will add line numbers on each side of the columns. It defaults to the
environment variable `"CODE_DIFFS_LINE_NUMBERS"`, which itself defaults to `false`.

The separator between the two columns indicates the state of each line: `" ┃ "` for
identical lines, `"⟪┫ "` for removed lines, `" ┣⟫"` for added lines and `"⟪╋⟫"` for changed
lines.

An error is thrown if the columns would be 5 characters wide or less. The method without `io`
additionally prints a newline after the diff.
"""
function side_by_side_diff(io::IO, diff::CodeDiff; tab_width=4, width=nothing, line_numbers=nothing)
    # Explicit `nothing` checks rather than `@something`: the macro is only available from
    # Julia 1.7, while the package declares compatibility with Julia 1.6.
    if line_numbers === nothing
        line_numbers = parse(Bool, get(ENV, "CODE_DIFFS_LINE_NUMBERS", "false"))
    end

    xlines = split(diff.highlighted_before, '\n')
    ylines = split(diff.highlighted_after, '\n')

    if width === nothing
        width = displaysize(io)[2]
    end

    if line_numbers
        # Reserve room on both sides for the widest line number plus one space
        max_line = length(xlines) + length(DeepDiffs.added(diff))
        line_num_width = length(string(max_line))
        width -= 2*(line_num_width + 1)
        empty_line_num = " "^(line_num_width+1)
    else
        line_num_width = 0
        empty_line_num = ""
    end

    sep_same       = " ┃ "
    sep_removed    = "⟪┫ "
    sep_added      = " ┣⟫"
    sep_changed_to = "⟪╋⟫"

    column_width = fld(width - length(sep_same), 2)
    column_width ≤ 5 && error("output terminal width ($width) is too small")
    empty_column = " "^column_width
    tab = " "^tab_width

    left_line = 1
    right_line = 1
    DeepDiffs.visitall(diff.diff) do idx, state, last
        if line_numbers && state !== :added
            line_num = lpad(string(left_line), line_num_width)
            printstyled(io, line_num, ' '; color=:light_black)
            left_line += 1
        end

        right_printed = true
        if state === :removed
            if haskey(diff.changed, idx)
                # Changed line: both versions are displayed on the same row
                line_diff = diff.changed[idx]
                print_columns_change(io, column_width, line_diff, xlines[idx],
                    sep_changed_to, empty_column, tab)
            else
                print_columns(io, column_width, xlines[idx], sep_removed, "", empty_column, tab)
                right_printed = false
            end
        elseif state === :added
            # NOTE(review): `ignore_added` presumably lists the added lines already displayed
            # as the right side of a changed line -- confirm against the `CodeDiff` constructor
            idx ∈ diff.ignore_added && return
            printstyled(io, empty_line_num)  # blank in place of the left line number
            print_columns(io, column_width, "", sep_added, ylines[idx], empty_column, tab)
        else
            print_columns(io, column_width, xlines[idx], sep_same, xlines[idx], empty_column, tab)
        end

        if line_numbers && right_printed
            line_num = rpad(string(right_line), line_num_width)
            printstyled(io, line_num; color=:light_black)
            right_line += 1
        end

        !last && println(io)
    end
end

function side_by_side_diff(diff::CodeDiff; kwargs...)
    side_by_side_diff(stdout, diff; kwargs...)
    println()
end
at end of file diff --git a/test/references/f1_x86_COLOR.S b/test/references/f1_x86_COLOR.S new file mode 100644 index 0000000..aa34020 --- /dev/null +++ b/test/references/f1_x86_COLOR.S @@ -0,0 +1,22 @@ + .text  ┃  .text + .file "f"  ┃  .file "f" + .globl f # -- Begin function f  ┃  .globl f # -- Begin function f + .p2align 4, 0x90  ┃  .p2align 4, 0x90 + .type f,@function  ┃  .type f,@function +f: # @f  ┃ f: # @f + .cfi_startproc  ┃  .cfi_startproc +# %bb.0: # %top  ┃ # %bb.0: # %top + push rbp  ┃  push rbp + .cfi_def_cfa_offset 16  ┃  .cfi_def_cfa_offset 16 + .cfi_offset rbp, -16  ┃  .cfi_offset rbp, -16 + mov rbp, rsp  ┃  mov rbp, rsp + .cfi_def_cfa_register rbp  ┃  .cfi_def_cfa_register rbp + mov eax, 1  ┃  mov eax, 1 + pop rbp  ┃  pop rbp + ret  ┃  ret +.Lfunc_end0:  ┃ .Lfunc_end0: + .size f, .Lfunc_end0-f  ┃  .size f, .Lfunc_end0-f + .cfi_endproc  ┃  .cfi_endproc + # -- End function  ┃  # -- End function + .section ".note.GNU-stack","",@progbits  ┃  .section ".note.GNU-stack","",@progbits +  ┃  \ No newline at end of file diff --git a/test/references/f1_x86_COLOR.ll b/test/references/f1_x86_COLOR.ll new file mode 100644 index 0000000..56346e4 --- /dev/null +++ b/test/references/f1_x86_COLOR.ll @@ -0,0 +1,6 @@ +; Function Attrs: uwtable  ┃ ; Function Attrs: uwtable +define i64 @f() #0 {  ┃ define i64 @f() #0 { +top:  ┃ top: + ret i64 1  ┃  ret i64 1 +}  ┃ } +  ┃  \ No newline at end of file diff --git a/test/references/f1_x86_LINES.ll b/test/references/f1_x86_LINES.ll new file mode 100644 index 0000000..1eb9589 --- /dev/null +++ b/test/references/f1_x86_LINES.ll @@ -0,0 +1,6 @@ +1 ; Function Attrs: uwtable ┃ ; Function Attrs: uwtable 1 +2 define i64 @f() #0 { ┃ define i64 @f() #0 { 2 +3 top: ┃ top: 3 +4 ret i64 1 ┃ ret i64 1 4 +5 } ┃ } 5 +6 ┃ 6 \ No newline at end of file diff --git a/test/references/saxpy.jl_typed b/test/references/saxpy.jl_typed new file mode 100644 index 0000000..858a51c --- /dev/null +++ b/test/references/saxpy.jl_typed @@ -0,0 +1,55 @@ 
+CodeInfo( ┃ CodeInfo( +1 ── %1 = Base.arraysize(r, 1)::Int64 ┃ 1 ── %1 = Base.arraysize(r, 1)::Int64 +│ %2 = Base.slt_int(%1, 0)::Bool ┃ │ %2 = Base.slt_int(%1, 0)::Bool +│ %3 = Core.ifelse(%2, 0, %1)::Int64 ┃ │ %3 = Core.ifelse(%2, 0, %1)::Int64 +│ %4 = Base.slt_int(%3, 1)::Bool ⟪┫ +└─── goto #3 if not %4 ⟪╋⟫└─── goto #13 if not true +2 ── goto #4 ⟪┫ +3 ── goto #4 ⟪┫ +4 ┄─ %8 = φ (#2 => true, #3 => false)::Bool ⟪┫ +│ %9 = φ (#3 => 1)::Int64 ⟪┫ +│ %10 = φ (#3 => 1)::Int64 ⟪┫ +│ %11 = Base.not_int(%8)::Bool ⟪┫ +└─── goto #10 if not %11 ⟪╋⟫└─── goto #12 if not %6 +5 ┄─ %13 = φ (#4 => %9, #9 => %25)::Int64 ⟪╋⟫4 ┄─ %9 = φ (#3 => 0, #11 => %28)::Int64 +│ %14 = φ (#4 => %10, #9 => %26)::Int64 ⟪┫ +│ %15 = Base.arrayref(true, x, %13)::Int64 ⟪┫ +│ %16 = Base.mul_int(a, %15)::Int64 ⟪┫ +│ %17 = Base.arrayref(true, y, %13)::Int64 ⟪┫ +│ %18 = Base.add_int(%16, %17)::Int64 ⟪┫ +│ Base.arrayset(true, r, %18, %13)::Vector{Int64…⟪┫ +│ %20 = (%14 === %3)::Bool ⟪┫ +└─── goto #7 if not %20 ⟪┫ +6 ── goto #8 ⟪┫ +7 ── %23 = Base.add_int(%14, 1)::Int64 ⟪┫ +└─── goto #8 ⟪┫ +8 ┄─ %25 = φ (#7 => %23)::Int64 ⟪┫ +│ %26 = φ (#7 => %23)::Int64 ⟪┫ +│ %27 = φ (#6 => true, #7 => false)::Bool ⟪┫ +│ %28 = Base.not_int(%27)::Bool ⟪╋⟫│ %10 = Base.slt_int(%9, %3)::Bool +└─── goto #10 if not %28 ⟪╋⟫└─── goto #12 if not %10 +9 ── goto #5 ⟪╋⟫7 ── goto #9 +10 ┄ return nothing ⟪╋⟫13 ┄ return Main.nothing + ┣⟫│ %4 = %new(Base.OneTo{Int64}, %3)::Base.OneTo{Int64} + ┣⟫2 ── %6 = Base.slt_int(0, %3)::Bool + ┣⟫3 ── nothing::Nothing + ┣⟫5 ── %12 = Base.add_int(%9, 1)::Int64 + ┣⟫└─── goto #9 if not false + ┣⟫6 ── %14 = Base.slt_int(0, %12)::Bool + ┣⟫│ %15 = Base.sle_int(%12, %3)::Bool + ┣⟫│ %16 = Base.and_int(%14, %15)::Bool + ┣⟫└─── goto #8 if not %16 + ┣⟫8 ── invoke Base.throw_boundserror(%4::Base.OneTo{I… + ┣⟫└─── unreachable + ┣⟫9 ┄─ goto #10 + ┣⟫10 ─ goto #11 + ┣⟫11 ─ %23 = Base.arrayref(false, x, %12)::Int64 + ┣⟫│ %24 = Base.mul_int(a, %23)::Int64 + ┣⟫│ %25 = Base.arrayref(false, y, %12)::Int64 + ┣⟫│ %26 = 
Base.add_int(%24, %25)::Int64 + ┣⟫│ Base.arrayset(false, r, %26, %12)::Vector{Int6… + ┣⟫│ %28 = Base.add_int(%9, 1)::Int64 + ┣⟫│ $(Expr(:loopinfo, Symbol("julia.simdloop"), Sy… + ┣⟫└─── goto #4 + ┣⟫12 ┄ nothing::Nothing +) => Nothing ┃ ) => Nothing \ No newline at end of file diff --git a/test/references/saxpy_COLOR.jl_typed b/test/references/saxpy_COLOR.jl_typed new file mode 100644 index 0000000..3ffce3a --- /dev/null +++ b/test/references/saxpy_COLOR.jl_typed @@ -0,0 +1,55 @@ +CodeInfo(  ┃ CodeInfo( +1 ── %1 = Base.arraysize(r, 1)::Int64  ┃ 1 ── %1 = Base.arraysize(r, 1)::Int64 +│  %2 = Base.slt_int(%1, 0)::Bool  ┃ │  %2 = Base.slt_int(%1, 0)::Bool +│  %3 = Core.ifelse(%2, 0, %1)::Int64  ┃ │  %3 = Core.ifelse(%2, 0, %1)::Int64 +│  %4 = Base.slt_int(%3, 1)::Bool ⟪┫  +└─── goto #3 if not %4 ⟪╋⟫└─── goto #13 if not true +2 ── goto #4 ⟪┫  +3 ── goto #4 ⟪┫  +4 ┄─ %8 = φ (#2 => true, #3 => false)::Bool ⟪┫  +│  %9 = φ (#3 => 1)::Int64 ⟪┫  +│  %10 = φ (#3 => 1)::Int64 ⟪┫  +│  %11 = Base.not_int(%8)::Bool ⟪┫  +└─── goto #10 if not %11 ⟪╋⟫└─── goto #12 if not %6 +5 ┄─ %13 = φ (#4 => %9, #9 => %25)::Int64 ⟪╋⟫4 ┄─ %9  = φ (#3 => 0, #11 => %28)::Int64 +│  %14 = φ (#4 => %10, #9 => %26)::Int64 ⟪┫  +│  %15 = Base.arrayref(true, x, %13)::Int64 ⟪┫  +│  %16 = Base.mul_int(a, %15)::Int64 ⟪┫  +│  %17 = Base.arrayref(true, y, %13)::Int64 ⟪┫  +│  %18 = Base.add_int(%16, %17)::Int64 ⟪┫  +│  Base.arrayset(true, r, %18, %13)::Vector{Int64…⟪┫  +│  %20 = (%14 === %3)::Bool ⟪┫  +└─── goto #7 if not %20 ⟪┫  +6 ── goto #8 ⟪┫  +7 ── %23 = Base.add_int(%14, 1)::Int64 ⟪┫  +└─── goto #8 ⟪┫  +8 ┄─ %25 = φ (#7 => %23)::Int64 ⟪┫  +│  %26 = φ (#7 => %23)::Int64 ⟪┫  +│  %27 = φ (#6 => true, #7 => false)::Bool ⟪┫  +│ %28 = Base.not_int(%27)::Bool ⟪╋⟫│ %10 = Base.slt_int(%9, %3)::Bool +└─── goto #10 if not %28 ⟪╋⟫└─── goto #12 if not %10 +9 ──  goto #5 ⟪╋⟫7 ──  goto #9 +10 ┄  return nothing ⟪╋⟫13 ┄  return Main.nothing +  ┣⟫│  %4 = %new(Base.OneTo{Int64}, %3)::Base.OneTo{Int64} +  ┣⟫2 ── %6 = 
Base.slt_int(0, %3)::Bool +  ┣⟫3 ── nothing::Nothing +  ┣⟫5 ── %12 = Base.add_int(%9, 1)::Int64 +  ┣⟫└─── goto #9 if not false +  ┣⟫6 ── %14 = Base.slt_int(0, %12)::Bool +  ┣⟫│  %15 = Base.sle_int(%12, %3)::Bool +  ┣⟫│  %16 = Base.and_int(%14, %15)::Bool +  ┣⟫└─── goto #8 if not %16 +  ┣⟫8 ── invoke Base.throw_boundserror(%4::Base.OneTo{I… +  ┣⟫└─── unreachable +  ┣⟫9 ┄─ goto #10 +  ┣⟫10 ─ goto #11 +  ┣⟫11 ─ %23 = Base.arrayref(false, x, %12)::Int64 +  ┣⟫│  %24 = Base.mul_int(a, %23)::Int64 +  ┣⟫│  %25 = Base.arrayref(false, y, %12)::Int64 +  ┣⟫│  %26 = Base.add_int(%24, %25)::Int64 +  ┣⟫│  Base.arrayset(false, r, %26, %12)::Vector{Int6… +  ┣⟫│  %28 = Base.add_int(%9, 1)::Int64 +  ┣⟫│  $(Expr(:loopinfo, Symbol("julia.simdloop"), Sy… +  ┣⟫└─── goto #4 +  ┣⟫12 ┄ nothing::Nothing +) => Nothing  ┃ ) => Nothing \ No newline at end of file diff --git a/test/references/saxpy_x86.S b/test/references/saxpy_x86.S new file mode 100644 index 0000000..91d1cac --- /dev/null +++ b/test/references/saxpy_x86.S @@ -0,0 +1,379 @@ + .text ┃ .text + .file "saxpy" ⟪╋⟫ .file "saxpy_simd" + .globl saxpy # -- Begin function sa…⟪╋⟫ .globl saxpy_simd # -- Begin function sa… + .p2align 4, 0x90 ┃ .p2align 4, 0x90 + .type saxpy,@function ⟪╋⟫ .type saxpy_simd,@function +saxpy: # @saxpy ⟪╋⟫saxpy_simd: # @saxpy_simd + .cfi_startproc ┃ .cfi_startproc +# %bb.0: # %top ┃ # %bb.0: # %top + push rbp ┃ push rbp + .cfi_def_cfa_offset 16 ┃ .cfi_def_cfa_offset 16 + .cfi_offset rbp, -16 ┃ .cfi_offset rbp, -16 + mov rbp, rsp ┃ mov rbp, rsp + .cfi_def_cfa_register rbp ┃ .cfi_def_cfa_register rbp + push r15 ⟪┫ + push r14 ⟪┫ + push r13 ⟪┫ + push r12 ⟪┫ + push rsi ┃ push rsi + push rdi ┃ push rdi + push rbx ⟪┫ + sub rsp, 72 ⟪╋⟫ sub rsp, 32 + vmovdqa xmmword ptr [rbp - 80], xmm7 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 32], xmm7 # 16-byte … + vmovdqa xmmword ptr [rbp - 96], xmm6 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 48], xmm6 # 16-byte … + .cfi_offset rbx, -72 ⟪╋⟫ .cfi_offset rdi, -32 + .cfi_offset rdi, -64 
⟪╋⟫ .cfi_offset rsi, -24 + .cfi_offset rsi, -56 ⟪╋⟫ .cfi_offset xmm6, -64 + .cfi_offset r12, -48 ⟪╋⟫ .cfi_offset xmm7, -48 + .cfi_offset r13, -40 ⟪┫ + .cfi_offset r14, -32 ⟪┫ + .cfi_offset r15, -24 ⟪┫ + .cfi_offset xmm6, -112 ⟪┫ + .cfi_offset xmm7, -96 ⟪┫ + mov r13, r8 ⟪┫ + mov r8, qword ptr [rcx + 8] ⟪╋⟫ mov r10, qword ptr [rcx + 8] + test r8, r8 ⟪╋⟫ test r10, r10 + je .LBB0_15 ⟪╋⟫ je .LBB0_11 +# %bb.1: # %L13.preheader ⟪╋⟫# %bb.1: # %L12.lr.ph + mov r15, qword ptr [r13] ⟪╋⟫ mov r8, qword ptr [r8] + mov r11, qword ptr [r13 + 8] ⟪╋⟫ mov r9, qword ptr [r9] + mov r10, qword ptr [r9] ⟪╋⟫ mov r11, qword ptr [rcx] + mov r14, qword ptr [r9 + 8] ⟪┫ + mov qword ptr [rbp - 120], rcx # 8-byte Spill ⟪┫ + mov rbx, qword ptr [rcx] ⟪┫ + cmp r11, r8 ⟪╋⟫ cmp r10, 16 + mov rax, r8 ⟪┫ + cmovb rax, r11 ⟪┫ + mov rsi, r14 ⟪┫ + sar rsi, 63 ⟪┫ + and rsi, r14 ⟪┫ + mov rdi, r14 ⟪┫ + sub rdi, rsi ⟪┫ + test rsi, rsi ⟪┫ + mov r12, -1 ⟪┫ + cmovns r12, rsi ⟪┫ + inc r12 ⟪┫ + imul r12, rdi ⟪┫ + cmp rax, r12 ⟪┫ + cmovb r12, rax ⟪┫ + cmp r12, r8 ⟪┫ + cmovae r12, r8 ⟪┫ + mov edi, 1 ⟪┫ + test r12, r12 ⟪┫ + je .LBB0_2 ⟪╋⟫ jae .LBB0_3 +# %bb.3: # %idxend21.prehe…⟪┫ + cmp r12, 16 ⟪┫ + jae .LBB0_4 ⟪┫ +.LBB0_6: # %scalar.ph ⟪┫ + dec rdi ⟪┫ + mov rax, rdi ⟪┫ + .p2align 4, 0x90 ⟪┫ +.LBB0_7: # %idxend21 ⟪┫ + # =>This Inner Lo…⟪┫ + mov rcx, qword ptr [r15 + 8*rax] ⟪┫ + imul rcx, rdx ⟪┫ + add rcx, qword ptr [r10 + 8*rax] ⟪┫ + mov qword ptr [rbx + 8*rax], rcx ⟪┫ + inc rax ⟪┫ + cmp rax, r12 ⟪┫ + jb .LBB0_7 ⟪┫ +# %bb.8: # %main.exit.sele…⟪┫ + lea rdi, [rax + 1] ⟪┫ +.LBB0_9: # %main.exit.sele…⟪┫ + cmp rax, r8 ⟪┫ + mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫ + jae .LBB0_15 ⟪┫ + ┣⟫# %bb.2: + ┣⟫ xor eax, eax + jmp .LBB0_10 ┃ jmp .LBB0_10 +.LBB0_2: ⟪┫ + mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫ +.LBB0_10: # %main.pseudo.ex…⟪╋⟫.LBB0_3: # %vector.ph + lea rax, [r8 + 1] ⟪┫ + .p2align 4, 0x90 ⟪┫ +.LBB0_11: # %L13.postloop ⟪┫ + # =>This Inner Lo…⟪┫ + lea rsi, [rdi - 1] ⟪┫ + cmp rsi, r11 ⟪┫ + jae .LBB0_25 ⟪┫ +# 
%bb.12: # %idxend.postloo…⟪┫ + # in Loop: Head…⟪┫ + cmp rsi, r14 ⟪┫ + jae .LBB0_26 ⟪┫ +# %bb.13: # %idxend12.postl…⟪┫ + # in Loop: Head…⟪┫ + cmp rsi, r8 ⟪┫ + jae .LBB0_27 ⟪┫ +# %bb.14: # %idxend21.postl…⟪┫ + # in Loop: Head…⟪┫ + mov rsi, qword ptr [r15 + 8*rdi - 8] ⟪┫ + imul rsi, rdx ⟪┫ + add rsi, qword ptr [r10 + 8*rdi - 8] ⟪┫ + mov qword ptr [rbx + 8*rdi - 8], rsi ⟪┫ + inc rdi ⟪┫ + cmp rax, rdi ⟪┫ + jne .LBB0_11 ⟪┫ +.LBB0_15: # %L31 ⟪┫ + vmovaps xmm6, xmmword ptr [rbp - 96] # 16-byte …⟪┫ + vmovaps xmm7, xmmword ptr [rbp - 80] # 16-byte …⟪┫ + lea rsp, [rbp - 56] ⟪┫ + pop rbx ⟪┫ + pop rdi ⟪┫ + pop rsi ⟪┫ + pop r12 ⟪┫ + pop r13 ⟪┫ + pop r14 ⟪┫ + pop r15 ⟪┫ + pop rbp ⟪┫ + vzeroupper ⟪┫ + ret ⟪┫ +.LBB0_4: # %vector.memchec…⟪┫ + mov qword ptr [rbp - 104], r13 # 8-byte Spill ⟪┫ + mov qword ptr [rbp - 112], r9 # 8-byte Spill ⟪┫ + lea rsi, [rbx + 8*r12] ⟪┫ + lea rax, [r15 + 8*r12] ⟪┫ + lea r13, [r10 + 8*r12] ⟪┫ + cmp rbx, rax ⟪┫ + setb r9b ⟪┫ + cmp r15, rsi ⟪┫ + setb cl ⟪┫ + cmp rbx, r13 ⟪┫ + setb al ⟪┫ + cmp r10, rsi ⟪┫ + setb sil ⟪┫ + test r9b, cl ⟪┫ + jne .LBB0_5 ⟪┫ +# %bb.16: # %vector.memchec…⟪┫ + and al, sil ⟪┫ + mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫ + mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫ + jne .LBB0_6 ⟪┫ +# %bb.17: # %vector.ph ⟪┫ + movabs rsi, 9223372036854775792 ⟪┫ + and rsi, r12 ⟪┫ + ┣⟫ mov rax, r10 + ┣⟫ and rax, -16 + vmovq xmm0, rdx ┃ vmovq xmm0, rdx + vpbroadcastq ymm0, xmm0 ┃ vpbroadcastq ymm0, xmm0 + lea rcx, [rsi - 16] ⟪╋⟫ lea rcx, [rax - 16] + mov rax, rcx ⟪╋⟫ mov rsi, rcx + shr rax, 4 ⟪╋⟫ shr rsi, 4 + inc rax ⟪╋⟫ inc rsi + vpsrlq ymm1, ymm0, 32 ┃ vpsrlq ymm1, ymm0, 32 + test rcx, rcx ┃ test rcx, rcx + je .LBB0_18 ⟪╋⟫ je .LBB0_4 +# %bb.19: # %vector.ph.new ⟪╋⟫# %bb.5: # %vector.ph.new + mov r13, rax ⟪┫ + and r13, -2 ⟪╋⟫ and rdi, -2 + xor edi, edi ⟪┫ + ┣⟫ mov rdi, rsi + ┣⟫ xor ecx, ecx + .p2align 4, 0x90 ┃ .p2align 4, 0x90 +.LBB0_20: # %vector.body ⟪╋⟫.LBB0_6: # %vector.body + # =>This Inner Lo… ┃ # =>This Inner Lo… + vmovdqu ymm2, 
ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] + vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] + vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] + vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] + vpmuludq ymm6, ymm2, ymm1 ┃ vpmuludq ymm6, ymm2, ymm1 + vpsrlq ymm7, ymm2, 32 ┃ vpsrlq ymm7, ymm2, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm2, ymm2, ymm0 ┃ vpmuludq ymm2, ymm2, ymm0 + vpaddq ymm2, ymm2, ymm6 ┃ vpaddq ymm2, ymm2, ymm6 + vpmuludq ymm6, ymm3, ymm1 ┃ vpmuludq ymm6, ymm3, ymm1 + vpsrlq ymm7, ymm3, 32 ┃ vpsrlq ymm7, ymm3, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm3, ymm3, ymm0 ┃ vpmuludq ymm3, ymm3, ymm0 + vpaddq ymm3, ymm3, ymm6 ┃ vpaddq ymm3, ymm3, ymm6 + vpmuludq ymm6, ymm4, ymm1 ┃ vpmuludq ymm6, ymm4, ymm1 + vpsrlq ymm7, ymm4, 32 ┃ vpsrlq ymm7, ymm4, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm4, ymm4, ymm0 ┃ vpmuludq ymm4, ymm4, ymm0 + vpaddq ymm4, ymm4, ymm6 ┃ vpaddq ymm4, ymm4, ymm6 + vpmuludq ymm6, ymm5, ymm1 ┃ vpmuludq ymm6, ymm5, ymm1 + vpsrlq ymm7, ymm5, 32 ┃ vpsrlq ymm7, ymm5, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm5, ymm5, ymm0 ┃ vpmuludq ymm5, ymm5, ymm0 + vpaddq ymm5, ymm5, ymm6 ┃ vpaddq ymm5, ymm5, ymm6 + vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx] + vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm3, ymm3, 
ymmword ptr [r9 + 8*rcx + 32] + vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 64] + vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 96] + vmovdqu ymmword ptr [rbx + 8*rdi], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm2 + vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm3 + vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm4 + vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm5 + vmovdqu ymm2, ymmword ptr [r15 + 8*rdi + 128] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx + 128] + vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 160] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 160] + vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 192] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 192] + vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 224] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 224] + vpmuludq ymm6, ymm2, ymm1 ┃ vpmuludq ymm6, ymm2, ymm1 + vpsrlq ymm7, ymm2, 32 ┃ vpsrlq ymm7, ymm2, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm2, ymm2, ymm0 ┃ vpmuludq ymm2, ymm2, ymm0 + vpaddq ymm2, ymm2, ymm6 ┃ vpaddq ymm2, ymm2, ymm6 + vpmuludq ymm6, ymm3, ymm1 ┃ vpmuludq ymm6, ymm3, ymm1 + vpsrlq ymm7, ymm3, 32 ┃ vpsrlq ymm7, ymm3, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm3, ymm3, ymm0 ┃ vpmuludq ymm3, ymm3, ymm0 + vpaddq ymm3, ymm3, ymm6 ┃ vpaddq ymm3, ymm3, ymm6 + vpmuludq ymm6, ymm4, ymm1 ┃ vpmuludq ymm6, ymm4, ymm1 + vpsrlq ymm7, ymm4, 32 ┃ vpsrlq ymm7, ymm4, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + 
vpmuludq ymm4, ymm4, ymm0 ┃ vpmuludq ymm4, ymm4, ymm0 + vpaddq ymm4, ymm4, ymm6 ┃ vpaddq ymm4, ymm4, ymm6 + vpmuludq ymm6, ymm5, ymm1 ┃ vpmuludq ymm6, ymm5, ymm1 + vpsrlq ymm7, ymm5, 32 ┃ vpsrlq ymm7, ymm5, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm5, ymm5, ymm0 ┃ vpmuludq ymm5, ymm5, ymm0 + vpaddq ymm5, ymm5, ymm6 ┃ vpaddq ymm5, ymm5, ymm6 + vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi + 128] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx + 128] + vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 160] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 160] + vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 192] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 192] + vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 224] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 224] + vmovdqu ymmword ptr [rbx + 8*rdi + 128], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 128], ymm2 + vmovdqu ymmword ptr [rbx + 8*rdi + 160], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 160], ymm3 + vmovdqu ymmword ptr [rbx + 8*rdi + 192], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 192], ymm4 + vmovdqu ymmword ptr [rbx + 8*rdi + 224], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 224], ymm5 + add rdi, 32 ⟪╋⟫ add rcx, 32 + add r13, -2 ⟪╋⟫ add rdi, -2 + jne .LBB0_20 ⟪╋⟫ jne .LBB0_6 + jmp .LBB0_21 ⟪┫ +.LBB0_5: ⟪┫ + mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫ + mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫ + jmp .LBB0_6 ⟪┫ +.LBB0_18: ⟪┫ + xor edi, edi ⟪┫ +.LBB0_21: # %middle.block.u…⟪╋⟫# %bb.7: # %middle.block.u… + test al, 1 ⟪╋⟫ test sil, 1 + mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫ + je .LBB0_23 ⟪╋⟫ je .LBB0_9 +# %bb.22: # %vector.body.ep…⟪╋⟫.LBB0_8: # %vector.body.ep… + vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] + vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] + vmovdqu ymm4, ymmword 
ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] + vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] + vpmuludq ymm6, ymm2, ymm1 ┃ vpmuludq ymm6, ymm2, ymm1 + vpsrlq ymm7, ymm2, 32 ┃ vpsrlq ymm7, ymm2, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm2, ymm2, ymm0 ┃ vpmuludq ymm2, ymm2, ymm0 + vpaddq ymm2, ymm2, ymm6 ┃ vpaddq ymm2, ymm2, ymm6 + vpmuludq ymm6, ymm3, ymm1 ┃ vpmuludq ymm6, ymm3, ymm1 + vpsrlq ymm7, ymm3, 32 ┃ vpsrlq ymm7, ymm3, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm3, ymm3, ymm0 ┃ vpmuludq ymm3, ymm3, ymm0 + vpaddq ymm3, ymm3, ymm6 ┃ vpaddq ymm3, ymm3, ymm6 + vpmuludq ymm6, ymm4, ymm1 ┃ vpmuludq ymm6, ymm4, ymm1 + vpsrlq ymm7, ymm4, 32 ┃ vpsrlq ymm7, ymm4, 32 + vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 + vpmuludq ymm4, ymm4, ymm0 ┃ vpmuludq ymm4, ymm4, ymm0 + vpaddq ymm4, ymm4, ymm6 ┃ vpaddq ymm4, ymm4, ymm6 + vpmuludq ymm1, ymm5, ymm1 ┃ vpmuludq ymm1, ymm5, ymm1 + vpsrlq ymm6, ymm5, 32 ┃ vpsrlq ymm6, ymm5, 32 + vpmuludq ymm6, ymm6, ymm0 ┃ vpmuludq ymm6, ymm6, ymm0 + vpaddq ymm1, ymm1, ymm6 ┃ vpaddq ymm1, ymm1, ymm6 + vpsllq ymm1, ymm1, 32 ┃ vpsllq ymm1, ymm1, 32 + vpmuludq ymm0, ymm5, ymm0 ┃ vpmuludq ymm0, ymm5, ymm0 + vpaddq ymm0, ymm0, ymm1 ┃ vpaddq ymm0, ymm0, ymm1 + vpaddq ymm1, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm1, ymm2, ymmword ptr [r9 + 8*rcx] + vpaddq ymm2, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm2, ymm3, ymmword ptr [r9 + 8*rcx + 32] + vpaddq ymm3, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm3, ymm4, ymmword ptr [r9 + 8*rcx + 64] + vpaddq ymm0, ymm0, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ 
vpaddq ymm0, ymm0, ymmword ptr [r9 + 8*rcx + 96] + vmovdqu ymmword ptr [rbx + 8*rdi], ymm1 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm1 + vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm2 + vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm3 + vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm0 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm0 +.LBB0_23: # %middle.block ⟪╋⟫.LBB0_9: # %middle.block + lea rdi, [rsi + 1] ⟪┫ + cmp r12, rsi ⟪╋⟫ cmp r10, rax + jne .LBB0_6 ⟪╋⟫ je .LBB0_11 +# %bb.24: ⟪┫ + mov rax, r12 ⟪┫ + jmp .LBB0_9 ⟪┫ +.LBB0_25: # %oob ⟪╋⟫.LBB0_10: # %L12 + mov eax, 16 ⟪┫ + movabs r11, offset ___chkstk_ms ⟪┫ + call r11 ⟪┫ + sub rsp, rax ⟪┫ + mov rdx, rsp ⟪┫ + mov qword ptr [rdx], rdi ⟪┫ + sub rsp, 32 ⟪┫ + movabs rax, offset ijl_bounds_error_ints ⟪┫ + mov r8d, 1 ⟪┫ + mov rcx, r13 ⟪┫ + vzeroupper ⟪┫ + call rax ⟪┫ +.LBB0_26: # %oob10 ⟪╋⟫.LBB0_11: # %L32 + mov eax, 16 ⟪┫ + movabs r11, offset ___chkstk_ms ⟪┫ + call r11 ⟪┫ + sub rsp, rax ⟪┫ + mov rdx, rsp ⟪┫ + mov qword ptr [rdx], rdi ⟪┫ + sub rsp, 32 ⟪╋⟫ add rsp, 32 + movabs rax, offset ijl_bounds_error_ints ⟪┫ + mov r8d, 1 ⟪┫ + mov rcx, r9 ⟪┫ + vzeroupper ⟪┫ + call rax ⟪┫ +.LBB0_27: # %oob19 ⟪┫ + mov eax, 16 ⟪┫ + movabs r11, offset ___chkstk_ms ⟪┫ + call r11 ⟪┫ + sub rsp, rax ⟪┫ + mov rdx, rsp ⟪┫ + mov qword ptr [rdx], rdi ⟪┫ + sub rsp, 32 ⟪┫ + movabs rax, offset ijl_bounds_error_ints ⟪┫ + mov r8d, 1 ⟪┫ + ┣⟫ .p2align 4, 0x90 + ┣⟫ # =>This Inner Lo… + ┣⟫ mov rcx, qword ptr [r8 + 8*rax] + ┣⟫ imul rcx, rdx + ┣⟫ add rcx, qword ptr [r9 + 8*rax] + ┣⟫ mov qword ptr [r11 + 8*rax], rcx + ┣⟫ inc rax + ┣⟫ cmp r10, rax + ┣⟫ jne .LBB0_10 + ┣⟫ vmovaps xmm6, xmmword ptr [rbp - 48] # 16-byte … + ┣⟫ vmovaps xmm7, xmmword ptr [rbp - 32] # 16-byte … + ┣⟫ pop rdi + ┣⟫ pop rsi + ┣⟫ pop rbp + vzeroupper ┃ vzeroupper + call rax ⟪┫ + ┣⟫ ret + ┣⟫.LBB0_4: + ┣⟫ xor ecx, ecx + ┣⟫ test sil, 1 + ┣⟫ je .LBB0_9 + ┣⟫ jmp .LBB0_8 +.Lfunc_end0: ┃ .Lfunc_end0: + 
.size saxpy, .Lfunc_end0-saxpy ⟪╋⟫ .size saxpy_simd, .Lfunc_end0-saxpy_simd + .cfi_endproc ┃ .cfi_endproc + # -- End function ┃ # -- End function + .section ".note.GNU-stack","",@progbits ┃ .section ".note.GNU-stack","",@progbits + ┃ \ No newline at end of file diff --git a/test/references/saxpy_x86.ll b/test/references/saxpy_x86.ll new file mode 100644 index 0000000..0e90e52 --- /dev/null +++ b/test/references/saxpy_x86.ll @@ -0,0 +1,324 @@ +; Function Attrs: uwtable ┃ ; Function Attrs: uwtable +define void @saxpy({}* noundef nonnull align 16 dereferen…⟪╋⟫define void @saxpy_simd({}* noundef nonnull align 16 dere… +top: ┃ top: + %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* ┃ %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* + %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… ┃ %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… + %arraylen = load i64, i64* %arraylen_ptr, align 8 ┃ %arraylen = load i64, i64* %arraylen_ptr, align 8 + %.not.not = icmp eq i64 %arraylen, 0 ⟪╋⟫ %.not = icmp eq i64 %arraylen, 0 + br i1 %.not.not, label %L31, label %L13.preheader ⟪╋⟫ br i1 %.not, label %L32, label %L12.lr.ph + ⟪┫ +L13.preheader: ; preds…⟪┫ + %5 = bitcast {}* %2 to { i8*, i64, i16, i16, i32 }* ⟪┫ + %arraylen_ptr5 = getelementptr inbounds { i8*, i64, i16…⟪┫ + %arraylen6 = load i64, i64* %arraylen_ptr5, align 8 ⟪┫ + %6 = bitcast {}* %3 to { i8*, i64, i16, i16, i32 }* ⟪┫ + %arraylen_ptr7 = getelementptr inbounds { i8*, i64, i16…⟪┫ + %arraylen8 = load i64, i64* %arraylen_ptr7, align 8 ⟪┫ + %7 = bitcast {}* %2 to i64** ⟪┫ + %arrayptr29 = load i64*, i64** %7, align 8 ⟪┫ + %8 = bitcast {}* %3 to i64** ⟪┫ + %arrayptr1430 = load i64*, i64** %8, align 8 ⟪┫ + %9 = bitcast {}* %0 to i64** ⟪┫ + %arrayptr2331 = load i64*, i64** %9, align 8 ⟪┫ + %umin = call i64 @llvm.umin.i64(i64 %arraylen6, i64 %ar…⟪┫ + %smin = call i64 @llvm.smin.i64(i64 %arraylen8, i64 0) ⟪┫ + %10 = sub i64 %arraylen8, %smin ⟪┫ + %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1) ⟪┫ + %11 = add 
nsw i64 %smax, 1 ⟪┫ + %12 = mul nuw nsw i64 %10, %11 ⟪┫ + %umin36 = call i64 @llvm.umin.i64(i64 %umin, i64 %12) ⟪┫ + %exit.mainloop.at = call i64 @llvm.umin.i64(i64 %umin36…⟪┫ + %.not = icmp eq i64 %exit.mainloop.at, 0 ⟪┫ + br i1 %.not, label %main.pseudo.exit, label %idxend21.p…⟪┫ + ⟪┫ +idxend21.preheader: ; preds…⟪┫ + %min.iters.check = icmp ult i64 %exit.mainloop.at, 16 ⟪┫ + br i1 %min.iters.check, label %scalar.ph, label %vector…⟪┫ + ┃ +vector.memcheck: ; preds…⟪┫ + %scevgep = getelementptr i64, i64* %arrayptr2331, i64 %…⟪┫ + %scevgep58 = getelementptr i64, i64* %arrayptr29, i64 %…⟪┫ + %scevgep61 = getelementptr i64, i64* %arrayptr1430, i64…⟪┫ + %bound0 = icmp ult i64* %arrayptr2331, %scevgep58 ⟪┫ + %bound1 = icmp ult i64* %arrayptr29, %scevgep ⟪┫ + %found.conflict = and i1 %bound0, %bound1 ⟪┫ + %bound063 = icmp ult i64* %arrayptr2331, %scevgep61 ⟪┫ + %bound164 = icmp ult i64* %arrayptr1430, %scevgep ⟪┫ + %found.conflict65 = and i1 %bound063, %bound164 ⟪┫ + %conflict.rdx = or i1 %found.conflict, %found.conflict6…⟪┫ + br i1 %conflict.rdx, label %scalar.ph, label %vector.ph ⟪╋⟫ br i1 %min.iters.check, label %scalar.ph, label %vector… + ┣⟫L12.lr.ph: ; preds… + ┣⟫ %5 = bitcast {}* %2 to i64** + ┣⟫ %arrayptr8 = load i64*, i64** %5, align 8 + ┣⟫ %6 = bitcast {}* %3 to i64** + ┣⟫ %arrayptr29 = load i64*, i64** %6, align 8 + ┣⟫ %7 = bitcast {}* %0 to i64** + ┣⟫ %arrayptr510 = load i64*, i64** %7, align 8 + ┣⟫ %min.iters.check = icmp ult i64 %arraylen, 16 + ┃ +vector.ph: ; preds…⟪╋⟫vector.ph: ; preds… + %n.vec = and i64 %exit.mainloop.at, 9223372036854775792 ⟪╋⟫ %n.vec = and i64 %arraylen, 9223372036854775792 + %ind.end = or i64 %n.vec, 1 ⟪┫ + %broadcast.splatinsert = insertelement <4 x i64> poison… ┃ %broadcast.splatinsert = insertelement <4 x i64> poison… + %broadcast.splat = shufflevector <4 x i64> %broadcast.s… ┃ %broadcast.splat = shufflevector <4 x i64> %broadcast.s… + %13 = add nsw i64 %n.vec, -16 ⟪╋⟫ %8 = add nsw i64 %n.vec, -16 + %14 = lshr exact i64 %13, 4 ⟪╋⟫ 
%9 = lshr exact i64 %8, 4 + %15 = add nuw nsw i64 %14, 1 ⟪╋⟫ %10 = add nuw nsw i64 %9, 1 + %xtraiter = and i64 %15, 1 ⟪╋⟫ %xtraiter = and i64 %10, 1 + %16 = icmp eq i64 %13, 0 ⟪╋⟫ %11 = icmp eq i64 %8, 0 + br i1 %16, label %middle.block.unr-lcssa, label %vector…⟪╋⟫ br i1 %11, label %middle.block.unr-lcssa, label %vector… + ┃ +vector.ph.new: ; preds… ┃ vector.ph.new: ; preds… + %unroll_iter = and i64 %15, 2305843009213693950 ⟪╋⟫ %unroll_iter = and i64 %10, 2305843009213693950 + br label %vector.body ┃ br label %vector.body + ┃ +vector.body: ; preds… ┃ vector.body: ; preds… + %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… ┃ %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… + %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… ┃ %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… + %17 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %12 = getelementptr inbounds i64, i64* %arrayptr8, i64 … + %18 = bitcast i64* %17 to <4 x i64>* ⟪╋⟫ %13 = bitcast i64* %12 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %18, align 8 ⟪╋⟫ %wide.load = load <4 x i64>, <4 x i64>* %13, align 8 + %19 = getelementptr inbounds i64, i64* %17, i64 4 ⟪╋⟫ %14 = getelementptr inbounds i64, i64* %12, i64 4 + %20 = bitcast i64* %19 to <4 x i64>* ⟪╋⟫ %15 = bitcast i64* %14 to <4 x i64>* + %wide.load66 = load <4 x i64>, <4 x i64>* %20, align 8 ⟪╋⟫ %wide.load13 = load <4 x i64>, <4 x i64>* %15, align 8 + %21 = getelementptr inbounds i64, i64* %17, i64 8 ⟪╋⟫ %16 = getelementptr inbounds i64, i64* %12, i64 8 + %22 = bitcast i64* %21 to <4 x i64>* ⟪╋⟫ %17 = bitcast i64* %16 to <4 x i64>* + %wide.load67 = load <4 x i64>, <4 x i64>* %22, align 8 ⟪╋⟫ %wide.load14 = load <4 x i64>, <4 x i64>* %17, align 8 + %23 = getelementptr inbounds i64, i64* %17, i64 12 ⟪╋⟫ %18 = getelementptr inbounds i64, i64* %12, i64 12 + %24 = bitcast i64* %23 to <4 x i64>* ⟪╋⟫ %19 = bitcast i64* %18 to <4 x i64>* + %wide.load68 = load <4 x i64>, <4 x i64>* %24, align 8 ⟪╋⟫ %wide.load15 = load 
<4 x i64>, <4 x i64>* %19, align 8 + %25 = mul <4 x i64> %wide.load, %broadcast.splat ⟪╋⟫ %20 = mul <4 x i64> %wide.load, %broadcast.splat + %26 = mul <4 x i64> %wide.load66, %broadcast.splat ⟪╋⟫ %21 = mul <4 x i64> %wide.load13, %broadcast.splat + %27 = mul <4 x i64> %wide.load67, %broadcast.splat ⟪╋⟫ %22 = mul <4 x i64> %wide.load14, %broadcast.splat + %28 = mul <4 x i64> %wide.load68, %broadcast.splat ⟪╋⟫ %23 = mul <4 x i64> %wide.load15, %broadcast.splat + %29 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %24 = getelementptr inbounds i64, i64* %arrayptr29, i64… + %30 = bitcast i64* %29 to <4 x i64>* ⟪╋⟫ %25 = bitcast i64* %24 to <4 x i64>* + %wide.load75 = load <4 x i64>, <4 x i64>* %30, align 8 ⟪╋⟫ %wide.load22 = load <4 x i64>, <4 x i64>* %25, align 8 + %31 = getelementptr inbounds i64, i64* %29, i64 4 ⟪╋⟫ %26 = getelementptr inbounds i64, i64* %24, i64 4 + %32 = bitcast i64* %31 to <4 x i64>* ⟪╋⟫ %27 = bitcast i64* %26 to <4 x i64>* + %wide.load76 = load <4 x i64>, <4 x i64>* %32, align 8 ⟪╋⟫ %wide.load23 = load <4 x i64>, <4 x i64>* %27, align 8 + %33 = getelementptr inbounds i64, i64* %29, i64 8 ⟪╋⟫ %28 = getelementptr inbounds i64, i64* %24, i64 8 + %34 = bitcast i64* %33 to <4 x i64>* ⟪╋⟫ %29 = bitcast i64* %28 to <4 x i64>* + %wide.load77 = load <4 x i64>, <4 x i64>* %34, align 8 ⟪╋⟫ %wide.load24 = load <4 x i64>, <4 x i64>* %29, align 8 + %35 = getelementptr inbounds i64, i64* %29, i64 12 ⟪╋⟫ %30 = getelementptr inbounds i64, i64* %24, i64 12 + %36 = bitcast i64* %35 to <4 x i64>* ⟪╋⟫ %31 = bitcast i64* %30 to <4 x i64>* + %wide.load78 = load <4 x i64>, <4 x i64>* %36, align 8 ⟪╋⟫ %wide.load25 = load <4 x i64>, <4 x i64>* %31, align 8 + %37 = add <4 x i64> %wide.load75, %25 ⟪╋⟫ %32 = add <4 x i64> %wide.load22, %20 + %38 = add <4 x i64> %wide.load76, %26 ⟪╋⟫ %33 = add <4 x i64> %wide.load23, %21 + %39 = add <4 x i64> %wide.load77, %27 ⟪╋⟫ %34 = add <4 x i64> %wide.load24, %22 + %40 = add <4 x i64> %wide.load78, %28 ⟪╋⟫ %35 = add <4 x i64> 
%wide.load25, %23 + %41 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %36 = getelementptr inbounds i64, i64* %arrayptr510, i6… + %42 = bitcast i64* %41 to <4 x i64>* ⟪╋⟫ %37 = bitcast i64* %36 to <4 x i64>* + store <4 x i64> %37, <4 x i64>* %42, align 8 ⟪╋⟫ store <4 x i64> %32, <4 x i64>* %37, align 8 + %43 = getelementptr inbounds i64, i64* %41, i64 4 ⟪╋⟫ %38 = getelementptr inbounds i64, i64* %36, i64 4 + %44 = bitcast i64* %43 to <4 x i64>* ⟪╋⟫ %39 = bitcast i64* %38 to <4 x i64>* + store <4 x i64> %38, <4 x i64>* %44, align 8 ⟪╋⟫ store <4 x i64> %33, <4 x i64>* %39, align 8 + %45 = getelementptr inbounds i64, i64* %41, i64 8 ⟪╋⟫ %40 = getelementptr inbounds i64, i64* %36, i64 8 + %46 = bitcast i64* %45 to <4 x i64>* ⟪╋⟫ %41 = bitcast i64* %40 to <4 x i64>* + store <4 x i64> %39, <4 x i64>* %46, align 8 ⟪╋⟫ store <4 x i64> %34, <4 x i64>* %41, align 8 + %47 = getelementptr inbounds i64, i64* %41, i64 12 ⟪╋⟫ %42 = getelementptr inbounds i64, i64* %36, i64 12 + %48 = bitcast i64* %47 to <4 x i64>* ⟪╋⟫ %43 = bitcast i64* %42 to <4 x i64>* + store <4 x i64> %40, <4 x i64>* %48, align 8 ⟪╋⟫ store <4 x i64> %35, <4 x i64>* %43, align 8 + %index.next = or i64 %index, 16 ┃ %index.next = or i64 %index, 16 + %49 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %44 = getelementptr inbounds i64, i64* %arrayptr8, i64 … + %50 = bitcast i64* %49 to <4 x i64>* ⟪╋⟫ %45 = bitcast i64* %44 to <4 x i64>* + %wide.load.1 = load <4 x i64>, <4 x i64>* %50, align 8 ⟪╋⟫ %wide.load.1 = load <4 x i64>, <4 x i64>* %45, align 8 + %51 = getelementptr inbounds i64, i64* %49, i64 4 ⟪╋⟫ %46 = getelementptr inbounds i64, i64* %44, i64 4 + %52 = bitcast i64* %51 to <4 x i64>* ⟪╋⟫ %47 = bitcast i64* %46 to <4 x i64>* + %wide.load66.1 = load <4 x i64>, <4 x i64>* %52, align …⟪╋⟫ %wide.load13.1 = load <4 x i64>, <4 x i64>* %47, align … + %53 = getelementptr inbounds i64, i64* %49, i64 8 ⟪╋⟫ %48 = getelementptr inbounds i64, i64* %44, i64 8 + %54 = bitcast i64* %53 to <4 x i64>* ⟪╋⟫ 
%49 = bitcast i64* %48 to <4 x i64>* + %wide.load67.1 = load <4 x i64>, <4 x i64>* %54, align …⟪╋⟫ %wide.load14.1 = load <4 x i64>, <4 x i64>* %49, align … + %55 = getelementptr inbounds i64, i64* %49, i64 12 ⟪╋⟫ %50 = getelementptr inbounds i64, i64* %44, i64 12 + %56 = bitcast i64* %55 to <4 x i64>* ⟪╋⟫ %51 = bitcast i64* %50 to <4 x i64>* + %wide.load68.1 = load <4 x i64>, <4 x i64>* %56, align …⟪╋⟫ %wide.load15.1 = load <4 x i64>, <4 x i64>* %51, align … + %57 = mul <4 x i64> %wide.load.1, %broadcast.splat ⟪╋⟫ %52 = mul <4 x i64> %wide.load.1, %broadcast.splat + %58 = mul <4 x i64> %wide.load66.1, %broadcast.splat ⟪╋⟫ %53 = mul <4 x i64> %wide.load13.1, %broadcast.splat + %59 = mul <4 x i64> %wide.load67.1, %broadcast.splat ⟪╋⟫ %54 = mul <4 x i64> %wide.load14.1, %broadcast.splat + %60 = mul <4 x i64> %wide.load68.1, %broadcast.splat ⟪╋⟫ %55 = mul <4 x i64> %wide.load15.1, %broadcast.splat + %61 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %56 = getelementptr inbounds i64, i64* %arrayptr29, i64… + %62 = bitcast i64* %61 to <4 x i64>* ⟪╋⟫ %57 = bitcast i64* %56 to <4 x i64>* + %wide.load75.1 = load <4 x i64>, <4 x i64>* %62, align …⟪╋⟫ %wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align … + %63 = getelementptr inbounds i64, i64* %61, i64 4 ⟪╋⟫ %58 = getelementptr inbounds i64, i64* %56, i64 4 + %64 = bitcast i64* %63 to <4 x i64>* ⟪╋⟫ %59 = bitcast i64* %58 to <4 x i64>* + %wide.load76.1 = load <4 x i64>, <4 x i64>* %64, align …⟪╋⟫ %wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align … + %65 = getelementptr inbounds i64, i64* %61, i64 8 ⟪╋⟫ %60 = getelementptr inbounds i64, i64* %56, i64 8 + %66 = bitcast i64* %65 to <4 x i64>* ⟪╋⟫ %61 = bitcast i64* %60 to <4 x i64>* + %wide.load77.1 = load <4 x i64>, <4 x i64>* %66, align …⟪╋⟫ %wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align … + %67 = getelementptr inbounds i64, i64* %61, i64 12 ⟪╋⟫ %62 = getelementptr inbounds i64, i64* %56, i64 12 + %68 = bitcast i64* %67 to <4 x i64>* ⟪╋⟫ %63 = bitcast 
i64* %62 to <4 x i64>* + %wide.load78.1 = load <4 x i64>, <4 x i64>* %68, align …⟪╋⟫ %wide.load25.1 = load <4 x i64>, <4 x i64>* %63, align … + %69 = add <4 x i64> %wide.load75.1, %57 ⟪╋⟫ %64 = add <4 x i64> %wide.load22.1, %52 + %70 = add <4 x i64> %wide.load76.1, %58 ⟪╋⟫ %65 = add <4 x i64> %wide.load23.1, %53 + %71 = add <4 x i64> %wide.load77.1, %59 ⟪╋⟫ %66 = add <4 x i64> %wide.load24.1, %54 + %72 = add <4 x i64> %wide.load78.1, %60 ⟪╋⟫ %67 = add <4 x i64> %wide.load25.1, %55 + %73 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %68 = getelementptr inbounds i64, i64* %arrayptr510, i6… + %74 = bitcast i64* %73 to <4 x i64>* ⟪╋⟫ %69 = bitcast i64* %68 to <4 x i64>* + store <4 x i64> %69, <4 x i64>* %74, align 8 ⟪╋⟫ store <4 x i64> %64, <4 x i64>* %69, align 8 + %75 = getelementptr inbounds i64, i64* %73, i64 4 ⟪╋⟫ %70 = getelementptr inbounds i64, i64* %68, i64 4 + %76 = bitcast i64* %75 to <4 x i64>* ⟪╋⟫ %71 = bitcast i64* %70 to <4 x i64>* + store <4 x i64> %70, <4 x i64>* %76, align 8 ⟪╋⟫ store <4 x i64> %65, <4 x i64>* %71, align 8 + %77 = getelementptr inbounds i64, i64* %73, i64 8 ⟪╋⟫ %72 = getelementptr inbounds i64, i64* %68, i64 8 + %78 = bitcast i64* %77 to <4 x i64>* ⟪╋⟫ %73 = bitcast i64* %72 to <4 x i64>* + store <4 x i64> %71, <4 x i64>* %78, align 8 ⟪╋⟫ store <4 x i64> %66, <4 x i64>* %73, align 8 + %79 = getelementptr inbounds i64, i64* %73, i64 12 ⟪╋⟫ %74 = getelementptr inbounds i64, i64* %68, i64 12 + %80 = bitcast i64* %79 to <4 x i64>* ⟪╋⟫ %75 = bitcast i64* %74 to <4 x i64>* + store <4 x i64> %72, <4 x i64>* %80, align 8 ⟪╋⟫ store <4 x i64> %67, <4 x i64>* %75, align 8 + %index.next.1 = add nuw i64 %index, 32 ┃ %index.next.1 = add nuw i64 %index, 32 + %niter.next.1 = add i64 %niter, 2 ┃ %niter.next.1 = add i64 %niter, 2 + %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter ┃ %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter + br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… ┃ br i1 %niter.ncmp.1, label 
%middle.block.unr-lcssa, lab… + ┃ +middle.block.unr-lcssa: ; preds… ┃ middle.block.unr-lcssa: ; preds… + %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… ┃ %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 ┃ %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %middle.block, label %vector… ┃ br i1 %lcmp.mod.not, label %middle.block, label %vector… + ┃ +vector.body.epil.preheader: ; preds… ┃ vector.body.epil.preheader: ; preds… + %81 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %76 = getelementptr inbounds i64, i64* %arrayptr8, i64 … + %82 = bitcast i64* %81 to <4 x i64>* ⟪╋⟫ %77 = bitcast i64* %76 to <4 x i64>* + %wide.load.epil = load <4 x i64>, <4 x i64>* %82, align…⟪╋⟫ %wide.load.epil = load <4 x i64>, <4 x i64>* %77, align… + %83 = getelementptr inbounds i64, i64* %81, i64 4 ⟪╋⟫ %78 = getelementptr inbounds i64, i64* %76, i64 4 + %84 = bitcast i64* %83 to <4 x i64>* ⟪╋⟫ %79 = bitcast i64* %78 to <4 x i64>* + %wide.load66.epil = load <4 x i64>, <4 x i64>* %84, ali…⟪╋⟫ %wide.load13.epil = load <4 x i64>, <4 x i64>* %79, ali… + %85 = getelementptr inbounds i64, i64* %81, i64 8 ⟪╋⟫ %80 = getelementptr inbounds i64, i64* %76, i64 8 + %86 = bitcast i64* %85 to <4 x i64>* ⟪╋⟫ %81 = bitcast i64* %80 to <4 x i64>* + %wide.load67.epil = load <4 x i64>, <4 x i64>* %86, ali…⟪╋⟫ %wide.load14.epil = load <4 x i64>, <4 x i64>* %81, ali… + %87 = getelementptr inbounds i64, i64* %81, i64 12 ⟪╋⟫ %82 = getelementptr inbounds i64, i64* %76, i64 12 + %88 = bitcast i64* %87 to <4 x i64>* ⟪╋⟫ %83 = bitcast i64* %82 to <4 x i64>* + %wide.load68.epil = load <4 x i64>, <4 x i64>* %88, ali…⟪╋⟫ %wide.load15.epil = load <4 x i64>, <4 x i64>* %83, ali… + %89 = mul <4 x i64> %wide.load.epil, %broadcast.splat ⟪╋⟫ %84 = mul <4 x i64> %wide.load.epil, %broadcast.splat + %90 = mul <4 x i64> %wide.load66.epil, %broadcast.splat ⟪╋⟫ %85 = mul <4 x i64> %wide.load13.epil, %broadcast.splat + %91 = mul <4 x i64> 
%wide.load67.epil, %broadcast.splat ⟪╋⟫ %86 = mul <4 x i64> %wide.load14.epil, %broadcast.splat + %92 = mul <4 x i64> %wide.load68.epil, %broadcast.splat ⟪╋⟫ %87 = mul <4 x i64> %wide.load15.epil, %broadcast.splat + %93 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %88 = getelementptr inbounds i64, i64* %arrayptr29, i64… + %94 = bitcast i64* %93 to <4 x i64>* ⟪╋⟫ %89 = bitcast i64* %88 to <4 x i64>* + %wide.load75.epil = load <4 x i64>, <4 x i64>* %94, ali…⟪╋⟫ %wide.load22.epil = load <4 x i64>, <4 x i64>* %89, ali… + %95 = getelementptr inbounds i64, i64* %93, i64 4 ⟪╋⟫ %90 = getelementptr inbounds i64, i64* %88, i64 4 + %96 = bitcast i64* %95 to <4 x i64>* ⟪╋⟫ %91 = bitcast i64* %90 to <4 x i64>* + %wide.load76.epil = load <4 x i64>, <4 x i64>* %96, ali…⟪╋⟫ %wide.load23.epil = load <4 x i64>, <4 x i64>* %91, ali… + %97 = getelementptr inbounds i64, i64* %93, i64 8 ⟪╋⟫ %92 = getelementptr inbounds i64, i64* %88, i64 8 + %98 = bitcast i64* %97 to <4 x i64>* ⟪╋⟫ %93 = bitcast i64* %92 to <4 x i64>* + %wide.load77.epil = load <4 x i64>, <4 x i64>* %98, ali…⟪╋⟫ %wide.load24.epil = load <4 x i64>, <4 x i64>* %93, ali… + %99 = getelementptr inbounds i64, i64* %93, i64 12 ⟪╋⟫ %94 = getelementptr inbounds i64, i64* %88, i64 12 + %100 = bitcast i64* %99 to <4 x i64>* ⟪╋⟫ %95 = bitcast i64* %94 to <4 x i64>* + %wide.load78.epil = load <4 x i64>, <4 x i64>* %100, al…⟪╋⟫ %wide.load25.epil = load <4 x i64>, <4 x i64>* %95, ali… + %101 = add <4 x i64> %wide.load75.epil, %89 ⟪╋⟫ %96 = add <4 x i64> %wide.load22.epil, %84 + %102 = add <4 x i64> %wide.load76.epil, %90 ⟪╋⟫ %97 = add <4 x i64> %wide.load23.epil, %85 + %103 = add <4 x i64> %wide.load77.epil, %91 ⟪╋⟫ %98 = add <4 x i64> %wide.load24.epil, %86 + %104 = add <4 x i64> %wide.load78.epil, %92 ⟪╋⟫ %99 = add <4 x i64> %wide.load25.epil, %87 + %105 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪╋⟫ %100 = getelementptr inbounds i64, i64* %arrayptr510, i… + %106 = bitcast i64* %105 to <4 x i64>* ⟪╋⟫ %101 = 
bitcast i64* %100 to <4 x i64>* + store <4 x i64> %101, <4 x i64>* %106, align 8 ⟪╋⟫ store <4 x i64> %96, <4 x i64>* %101, align 8 + %107 = getelementptr inbounds i64, i64* %105, i64 4 ⟪╋⟫ %102 = getelementptr inbounds i64, i64* %100, i64 4 + %108 = bitcast i64* %107 to <4 x i64>* ⟪╋⟫ %103 = bitcast i64* %102 to <4 x i64>* + store <4 x i64> %102, <4 x i64>* %108, align 8 ⟪╋⟫ store <4 x i64> %97, <4 x i64>* %103, align 8 + %109 = getelementptr inbounds i64, i64* %105, i64 8 ⟪╋⟫ %104 = getelementptr inbounds i64, i64* %100, i64 8 + %110 = bitcast i64* %109 to <4 x i64>* ⟪╋⟫ %105 = bitcast i64* %104 to <4 x i64>* + store <4 x i64> %103, <4 x i64>* %110, align 8 ⟪╋⟫ store <4 x i64> %98, <4 x i64>* %105, align 8 + %111 = getelementptr inbounds i64, i64* %105, i64 12 ⟪╋⟫ %106 = getelementptr inbounds i64, i64* %100, i64 12 + %112 = bitcast i64* %111 to <4 x i64>* ⟪╋⟫ %107 = bitcast i64* %106 to <4 x i64>* + store <4 x i64> %104, <4 x i64>* %112, align 8 ⟪╋⟫ store <4 x i64> %99, <4 x i64>* %107, align 8 + br label %middle.block ┃ br label %middle.block + ┃ +middle.block: ; preds… ┃ middle.block: ; preds… + %cmp.n = icmp eq i64 %exit.mainloop.at, %n.vec ⟪╋⟫ %cmp.n = icmp eq i64 %arraylen, %n.vec + br i1 %cmp.n, label %main.exit.selector, label %scalar.…⟪┫ + ⟪┫ +scalar.ph: ; preds…⟪┫ + %bc.resume.val = phi i64 [ %ind.end, %middle.block ], […⟪┫ + br label %idxend21 ⟪┫ + ⟪┫ +L31: ; preds…⟪┫ + ret void ⟪┫ + ⟪┫ +oob: ; preds…⟪┫ + %errorbox = alloca i64, align 8 ⟪┫ + store i64 %value_phi3.postloop, i64* %errorbox, align 8 ⟪┫ + call void @ijl_bounds_error_ints({}* %2, i64* nonnull %…⟪┫ + unreachable ⟪┫ + ⟪┫ +oob10: ; preds…⟪┫ + %errorbox11 = alloca i64, align 8 ⟪┫ + store i64 %value_phi3.postloop, i64* %errorbox11, align…⟪┫ + call void @ijl_bounds_error_ints({}* %3, i64* nonnull %…⟪┫ + unreachable ⟪┫ + ⟪┫ +oob19: ; preds…⟪┫ + %errorbox20 = alloca i64, align 8 ⟪┫ + store i64 %value_phi3.postloop, i64* %errorbox20, align…⟪┫ + call void @ijl_bounds_error_ints({}* %0, i64* nonnull 
%…⟪┫ + unreachable ⟪┫ + ⟪┫ +idxend21: ; preds…⟪┫ + %value_phi3 = phi i64 [ %119, %idxend21 ], [ %bc.resume…⟪┫ + %113 = add nsw i64 %value_phi3, -1 ⟪┫ + %114 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫ + %arrayref = load i64, i64* %114, align 8 ⟪┫ + %115 = mul i64 %arrayref, %1 ⟪┫ + %116 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫ + %arrayref15 = load i64, i64* %116, align 8 ⟪┫ + %117 = add i64 %arrayref15, %115 ⟪┫ + %118 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫ + store i64 %117, i64* %118, align 8 ⟪┫ + %119 = add nuw nsw i64 %value_phi3, 1 ⟪┫ + %.not51 = icmp ult i64 %value_phi3, %exit.mainloop.at ⟪┫ + br i1 %.not51, label %idxend21, label %main.exit.select…⟪┫ + ⟪┫ +main.exit.selector: ; preds…⟪┫ + %value_phi3.lcssa = phi i64 [ %exit.mainloop.at, %middl…⟪┫ + %.lcssa = phi i64 [ %ind.end, %middle.block ], [ %119, …⟪┫ + %120 = icmp ult i64 %value_phi3.lcssa, %arraylen ⟪┫ + br i1 %120, label %main.pseudo.exit, label %L31 ⟪┫ + ⟪┫ +main.pseudo.exit: ; preds…⟪┫ + %value_phi3.copy = phi i64 [ 1, %L13.preheader ], [ %.l…⟪┫ + br label %L13.postloop ⟪┫ + ⟪┫ +L13.postloop: ; preds…⟪┫ + %value_phi3.postloop = phi i64 [ %127, %idxend21.postlo…⟪┫ + %121 = add i64 %value_phi3.postloop, -1 ⟪┫ + %inbounds.postloop = icmp ult i64 %121, %arraylen6 ⟪┫ + br i1 %inbounds.postloop, label %idxend.postloop, label…⟪┫ + ┣⟫ br i1 %cmp.n, label %L32, label %scalar.ph + ┃ +idxend.postloop: ; preds…⟪┫ + %inbounds9.postloop = icmp ult i64 %121, %arraylen8 ⟪┫ + br i1 %inbounds9.postloop, label %idxend12.postloop, la…⟪┫ + ┣⟫scalar.ph: ; preds… + ┣⟫ %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0… + ┣⟫ br label %L12 + ┃ +idxend12.postloop: ; preds…⟪┫ + %inbounds18.postloop = icmp ult i64 %121, %arraylen ⟪┫ + br i1 %inbounds18.postloop, label %idxend21.postloop, l…⟪┫ + ┣⟫L12: ; preds… + ┣⟫ %value_phi12 = phi i64 [ %bc.resume.val, %scalar.ph ], … + ┣⟫ %108 = getelementptr inbounds i64, i64* %arrayptr8, i64… + ┣⟫ %arrayref = load i64, i64* %108, align 8 + ┣⟫ 
%109 = mul i64 %arrayref, %1 + ┣⟫ %110 = getelementptr inbounds i64, i64* %arrayptr29, i6… + ┣⟫ %arrayref3 = load i64, i64* %110, align 8 + ┣⟫ %111 = add i64 %arrayref3, %109 + ┣⟫ %112 = getelementptr inbounds i64, i64* %arrayptr510, i… + ┣⟫ store i64 %111, i64* %112, align 8 + ┣⟫ %113 = add nuw nsw i64 %value_phi12, 1 + ┣⟫ %exitcond.not = icmp eq i64 %113, %arraylen + ┣⟫ br i1 %exitcond.not, label %L32, label %L12 + ┃ +idxend21.postloop: ; preds…⟪┫ + %122 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫ + %arrayref.postloop = load i64, i64* %122, align 8 ⟪┫ + %123 = mul i64 %arrayref.postloop, %1 ⟪┫ + %124 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫ + %arrayref15.postloop = load i64, i64* %124, align 8 ⟪┫ + %125 = add i64 %arrayref15.postloop, %123 ⟪┫ + %126 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫ + store i64 %125, i64* %126, align 8 ⟪┫ + %.not.not32.postloop = icmp eq i64 %value_phi3.postloop…⟪┫ + %127 = add nuw nsw i64 %value_phi3.postloop, 1 ⟪┫ + br i1 %.not.not32.postloop, label %L31, label %L13.post…⟪┫ + ┣⟫L32: ; preds… + ┣⟫ ret void +} ┃ } + ┃ \ No newline at end of file diff --git a/test/references/saxpy_x86_COLOR.S b/test/references/saxpy_x86_COLOR.S new file mode 100644 index 0000000..2cd93b2 --- /dev/null +++ b/test/references/saxpy_x86_COLOR.S @@ -0,0 +1,379 @@ + .text  ┃  .text + .file "saxpy" ⟪╋⟫ .file "saxpy_simd" + .globl saxpy  # -- Begin function sa…⟪╋⟫ .globl saxpy_simd # -- Begin function sa… + .p2align 4, 0x90  ┃  .p2align 4, 0x90 + .type saxpy,@function ⟪╋⟫ .type saxpy_simd,@function +saxpy:  # @saxpy ⟪╋⟫saxpy_simd: # @saxpy_simd + .cfi_startproc  ┃  .cfi_startproc +# %bb.0: # %top  ┃ # %bb.0: # %top + push rbp  ┃  push rbp + .cfi_def_cfa_offset 16  ┃  .cfi_def_cfa_offset 16 + .cfi_offset rbp, -16  ┃  .cfi_offset rbp, -16 + mov rbp, rsp  ┃  mov rbp, rsp + .cfi_def_cfa_register rbp  ┃  .cfi_def_cfa_register rbp + push r15 ⟪┫  + push r14 ⟪┫  + push r13 ⟪┫  + push r12 ⟪┫  + push rsi  ┃  push rsi + push rdi  ┃  push 
rdi + push rbx ⟪┫  + sub rsp, 72 ⟪╋⟫ sub rsp, 32 + vmovdqa xmmword ptr [rbp - 80], xmm7 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 32], xmm7 # 16-byte … + vmovdqa xmmword ptr [rbp - 96], xmm6 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 48], xmm6 # 16-byte … + .cfi_offset rbx, -72 ⟪╋⟫ .cfi_offset rdi, -32 + .cfi_offset rdi, -64 ⟪╋⟫ .cfi_offset rsi, -24 + .cfi_offset rsi, -56 ⟪╋⟫ .cfi_offset xmm6, -64 + .cfi_offset r12, -48 ⟪╋⟫ .cfi_offset xmm7, -48 + .cfi_offset r13, -40 ⟪┫  + .cfi_offset r14, -32 ⟪┫  + .cfi_offset r15, -24 ⟪┫  + .cfi_offset xmm6, -112 ⟪┫  + .cfi_offset xmm7, -96 ⟪┫  + mov r13, r8 ⟪┫  + mov r8, qword ptr [rcx + 8] ⟪╋⟫ mov r10, qword ptr [rcx + 8] + test r8, r8 ⟪╋⟫ test r10, r10 + je .LBB0_15 ⟪╋⟫ je .LBB0_11 +# %bb.1: # %L13.preheader ⟪╋⟫# %bb.1: # %L12.lr.ph + mov r15, qword ptr [r13] ⟪╋⟫ mov r8, qword ptr [r8] + mov r11, qword ptr [r13 + 8] ⟪╋⟫ mov r9, qword ptr [r9] + mov r10, qword ptr [r9] ⟪╋⟫ mov r11, qword ptr [rcx] + mov r14, qword ptr [r9 + 8] ⟪┫  + mov qword ptr [rbp - 120], rcx # 8-byte Spill ⟪┫  + mov rbx, qword ptr [rcx] ⟪┫  + cmp r11, r8 ⟪╋⟫ cmp r10, 16 + mov rax, r8 ⟪┫  + cmovb rax, r11 ⟪┫  + mov rsi, r14 ⟪┫  + sar rsi, 63 ⟪┫  + and rsi, r14 ⟪┫  + mov rdi, r14 ⟪┫  + sub rdi, rsi ⟪┫  + test rsi, rsi ⟪┫  + mov r12, -1 ⟪┫  + cmovns r12, rsi ⟪┫  + inc r12 ⟪┫  + imul r12, rdi ⟪┫  + cmp rax, r12 ⟪┫  + cmovb r12, rax ⟪┫  + cmp r12, r8 ⟪┫  + cmovae r12, r8 ⟪┫  + mov edi, 1 ⟪┫  + test r12, r12 ⟪┫  + je .LBB0_2 ⟪╋⟫ jae .LBB0_3 +# %bb.3: # %idxend21.prehe…⟪┫  + cmp r12, 16 ⟪┫  + jae .LBB0_4 ⟪┫  +.LBB0_6: # %scalar.ph ⟪┫  + dec rdi ⟪┫  + mov rax, rdi ⟪┫  + .p2align 4, 0x90 ⟪┫  +.LBB0_7: # %idxend21 ⟪┫  + # =>This Inner Lo…⟪┫  + mov rcx, qword ptr [r15 + 8*rax] ⟪┫  + imul rcx, rdx ⟪┫  + add rcx, qword ptr [r10 + 8*rax] ⟪┫  + mov qword ptr [rbx + 8*rax], rcx ⟪┫  + inc rax ⟪┫  + cmp rax, r12 ⟪┫  + jb .LBB0_7 ⟪┫  +# %bb.8: # %main.exit.sele…⟪┫  + lea rdi, [rax + 1] ⟪┫  +.LBB0_9: # %main.exit.sele…⟪┫  + cmp rax, r8 ⟪┫  + mov rcx, qword ptr [rbp - 120] # 
8-byte Reloa…⟪┫  + jae .LBB0_15 ⟪┫  +  ┣⟫# %bb.2: +  ┣⟫ xor eax, eax + jmp .LBB0_10  ┃  jmp .LBB0_10 +.LBB0_2: ⟪┫  + mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫  +.LBB0_10: # %main.pseudo.ex…⟪╋⟫.LBB0_3:  # %vector.ph + lea rax, [r8 + 1] ⟪┫  + .p2align 4, 0x90 ⟪┫  +.LBB0_11: # %L13.postloop ⟪┫  + # =>This Inner Lo…⟪┫  + lea rsi, [rdi - 1] ⟪┫  + cmp rsi, r11 ⟪┫  + jae .LBB0_25 ⟪┫  +# %bb.12: # %idxend.postloo…⟪┫  + # in Loop: Head…⟪┫  + cmp rsi, r14 ⟪┫  + jae .LBB0_26 ⟪┫  +# %bb.13: # %idxend12.postl…⟪┫  + # in Loop: Head…⟪┫  + cmp rsi, r8 ⟪┫  + jae .LBB0_27 ⟪┫  +# %bb.14: # %idxend21.postl…⟪┫  + # in Loop: Head…⟪┫  + mov rsi, qword ptr [r15 + 8*rdi - 8] ⟪┫  + imul rsi, rdx ⟪┫  + add rsi, qword ptr [r10 + 8*rdi - 8] ⟪┫  + mov qword ptr [rbx + 8*rdi - 8], rsi ⟪┫  + inc rdi ⟪┫  + cmp rax, rdi ⟪┫  + jne .LBB0_11 ⟪┫  +.LBB0_15: # %L31 ⟪┫  + vmovaps xmm6, xmmword ptr [rbp - 96] # 16-byte …⟪┫  + vmovaps xmm7, xmmword ptr [rbp - 80] # 16-byte …⟪┫  + lea rsp, [rbp - 56] ⟪┫  + pop rbx ⟪┫  + pop rdi ⟪┫  + pop rsi ⟪┫  + pop r12 ⟪┫  + pop r13 ⟪┫  + pop r14 ⟪┫  + pop r15 ⟪┫  + pop rbp ⟪┫  + vzeroupper ⟪┫  + ret ⟪┫  +.LBB0_4: # %vector.memchec…⟪┫  + mov qword ptr [rbp - 104], r13 # 8-byte Spill ⟪┫  + mov qword ptr [rbp - 112], r9 # 8-byte Spill ⟪┫  + lea rsi, [rbx + 8*r12] ⟪┫  + lea rax, [r15 + 8*r12] ⟪┫  + lea r13, [r10 + 8*r12] ⟪┫  + cmp rbx, rax ⟪┫  + setb r9b ⟪┫  + cmp r15, rsi ⟪┫  + setb cl ⟪┫  + cmp rbx, r13 ⟪┫  + setb al ⟪┫  + cmp r10, rsi ⟪┫  + setb sil ⟪┫  + test r9b, cl ⟪┫  + jne .LBB0_5 ⟪┫  +# %bb.16: # %vector.memchec…⟪┫  + and al, sil ⟪┫  + mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫  + mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫  + jne .LBB0_6 ⟪┫  +# %bb.17: # %vector.ph ⟪┫  + movabs rsi, 9223372036854775792 ⟪┫  + and rsi, r12 ⟪┫  +  ┣⟫ mov rax, r10 +  ┣⟫ and rax, -16 + vmovq xmm0, rdx  ┃  vmovq xmm0, rdx + vpbroadcastq ymm0, xmm0  ┃  vpbroadcastq ymm0, xmm0 + lea rcx, [rsi - 16] ⟪╋⟫ lea rcx, [rax - 16] + mov rax, rcx ⟪╋⟫ mov rsi, rcx + shr rax, 4 ⟪╋⟫ 
shr rsi, 4 + inc rax ⟪╋⟫ inc rsi + vpsrlq ymm1, ymm0, 32  ┃  vpsrlq ymm1, ymm0, 32 + test rcx, rcx  ┃  test rcx, rcx + je .LBB0_18 ⟪╋⟫ je .LBB0_4 +# %bb.19: # %vector.ph.new ⟪╋⟫# %bb.5:  # %vector.ph.new + mov r13, rax ⟪┫  + and r13, -2 ⟪╋⟫ and rdi, -2 + xor edi, edi ⟪┫  +  ┣⟫ mov rdi, rsi +  ┣⟫ xor ecx, ecx + .p2align 4, 0x90  ┃  .p2align 4, 0x90 +.LBB0_20: # %vector.body ⟪╋⟫.LBB0_6:  # %vector.body + # =>This Inner Lo… ┃  # =>This Inner Lo… + vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] + vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] + vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] + vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] + vpmuludq ymm6, ymm2, ymm1  ┃  vpmuludq ymm6, ymm2, ymm1 + vpsrlq ymm7, ymm2, 32  ┃  vpsrlq ymm7, ymm2, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm2, ymm2, ymm0  ┃  vpmuludq ymm2, ymm2, ymm0 + vpaddq ymm2, ymm2, ymm6  ┃  vpaddq ymm2, ymm2, ymm6 + vpmuludq ymm6, ymm3, ymm1  ┃  vpmuludq ymm6, ymm3, ymm1 + vpsrlq ymm7, ymm3, 32  ┃  vpsrlq ymm7, ymm3, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm3, ymm3, ymm0  ┃  vpmuludq ymm3, ymm3, ymm0 + vpaddq ymm3, ymm3, ymm6  ┃  vpaddq ymm3, ymm3, ymm6 + vpmuludq ymm6, ymm4, ymm1  ┃  vpmuludq ymm6, ymm4, ymm1 + vpsrlq ymm7, ymm4, 32  ┃  vpsrlq ymm7, ymm4, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm4, ymm4, ymm0  ┃  vpmuludq ymm4, ymm4, ymm0 + vpaddq ymm4, ymm4, ymm6  ┃  vpaddq ymm4, ymm4, ymm6 + vpmuludq ymm6, ymm5, ymm1  ┃  
vpmuludq ymm6, ymm5, ymm1 + vpsrlq ymm7, ymm5, 32  ┃  vpsrlq ymm7, ymm5, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm5, ymm5, ymm0  ┃  vpmuludq ymm5, ymm5, ymm0 + vpaddq ymm5, ymm5, ymm6  ┃  vpaddq ymm5, ymm5, ymm6 + vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx] + vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 32] + vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 64] + vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 96] + vmovdqu ymmword ptr [rbx + 8*rdi], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm2 + vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm3 + vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm4 + vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm5 + vmovdqu ymm2, ymmword ptr [r15 + 8*rdi + 128] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx + 128] + vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 160] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 160] + vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 192] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 192] + vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 224] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 224] + vpmuludq ymm6, ymm2, ymm1  ┃  vpmuludq ymm6, ymm2, ymm1 + vpsrlq ymm7, ymm2, 32  ┃  vpsrlq ymm7, ymm2, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm2, ymm2, ymm0  ┃  vpmuludq ymm2, ymm2, ymm0 + vpaddq ymm2, ymm2, ymm6  ┃  vpaddq ymm2, ymm2, ymm6 + vpmuludq ymm6, ymm3, ymm1  ┃  vpmuludq ymm6, ymm3, ymm1 + vpsrlq ymm7, ymm3, 32  ┃  vpsrlq 
ymm7, ymm3, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm3, ymm3, ymm0  ┃  vpmuludq ymm3, ymm3, ymm0 + vpaddq ymm3, ymm3, ymm6  ┃  vpaddq ymm3, ymm3, ymm6 + vpmuludq ymm6, ymm4, ymm1  ┃  vpmuludq ymm6, ymm4, ymm1 + vpsrlq ymm7, ymm4, 32  ┃  vpsrlq ymm7, ymm4, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm4, ymm4, ymm0  ┃  vpmuludq ymm4, ymm4, ymm0 + vpaddq ymm4, ymm4, ymm6  ┃  vpaddq ymm4, ymm4, ymm6 + vpmuludq ymm6, ymm5, ymm1  ┃  vpmuludq ymm6, ymm5, ymm1 + vpsrlq ymm7, ymm5, 32  ┃  vpsrlq ymm7, ymm5, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm5, ymm5, ymm0  ┃  vpmuludq ymm5, ymm5, ymm0 + vpaddq ymm5, ymm5, ymm6  ┃  vpaddq ymm5, ymm5, ymm6 + vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi + 128] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx + 128] + vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 160] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 160] + vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 192] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 192] + vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 224] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 224] + vmovdqu ymmword ptr [rbx + 8*rdi + 128], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 128], ymm2 + vmovdqu ymmword ptr [rbx + 8*rdi + 160], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 160], ymm3 + vmovdqu ymmword ptr [rbx + 8*rdi + 192], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 192], ymm4 + vmovdqu ymmword ptr [rbx + 8*rdi + 224], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 224], ymm5 + add rdi, 32 ⟪╋⟫ add rcx, 32 + add r13, -2 ⟪╋⟫ add rdi, -2 + jne .LBB0_20 ⟪╋⟫ jne .LBB0_6 + jmp .LBB0_21 ⟪┫  +.LBB0_5: ⟪┫  + 
mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫  + mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫  + jmp .LBB0_6 ⟪┫  +.LBB0_18: ⟪┫  + xor edi, edi ⟪┫  +.LBB0_21: # %middle.block.u…⟪╋⟫# %bb.7:  # %middle.block.u… + test al, 1 ⟪╋⟫ test sil, 1 + mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫  + je .LBB0_23 ⟪╋⟫ je .LBB0_9 +# %bb.22: # %vector.body.ep…⟪╋⟫.LBB0_8:  # %vector.body.ep… + vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] + vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] + vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] + vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] + vpmuludq ymm6, ymm2, ymm1  ┃  vpmuludq ymm6, ymm2, ymm1 + vpsrlq ymm7, ymm2, 32  ┃  vpsrlq ymm7, ymm2, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm2, ymm2, ymm0  ┃  vpmuludq ymm2, ymm2, ymm0 + vpaddq ymm2, ymm2, ymm6  ┃  vpaddq ymm2, ymm2, ymm6 + vpmuludq ymm6, ymm3, ymm1  ┃  vpmuludq ymm6, ymm3, ymm1 + vpsrlq ymm7, ymm3, 32  ┃  vpsrlq ymm7, ymm3, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm3, ymm3, ymm0  ┃  vpmuludq ymm3, ymm3, ymm0 + vpaddq ymm3, ymm3, ymm6  ┃  vpaddq ymm3, ymm3, ymm6 + vpmuludq ymm6, ymm4, ymm1  ┃  vpmuludq ymm6, ymm4, ymm1 + vpsrlq ymm7, ymm4, 32  ┃  vpsrlq ymm7, ymm4, 32 + vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 + vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 + vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 + vpmuludq ymm4, ymm4, ymm0  ┃  vpmuludq ymm4, ymm4, ymm0 + vpaddq ymm4, ymm4, ymm6  ┃  vpaddq ymm4, ymm4, ymm6 + vpmuludq ymm1, ymm5, ymm1  ┃  vpmuludq ymm1, ymm5, ymm1 + vpsrlq ymm6, ymm5, 32  ┃  vpsrlq ymm6, 
ymm5, 32 + vpmuludq ymm6, ymm6, ymm0  ┃  vpmuludq ymm6, ymm6, ymm0 + vpaddq ymm1, ymm1, ymm6  ┃  vpaddq ymm1, ymm1, ymm6 + vpsllq ymm1, ymm1, 32  ┃  vpsllq ymm1, ymm1, 32 + vpmuludq ymm0, ymm5, ymm0  ┃  vpmuludq ymm0, ymm5, ymm0 + vpaddq ymm0, ymm0, ymm1  ┃  vpaddq ymm0, ymm0, ymm1 + vpaddq ymm1, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm1, ymm2, ymmword ptr [r9 + 8*rcx] + vpaddq ymm2, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm2, ymm3, ymmword ptr [r9 + 8*rcx + 32] + vpaddq ymm3, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm3, ymm4, ymmword ptr [r9 + 8*rcx + 64] + vpaddq ymm0, ymm0, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm0, ymm0, ymmword ptr [r9 + 8*rcx + 96] + vmovdqu ymmword ptr [rbx + 8*rdi], ymm1 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm1 + vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm2 + vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm3 + vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm0 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm0 +.LBB0_23: # %middle.block ⟪╋⟫.LBB0_9:  # %middle.block + lea rdi, [rsi + 1] ⟪┫  + cmp r12, rsi ⟪╋⟫ cmp r10, rax + jne .LBB0_6 ⟪╋⟫ je .LBB0_11 +# %bb.24: ⟪┫  + mov rax, r12 ⟪┫  + jmp .LBB0_9 ⟪┫  +.LBB0_25: # %oob ⟪╋⟫.LBB0_10: # %L12 + mov eax, 16 ⟪┫  + movabs r11, offset ___chkstk_ms ⟪┫  + call r11 ⟪┫  + sub rsp, rax ⟪┫  + mov rdx, rsp ⟪┫  + mov qword ptr [rdx], rdi ⟪┫  + sub rsp, 32 ⟪┫  + movabs rax, offset ijl_bounds_error_ints ⟪┫  + mov r8d, 1 ⟪┫  + mov rcx, r13 ⟪┫  + vzeroupper ⟪┫  + call rax ⟪┫  +.LBB0_26: # %oob10 ⟪╋⟫.LBB0_11: # %L32 + mov eax, 16 ⟪┫  + movabs r11, offset ___chkstk_ms ⟪┫  + call r11 ⟪┫  + sub rsp, rax ⟪┫  + mov rdx, rsp ⟪┫  + mov qword ptr [rdx], rdi ⟪┫  + sub rsp, 32 ⟪╋⟫ add rsp, 32 + movabs rax, offset ijl_bounds_error_ints ⟪┫  + mov r8d, 1 ⟪┫  + mov rcx, r9 ⟪┫  + vzeroupper ⟪┫  + call rax ⟪┫  +.LBB0_27: # %oob19 ⟪┫  + mov eax, 16 ⟪┫  + movabs r11, offset ___chkstk_ms ⟪┫  + call r11 ⟪┫  + sub 
rsp, rax ⟪┫  + mov rdx, rsp ⟪┫  + mov qword ptr [rdx], rdi ⟪┫  + sub rsp, 32 ⟪┫  + movabs rax, offset ijl_bounds_error_ints ⟪┫  + mov r8d, 1 ⟪┫  +  ┣⟫ .p2align 4, 0x90 +  ┣⟫ # =>This Inner Lo… +  ┣⟫ mov rcx, qword ptr [r8 + 8*rax] +  ┣⟫ imul rcx, rdx +  ┣⟫ add rcx, qword ptr [r9 + 8*rax] +  ┣⟫ mov qword ptr [r11 + 8*rax], rcx +  ┣⟫ inc rax +  ┣⟫ cmp r10, rax +  ┣⟫ jne .LBB0_10 +  ┣⟫ vmovaps xmm6, xmmword ptr [rbp - 48] # 16-byte … +  ┣⟫ vmovaps xmm7, xmmword ptr [rbp - 32] # 16-byte … +  ┣⟫ pop rdi +  ┣⟫ pop rsi +  ┣⟫ pop rbp + vzeroupper  ┃  vzeroupper + call rax ⟪┫  +  ┣⟫ ret +  ┣⟫.LBB0_4: +  ┣⟫ xor ecx, ecx +  ┣⟫ test sil, 1 +  ┣⟫ je .LBB0_9 +  ┣⟫ jmp .LBB0_8 +.Lfunc_end0:  ┃ .Lfunc_end0: + .size saxpy, .Lfunc_end0-saxpy ⟪╋⟫ .size saxpy_simd, .Lfunc_end0-saxpy_simd + .cfi_endproc  ┃  .cfi_endproc + # -- End function  ┃  # -- End function + .section ".note.GNU-stack","",@progbits  ┃  .section ".note.GNU-stack","",@progbits +  ┃  \ No newline at end of file diff --git a/test/references/saxpy_x86_COLOR.ll b/test/references/saxpy_x86_COLOR.ll new file mode 100644 index 0000000..7b7329a --- /dev/null +++ b/test/references/saxpy_x86_COLOR.ll @@ -0,0 +1,324 @@ +; Function Attrs: uwtable  ┃ ; Function Attrs: uwtable +define void @saxpy({}* noundef nonnull align 16 dereferen…⟪╋⟫define void @saxpy_simd({}* noundef nonnull align 16 dere… +top:  ┃ top: + %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }*  ┃  %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* + %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… ┃  %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… + %arraylen = load i64, i64* %arraylen_ptr, align 8  ┃  %arraylen = load i64, i64* %arraylen_ptr, align 8 + %.not.not = icmp eq i64 %arraylen, 0 ⟪╋⟫ %.not = icmp eq i64 %arraylen, 0 + br i1 %.not.not, label %L31, label %L13.preheader ⟪╋⟫ br i1 %.not, label %L32, label %L12.lr.ph + ⟪┫  +L13.preheader: ; preds…⟪┫  + %5 = bitcast {}* %2 to { i8*, i64, i16, i16, i32 }* ⟪┫  + %arraylen_ptr5 = getelementptr 
inbounds { i8*, i64, i16…⟪┫  + %arraylen6 = load i64, i64* %arraylen_ptr5, align 8 ⟪┫  + %6 = bitcast {}* %3 to { i8*, i64, i16, i16, i32 }* ⟪┫  + %arraylen_ptr7 = getelementptr inbounds { i8*, i64, i16…⟪┫  + %arraylen8 = load i64, i64* %arraylen_ptr7, align 8 ⟪┫  + %7 = bitcast {}* %2 to i64** ⟪┫  + %arrayptr29 = load i64*, i64** %7, align 8 ⟪┫  + %8 = bitcast {}* %3 to i64** ⟪┫  + %arrayptr1430 = load i64*, i64** %8, align 8 ⟪┫  + %9 = bitcast {}* %0 to i64** ⟪┫  + %arrayptr2331 = load i64*, i64** %9, align 8 ⟪┫  + %umin = call i64 @llvm.umin.i64(i64 %arraylen6, i64 %ar…⟪┫  + %smin = call i64 @llvm.smin.i64(i64 %arraylen8, i64 0) ⟪┫  + %10 = sub i64 %arraylen8, %smin ⟪┫  + %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1) ⟪┫  + %11 = add nsw i64 %smax, 1 ⟪┫  + %12 = mul nuw nsw i64 %10, %11 ⟪┫  + %umin36 = call i64 @llvm.umin.i64(i64 %umin, i64 %12) ⟪┫  + %exit.mainloop.at = call i64 @llvm.umin.i64(i64 %umin36…⟪┫  + %.not = icmp eq i64 %exit.mainloop.at, 0 ⟪┫  + br i1 %.not, label %main.pseudo.exit, label %idxend21.p…⟪┫  + ⟪┫  +idxend21.preheader: ; preds…⟪┫  + %min.iters.check = icmp ult i64 %exit.mainloop.at, 16 ⟪┫  + br i1 %min.iters.check, label %scalar.ph, label %vector…⟪┫  +  ┃  +vector.memcheck: ; preds…⟪┫  + %scevgep = getelementptr i64, i64* %arrayptr2331, i64 %…⟪┫  + %scevgep58 = getelementptr i64, i64* %arrayptr29, i64 %…⟪┫  + %scevgep61 = getelementptr i64, i64* %arrayptr1430, i64…⟪┫  + %bound0 = icmp ult i64* %arrayptr2331, %scevgep58 ⟪┫  + %bound1 = icmp ult i64* %arrayptr29, %scevgep ⟪┫  + %found.conflict = and i1 %bound0, %bound1 ⟪┫  + %bound063 = icmp ult i64* %arrayptr2331, %scevgep61 ⟪┫  + %bound164 = icmp ult i64* %arrayptr1430, %scevgep ⟪┫  + %found.conflict65 = and i1 %bound063, %bound164 ⟪┫  + %conflict.rdx = or i1 %found.conflict, %found.conflict6…⟪┫  + br i1 %conflict.rdx, label %scalar.ph, label %vector.ph ⟪╋⟫ br i1 %min.iters.check, label %scalar.ph, label %vector… +  ┣⟫L12.lr.ph: ; preds… +  ┣⟫ %5 = bitcast {}* %2 to i64** +  ┣⟫ 
%arrayptr8 = load i64*, i64** %5, align 8 +  ┣⟫ %6 = bitcast {}* %3 to i64** +  ┣⟫ %arrayptr29 = load i64*, i64** %6, align 8 +  ┣⟫ %7 = bitcast {}* %0 to i64** +  ┣⟫ %arrayptr510 = load i64*, i64** %7, align 8 +  ┣⟫ %min.iters.check = icmp ult i64 %arraylen, 16 +  ┃  +vector.ph: ; preds…⟪╋⟫vector.ph: ; preds… + %n.vec = and i64 %exit.mainloop.at, 9223372036854775792 ⟪╋⟫ %n.vec = and i64 %arraylen, 9223372036854775792 + %ind.end = or i64 %n.vec, 1 ⟪┫  + %broadcast.splatinsert = insertelement <4 x i64> poison… ┃  %broadcast.splatinsert = insertelement <4 x i64> poison… + %broadcast.splat = shufflevector <4 x i64> %broadcast.s… ┃  %broadcast.splat = shufflevector <4 x i64> %broadcast.s… + %13 = add nsw i64 %n.vec, -16 ⟪╋⟫ %8 = add nsw i64 %n.vec, -16 + %14 = lshr exact i64 %13, 4 ⟪╋⟫ %9 = lshr exact i64 %8, 4 + %15 = add nuw nsw i64 %14, 1 ⟪╋⟫ %10 = add nuw nsw i64 %9, 1 + %xtraiter = and i64 %15, 1 ⟪╋⟫ %xtraiter = and i64 %10, 1 + %16 = icmp eq i64 %13, 0 ⟪╋⟫ %11 = icmp eq i64 %8, 0 + br i1 %16, label %middle.block.unr-lcssa, label %vector…⟪╋⟫ br i1 %11, label %middle.block.unr-lcssa, label %vector… +  ┃  +vector.ph.new: ; preds… ┃ vector.ph.new: ; preds… + %unroll_iter = and i64 %15, 2305843009213693950 ⟪╋⟫ %unroll_iter = and i64 %10, 2305843009213693950 + br label %vector.body  ┃  br label %vector.body +  ┃  +vector.body: ; preds… ┃ vector.body: ; preds… + %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… ┃  %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… + %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… ┃  %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… + %17 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %12 = getelementptr inbounds i64, i64* %arrayptr8, i64 … + %18 = bitcast i64* %17 to <4 x i64>* ⟪╋⟫ %13 = bitcast i64* %12 to <4 x i64>* + %wide.load = load <4 x i64>, <4 x i64>* %18, align 8 ⟪╋⟫ %wide.load = load <4 x i64>, <4 x i64>* %13, align 8 + %19 = getelementptr inbounds i64, i64* %17, i64 4 ⟪╋⟫ %14 = 
getelementptr inbounds i64, i64* %12, i64 4 + %20 = bitcast i64* %19 to <4 x i64>* ⟪╋⟫ %15 = bitcast i64* %14 to <4 x i64>* + %wide.load66 = load <4 x i64>, <4 x i64>* %20, align 8 ⟪╋⟫ %wide.load13 = load <4 x i64>, <4 x i64>* %15, align 8 + %21 = getelementptr inbounds i64, i64* %17, i64 8 ⟪╋⟫ %16 = getelementptr inbounds i64, i64* %12, i64 8 + %22 = bitcast i64* %21 to <4 x i64>* ⟪╋⟫ %17 = bitcast i64* %16 to <4 x i64>* + %wide.load67 = load <4 x i64>, <4 x i64>* %22, align 8 ⟪╋⟫ %wide.load14 = load <4 x i64>, <4 x i64>* %17, align 8 + %23 = getelementptr inbounds i64, i64* %17, i64 12 ⟪╋⟫ %18 = getelementptr inbounds i64, i64* %12, i64 12 + %24 = bitcast i64* %23 to <4 x i64>* ⟪╋⟫ %19 = bitcast i64* %18 to <4 x i64>* + %wide.load68 = load <4 x i64>, <4 x i64>* %24, align 8 ⟪╋⟫ %wide.load15 = load <4 x i64>, <4 x i64>* %19, align 8 + %25 = mul <4 x i64> %wide.load, %broadcast.splat ⟪╋⟫ %20 = mul <4 x i64> %wide.load, %broadcast.splat + %26 = mul <4 x i64> %wide.load66, %broadcast.splat ⟪╋⟫ %21 = mul <4 x i64> %wide.load13, %broadcast.splat + %27 = mul <4 x i64> %wide.load67, %broadcast.splat ⟪╋⟫ %22 = mul <4 x i64> %wide.load14, %broadcast.splat + %28 = mul <4 x i64> %wide.load68, %broadcast.splat ⟪╋⟫ %23 = mul <4 x i64> %wide.load15, %broadcast.splat + %29 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %24 = getelementptr inbounds i64, i64* %arrayptr29, i64… + %30 = bitcast i64* %29 to <4 x i64>* ⟪╋⟫ %25 = bitcast i64* %24 to <4 x i64>* + %wide.load75 = load <4 x i64>, <4 x i64>* %30, align 8 ⟪╋⟫ %wide.load22 = load <4 x i64>, <4 x i64>* %25, align 8 + %31 = getelementptr inbounds i64, i64* %29, i64 4 ⟪╋⟫ %26 = getelementptr inbounds i64, i64* %24, i64 4 + %32 = bitcast i64* %31 to <4 x i64>* ⟪╋⟫ %27 = bitcast i64* %26 to <4 x i64>* + %wide.load76 = load <4 x i64>, <4 x i64>* %32, align 8 ⟪╋⟫ %wide.load23 = load <4 x i64>, <4 x i64>* %27, align 8 + %33 = getelementptr inbounds i64, i64* %29, i64 8 ⟪╋⟫ %28 = getelementptr inbounds i64, i64* %24, i64 8 + 
%34 = bitcast i64* %33 to <4 x i64>* ⟪╋⟫ %29 = bitcast i64* %28 to <4 x i64>* + %wide.load77 = load <4 x i64>, <4 x i64>* %34, align 8 ⟪╋⟫ %wide.load24 = load <4 x i64>, <4 x i64>* %29, align 8 + %35 = getelementptr inbounds i64, i64* %29, i64 12 ⟪╋⟫ %30 = getelementptr inbounds i64, i64* %24, i64 12 + %36 = bitcast i64* %35 to <4 x i64>* ⟪╋⟫ %31 = bitcast i64* %30 to <4 x i64>* + %wide.load78 = load <4 x i64>, <4 x i64>* %36, align 8 ⟪╋⟫ %wide.load25 = load <4 x i64>, <4 x i64>* %31, align 8 + %37 = add <4 x i64> %wide.load75, %25 ⟪╋⟫ %32 = add <4 x i64> %wide.load22, %20 + %38 = add <4 x i64> %wide.load76, %26 ⟪╋⟫ %33 = add <4 x i64> %wide.load23, %21 + %39 = add <4 x i64> %wide.load77, %27 ⟪╋⟫ %34 = add <4 x i64> %wide.load24, %22 + %40 = add <4 x i64> %wide.load78, %28 ⟪╋⟫ %35 = add <4 x i64> %wide.load25, %23 + %41 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %36 = getelementptr inbounds i64, i64* %arrayptr510, i6… + %42 = bitcast i64* %41 to <4 x i64>* ⟪╋⟫ %37 = bitcast i64* %36 to <4 x i64>* + store <4 x i64> %37, <4 x i64>* %42, align 8 ⟪╋⟫ store <4 x i64> %32, <4 x i64>* %37, align 8 + %43 = getelementptr inbounds i64, i64* %41, i64 4 ⟪╋⟫ %38 = getelementptr inbounds i64, i64* %36, i64 4 + %44 = bitcast i64* %43 to <4 x i64>* ⟪╋⟫ %39 = bitcast i64* %38 to <4 x i64>* + store <4 x i64> %38, <4 x i64>* %44, align 8 ⟪╋⟫ store <4 x i64> %33, <4 x i64>* %39, align 8 + %45 = getelementptr inbounds i64, i64* %41, i64 8 ⟪╋⟫ %40 = getelementptr inbounds i64, i64* %36, i64 8 + %46 = bitcast i64* %45 to <4 x i64>* ⟪╋⟫ %41 = bitcast i64* %40 to <4 x i64>* + store <4 x i64> %39, <4 x i64>* %46, align 8 ⟪╋⟫ store <4 x i64> %34, <4 x i64>* %41, align 8 + %47 = getelementptr inbounds i64, i64* %41, i64 12 ⟪╋⟫ %42 = getelementptr inbounds i64, i64* %36, i64 12 + %48 = bitcast i64* %47 to <4 x i64>* ⟪╋⟫ %43 = bitcast i64* %42 to <4 x i64>* + store <4 x i64> %40, <4 x i64>* %48, align 8 ⟪╋⟫ store <4 x i64> %35, <4 x i64>* %43, align 8 + %index.next = or i64 %index, 
16  ┃  %index.next = or i64 %index, 16 + %49 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %44 = getelementptr inbounds i64, i64* %arrayptr8, i64 … + %50 = bitcast i64* %49 to <4 x i64>* ⟪╋⟫ %45 = bitcast i64* %44 to <4 x i64>* + %wide.load.1 = load <4 x i64>, <4 x i64>* %50, align 8 ⟪╋⟫ %wide.load.1 = load <4 x i64>, <4 x i64>* %45, align 8 + %51 = getelementptr inbounds i64, i64* %49, i64 4 ⟪╋⟫ %46 = getelementptr inbounds i64, i64* %44, i64 4 + %52 = bitcast i64* %51 to <4 x i64>* ⟪╋⟫ %47 = bitcast i64* %46 to <4 x i64>* + %wide.load66.1 = load <4 x i64>, <4 x i64>* %52, align …⟪╋⟫ %wide.load13.1 = load <4 x i64>, <4 x i64>* %47, align … + %53 = getelementptr inbounds i64, i64* %49, i64 8 ⟪╋⟫ %48 = getelementptr inbounds i64, i64* %44, i64 8 + %54 = bitcast i64* %53 to <4 x i64>* ⟪╋⟫ %49 = bitcast i64* %48 to <4 x i64>* + %wide.load67.1 = load <4 x i64>, <4 x i64>* %54, align …⟪╋⟫ %wide.load14.1 = load <4 x i64>, <4 x i64>* %49, align … + %55 = getelementptr inbounds i64, i64* %49, i64 12 ⟪╋⟫ %50 = getelementptr inbounds i64, i64* %44, i64 12 + %56 = bitcast i64* %55 to <4 x i64>* ⟪╋⟫ %51 = bitcast i64* %50 to <4 x i64>* + %wide.load68.1 = load <4 x i64>, <4 x i64>* %56, align …⟪╋⟫ %wide.load15.1 = load <4 x i64>, <4 x i64>* %51, align … + %57 = mul <4 x i64> %wide.load.1, %broadcast.splat ⟪╋⟫ %52 = mul <4 x i64> %wide.load.1, %broadcast.splat + %58 = mul <4 x i64> %wide.load66.1, %broadcast.splat ⟪╋⟫ %53 = mul <4 x i64> %wide.load13.1, %broadcast.splat + %59 = mul <4 x i64> %wide.load67.1, %broadcast.splat ⟪╋⟫ %54 = mul <4 x i64> %wide.load14.1, %broadcast.splat + %60 = mul <4 x i64> %wide.load68.1, %broadcast.splat ⟪╋⟫ %55 = mul <4 x i64> %wide.load15.1, %broadcast.splat + %61 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %56 = getelementptr inbounds i64, i64* %arrayptr29, i64… + %62 = bitcast i64* %61 to <4 x i64>* ⟪╋⟫ %57 = bitcast i64* %56 to <4 x i64>* + %wide.load75.1 = load <4 x i64>, <4 x i64>* %62, align …⟪╋⟫ %wide.load22.1 = load 
<4 x i64>, <4 x i64>* %57, align … + %63 = getelementptr inbounds i64, i64* %61, i64 4 ⟪╋⟫ %58 = getelementptr inbounds i64, i64* %56, i64 4 + %64 = bitcast i64* %63 to <4 x i64>* ⟪╋⟫ %59 = bitcast i64* %58 to <4 x i64>* + %wide.load76.1 = load <4 x i64>, <4 x i64>* %64, align …⟪╋⟫ %wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align … + %65 = getelementptr inbounds i64, i64* %61, i64 8 ⟪╋⟫ %60 = getelementptr inbounds i64, i64* %56, i64 8 + %66 = bitcast i64* %65 to <4 x i64>* ⟪╋⟫ %61 = bitcast i64* %60 to <4 x i64>* + %wide.load77.1 = load <4 x i64>, <4 x i64>* %66, align …⟪╋⟫ %wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align … + %67 = getelementptr inbounds i64, i64* %61, i64 12 ⟪╋⟫ %62 = getelementptr inbounds i64, i64* %56, i64 12 + %68 = bitcast i64* %67 to <4 x i64>* ⟪╋⟫ %63 = bitcast i64* %62 to <4 x i64>* + %wide.load78.1 = load <4 x i64>, <4 x i64>* %68, align …⟪╋⟫ %wide.load25.1 = load <4 x i64>, <4 x i64>* %63, align … + %69 = add <4 x i64> %wide.load75.1, %57 ⟪╋⟫ %64 = add <4 x i64> %wide.load22.1, %52 + %70 = add <4 x i64> %wide.load76.1, %58 ⟪╋⟫ %65 = add <4 x i64> %wide.load23.1, %53 + %71 = add <4 x i64> %wide.load77.1, %59 ⟪╋⟫ %66 = add <4 x i64> %wide.load24.1, %54 + %72 = add <4 x i64> %wide.load78.1, %60 ⟪╋⟫ %67 = add <4 x i64> %wide.load25.1, %55 + %73 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %68 = getelementptr inbounds i64, i64* %arrayptr510, i6… + %74 = bitcast i64* %73 to <4 x i64>* ⟪╋⟫ %69 = bitcast i64* %68 to <4 x i64>* + store <4 x i64> %69, <4 x i64>* %74, align 8 ⟪╋⟫ store <4 x i64> %64, <4 x i64>* %69, align 8 + %75 = getelementptr inbounds i64, i64* %73, i64 4 ⟪╋⟫ %70 = getelementptr inbounds i64, i64* %68, i64 4 + %76 = bitcast i64* %75 to <4 x i64>* ⟪╋⟫ %71 = bitcast i64* %70 to <4 x i64>* + store <4 x i64> %70, <4 x i64>* %76, align 8 ⟪╋⟫ store <4 x i64> %65, <4 x i64>* %71, align 8 + %77 = getelementptr inbounds i64, i64* %73, i64 8 ⟪╋⟫ %72 = getelementptr inbounds i64, i64* %68, i64 8 + %78 = bitcast i64* 
%77 to <4 x i64>* ⟪╋⟫ %73 = bitcast i64* %72 to <4 x i64>* + store <4 x i64> %71, <4 x i64>* %78, align 8 ⟪╋⟫ store <4 x i64> %66, <4 x i64>* %73, align 8 + %79 = getelementptr inbounds i64, i64* %73, i64 12 ⟪╋⟫ %74 = getelementptr inbounds i64, i64* %68, i64 12 + %80 = bitcast i64* %79 to <4 x i64>* ⟪╋⟫ %75 = bitcast i64* %74 to <4 x i64>* + store <4 x i64> %72, <4 x i64>* %80, align 8 ⟪╋⟫ store <4 x i64> %67, <4 x i64>* %75, align 8 + %index.next.1 = add nuw i64 %index, 32  ┃  %index.next.1 = add nuw i64 %index, 32 + %niter.next.1 = add i64 %niter, 2  ┃  %niter.next.1 = add i64 %niter, 2 + %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter  ┃  %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter + br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… ┃  br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… +  ┃  +middle.block.unr-lcssa: ; preds… ┃ middle.block.unr-lcssa: ; preds… + %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… ┃  %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… + %lcmp.mod.not = icmp eq i64 %xtraiter, 0  ┃  %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %middle.block, label %vector… ┃  br i1 %lcmp.mod.not, label %middle.block, label %vector… +  ┃  +vector.body.epil.preheader: ; preds… ┃ vector.body.epil.preheader: ; preds… + %81 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %76 = getelementptr inbounds i64, i64* %arrayptr8, i64 … + %82 = bitcast i64* %81 to <4 x i64>* ⟪╋⟫ %77 = bitcast i64* %76 to <4 x i64>* + %wide.load.epil = load <4 x i64>, <4 x i64>* %82, align…⟪╋⟫ %wide.load.epil = load <4 x i64>, <4 x i64>* %77, align… + %83 = getelementptr inbounds i64, i64* %81, i64 4 ⟪╋⟫ %78 = getelementptr inbounds i64, i64* %76, i64 4 + %84 = bitcast i64* %83 to <4 x i64>* ⟪╋⟫ %79 = bitcast i64* %78 to <4 x i64>* + %wide.load66.epil = load <4 x i64>, <4 x i64>* %84, ali…⟪╋⟫ %wide.load13.epil = load <4 x i64>, <4 x i64>* %79, ali… + %85 = getelementptr inbounds i64, i64* %81, i64 8 ⟪╋⟫ 
%80 = getelementptr inbounds i64, i64* %76, i64 8 + %86 = bitcast i64* %85 to <4 x i64>* ⟪╋⟫ %81 = bitcast i64* %80 to <4 x i64>* + %wide.load67.epil = load <4 x i64>, <4 x i64>* %86, ali…⟪╋⟫ %wide.load14.epil = load <4 x i64>, <4 x i64>* %81, ali… + %87 = getelementptr inbounds i64, i64* %81, i64 12 ⟪╋⟫ %82 = getelementptr inbounds i64, i64* %76, i64 12 + %88 = bitcast i64* %87 to <4 x i64>* ⟪╋⟫ %83 = bitcast i64* %82 to <4 x i64>* + %wide.load68.epil = load <4 x i64>, <4 x i64>* %88, ali…⟪╋⟫ %wide.load15.epil = load <4 x i64>, <4 x i64>* %83, ali… + %89 = mul <4 x i64> %wide.load.epil, %broadcast.splat ⟪╋⟫ %84 = mul <4 x i64> %wide.load.epil, %broadcast.splat + %90 = mul <4 x i64> %wide.load66.epil, %broadcast.splat ⟪╋⟫ %85 = mul <4 x i64> %wide.load13.epil, %broadcast.splat + %91 = mul <4 x i64> %wide.load67.epil, %broadcast.splat ⟪╋⟫ %86 = mul <4 x i64> %wide.load14.epil, %broadcast.splat + %92 = mul <4 x i64> %wide.load68.epil, %broadcast.splat ⟪╋⟫ %87 = mul <4 x i64> %wide.load15.epil, %broadcast.splat + %93 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %88 = getelementptr inbounds i64, i64* %arrayptr29, i64… + %94 = bitcast i64* %93 to <4 x i64>* ⟪╋⟫ %89 = bitcast i64* %88 to <4 x i64>* + %wide.load75.epil = load <4 x i64>, <4 x i64>* %94, ali…⟪╋⟫ %wide.load22.epil = load <4 x i64>, <4 x i64>* %89, ali… + %95 = getelementptr inbounds i64, i64* %93, i64 4 ⟪╋⟫ %90 = getelementptr inbounds i64, i64* %88, i64 4 + %96 = bitcast i64* %95 to <4 x i64>* ⟪╋⟫ %91 = bitcast i64* %90 to <4 x i64>* + %wide.load76.epil = load <4 x i64>, <4 x i64>* %96, ali…⟪╋⟫ %wide.load23.epil = load <4 x i64>, <4 x i64>* %91, ali… + %97 = getelementptr inbounds i64, i64* %93, i64 8 ⟪╋⟫ %92 = getelementptr inbounds i64, i64* %88, i64 8 + %98 = bitcast i64* %97 to <4 x i64>* ⟪╋⟫ %93 = bitcast i64* %92 to <4 x i64>* + %wide.load77.epil = load <4 x i64>, <4 x i64>* %98, ali…⟪╋⟫ %wide.load24.epil = load <4 x i64>, <4 x i64>* %93, ali… + %99 = getelementptr inbounds i64, i64* %93, 
i64 12 ⟪╋⟫ %94 = getelementptr inbounds i64, i64* %88, i64 12 + %100 = bitcast i64* %99 to <4 x i64>* ⟪╋⟫ %95 = bitcast i64* %94 to <4 x i64>* + %wide.load78.epil = load <4 x i64>, <4 x i64>* %100, al…⟪╋⟫ %wide.load25.epil = load <4 x i64>, <4 x i64>* %95, ali… + %101 = add <4 x i64> %wide.load75.epil, %89 ⟪╋⟫ %96 = add <4 x i64> %wide.load22.epil, %84 + %102 = add <4 x i64> %wide.load76.epil, %90 ⟪╋⟫ %97 = add <4 x i64> %wide.load23.epil, %85 + %103 = add <4 x i64> %wide.load77.epil, %91 ⟪╋⟫ %98 = add <4 x i64> %wide.load24.epil, %86 + %104 = add <4 x i64> %wide.load78.epil, %92 ⟪╋⟫ %99 = add <4 x i64> %wide.load25.epil, %87 + %105 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪╋⟫ %100 = getelementptr inbounds i64, i64* %arrayptr510, i… + %106 = bitcast i64* %105 to <4 x i64>* ⟪╋⟫ %101 = bitcast i64* %100 to <4 x i64>* + store <4 x i64> %101, <4 x i64>* %106, align 8 ⟪╋⟫ store <4 x i64> %96, <4 x i64>* %101, align 8 + %107 = getelementptr inbounds i64, i64* %105, i64 4 ⟪╋⟫ %102 = getelementptr inbounds i64, i64* %100, i64 4 + %108 = bitcast i64* %107 to <4 x i64>* ⟪╋⟫ %103 = bitcast i64* %102 to <4 x i64>* + store <4 x i64> %102, <4 x i64>* %108, align 8 ⟪╋⟫ store <4 x i64> %97, <4 x i64>* %103, align 8 + %109 = getelementptr inbounds i64, i64* %105, i64 8 ⟪╋⟫ %104 = getelementptr inbounds i64, i64* %100, i64 8 + %110 = bitcast i64* %109 to <4 x i64>* ⟪╋⟫ %105 = bitcast i64* %104 to <4 x i64>* + store <4 x i64> %103, <4 x i64>* %110, align 8 ⟪╋⟫ store <4 x i64> %98, <4 x i64>* %105, align 8 + %111 = getelementptr inbounds i64, i64* %105, i64 12 ⟪╋⟫ %106 = getelementptr inbounds i64, i64* %100, i64 12 + %112 = bitcast i64* %111 to <4 x i64>* ⟪╋⟫ %107 = bitcast i64* %106 to <4 x i64>* + store <4 x i64> %104, <4 x i64>* %112, align 8 ⟪╋⟫ store <4 x i64> %99, <4 x i64>* %107, align 8 + br label %middle.block  ┃  br label %middle.block +  ┃  +middle.block: ; preds… ┃ middle.block: ; preds… + %cmp.n = icmp eq i64 %exit.mainloop.at, %n.vec ⟪╋⟫ %cmp.n = icmp eq i64 
%arraylen, %n.vec + br i1 %cmp.n, label %main.exit.selector, label %scalar.…⟪┫  + ⟪┫  +scalar.ph: ; preds…⟪┫  + %bc.resume.val = phi i64 [ %ind.end, %middle.block ], […⟪┫  + br label %idxend21 ⟪┫  + ⟪┫  +L31: ; preds…⟪┫  + ret void ⟪┫  + ⟪┫  +oob: ; preds…⟪┫  + %errorbox = alloca i64, align 8 ⟪┫  + store i64 %value_phi3.postloop, i64* %errorbox, align 8 ⟪┫  + call void @ijl_bounds_error_ints({}* %2, i64* nonnull %…⟪┫  + unreachable ⟪┫  + ⟪┫  +oob10: ; preds…⟪┫  + %errorbox11 = alloca i64, align 8 ⟪┫  + store i64 %value_phi3.postloop, i64* %errorbox11, align…⟪┫  + call void @ijl_bounds_error_ints({}* %3, i64* nonnull %…⟪┫  + unreachable ⟪┫  + ⟪┫  +oob19: ; preds…⟪┫  + %errorbox20 = alloca i64, align 8 ⟪┫  + store i64 %value_phi3.postloop, i64* %errorbox20, align…⟪┫  + call void @ijl_bounds_error_ints({}* %0, i64* nonnull %…⟪┫  + unreachable ⟪┫  + ⟪┫  +idxend21: ; preds…⟪┫  + %value_phi3 = phi i64 [ %119, %idxend21 ], [ %bc.resume…⟪┫  + %113 = add nsw i64 %value_phi3, -1 ⟪┫  + %114 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫  + %arrayref = load i64, i64* %114, align 8 ⟪┫  + %115 = mul i64 %arrayref, %1 ⟪┫  + %116 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫  + %arrayref15 = load i64, i64* %116, align 8 ⟪┫  + %117 = add i64 %arrayref15, %115 ⟪┫  + %118 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫  + store i64 %117, i64* %118, align 8 ⟪┫  + %119 = add nuw nsw i64 %value_phi3, 1 ⟪┫  + %.not51 = icmp ult i64 %value_phi3, %exit.mainloop.at ⟪┫  + br i1 %.not51, label %idxend21, label %main.exit.select…⟪┫  + ⟪┫  +main.exit.selector: ; preds…⟪┫  + %value_phi3.lcssa = phi i64 [ %exit.mainloop.at, %middl…⟪┫  + %.lcssa = phi i64 [ %ind.end, %middle.block ], [ %119, …⟪┫  + %120 = icmp ult i64 %value_phi3.lcssa, %arraylen ⟪┫  + br i1 %120, label %main.pseudo.exit, label %L31 ⟪┫  + ⟪┫  +main.pseudo.exit: ; preds…⟪┫  + %value_phi3.copy = phi i64 [ 1, %L13.preheader ], [ %.l…⟪┫  + br label %L13.postloop ⟪┫  + ⟪┫  +L13.postloop: ; preds…⟪┫  + 
%value_phi3.postloop = phi i64 [ %127, %idxend21.postlo…⟪┫  + %121 = add i64 %value_phi3.postloop, -1 ⟪┫  + %inbounds.postloop = icmp ult i64 %121, %arraylen6 ⟪┫  + br i1 %inbounds.postloop, label %idxend.postloop, label…⟪┫  +  ┣⟫ br i1 %cmp.n, label %L32, label %scalar.ph +  ┃  +idxend.postloop: ; preds…⟪┫  + %inbounds9.postloop = icmp ult i64 %121, %arraylen8 ⟪┫  + br i1 %inbounds9.postloop, label %idxend12.postloop, la…⟪┫  +  ┣⟫scalar.ph: ; preds… +  ┣⟫ %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0… +  ┣⟫ br label %L12 +  ┃  +idxend12.postloop: ; preds…⟪┫  + %inbounds18.postloop = icmp ult i64 %121, %arraylen ⟪┫  + br i1 %inbounds18.postloop, label %idxend21.postloop, l…⟪┫  +  ┣⟫L12: ; preds… +  ┣⟫ %value_phi12 = phi i64 [ %bc.resume.val, %scalar.ph ], … +  ┣⟫ %108 = getelementptr inbounds i64, i64* %arrayptr8, i64… +  ┣⟫ %arrayref = load i64, i64* %108, align 8 +  ┣⟫ %109 = mul i64 %arrayref, %1 +  ┣⟫ %110 = getelementptr inbounds i64, i64* %arrayptr29, i6… +  ┣⟫ %arrayref3 = load i64, i64* %110, align 8 +  ┣⟫ %111 = add i64 %arrayref3, %109 +  ┣⟫ %112 = getelementptr inbounds i64, i64* %arrayptr510, i… +  ┣⟫ store i64 %111, i64* %112, align 8 +  ┣⟫ %113 = add nuw nsw i64 %value_phi12, 1 +  ┣⟫ %exitcond.not = icmp eq i64 %113, %arraylen +  ┣⟫ br i1 %exitcond.not, label %L32, label %L12 +  ┃  +idxend21.postloop: ; preds…⟪┫  + %122 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫  + %arrayref.postloop = load i64, i64* %122, align 8 ⟪┫  + %123 = mul i64 %arrayref.postloop, %1 ⟪┫  + %124 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫  + %arrayref15.postloop = load i64, i64* %124, align 8 ⟪┫  + %125 = add i64 %arrayref15.postloop, %123 ⟪┫  + %126 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫  + store i64 %125, i64* %126, align 8 ⟪┫  + %.not.not32.postloop = icmp eq i64 %value_phi3.postloop…⟪┫  + %127 = add nuw nsw i64 %value_phi3.postloop, 1 ⟪┫  + br i1 %.not.not32.postloop, label %L31, label %L13.post…⟪┫  +  ┣⟫L32: ; preds… +  ┣⟫ ret 
void +}  ┃ } +  ┃  \ No newline at end of file diff --git a/test/references/saxpy_x86_LINES.ll b/test/references/saxpy_x86_LINES.ll new file mode 100644 index 0000000..737fb07 --- /dev/null +++ b/test/references/saxpy_x86_LINES.ll @@ -0,0 +1,324 @@ + 1 ; Function Attrs: uwtable ┃ ; Function Attrs: uwtable 1 + 2 define void @saxpy({}* noundef nonnull align 16 deref…⟪╋⟫define void @saxpy_simd({}* noundef nonnull align 16 …2 + 3 top: ┃ top: 3 + 4 %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* ┃ %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* 4 + 5 %arraylen_ptr = getelementptr inbounds { i8*, i64, … ┃ %arraylen_ptr = getelementptr inbounds { i8*, i64, …5 + 6 %arraylen = load i64, i64* %arraylen_ptr, align 8 ┃ %arraylen = load i64, i64* %arraylen_ptr, align 8 6 + 7 %.not.not = icmp eq i64 %arraylen, 0 ⟪╋⟫ %.not = icmp eq i64 %arraylen, 0 7 + 8 br i1 %.not.not, label %L31, label %L13.preheader ⟪╋⟫ br i1 %.not, label %L32, label %L12.lr.ph 8 + 9 ⟪┫ + 10 L13.preheader: ; p…⟪┫ + 11 %5 = bitcast {}* %2 to { i8*, i64, i16, i16, i32 }* ⟪┫ + 12 %arraylen_ptr5 = getelementptr inbounds { i8*, i64,…⟪┫ + 13 %arraylen6 = load i64, i64* %arraylen_ptr5, align 8 ⟪┫ + 14 %6 = bitcast {}* %3 to { i8*, i64, i16, i16, i32 }* ⟪┫ + 15 %arraylen_ptr7 = getelementptr inbounds { i8*, i64,…⟪┫ + 16 %arraylen8 = load i64, i64* %arraylen_ptr7, align 8 ⟪┫ + 17 %7 = bitcast {}* %2 to i64** ⟪┫ + 18 %arrayptr29 = load i64*, i64** %7, align 8 ⟪┫ + 19 %8 = bitcast {}* %3 to i64** ⟪┫ + 20 %arrayptr1430 = load i64*, i64** %8, align 8 ⟪┫ + 21 %9 = bitcast {}* %0 to i64** ⟪┫ + 22 %arrayptr2331 = load i64*, i64** %9, align 8 ⟪┫ + 23 %umin = call i64 @llvm.umin.i64(i64 %arraylen6, i64…⟪┫ + 24 %smin = call i64 @llvm.smin.i64(i64 %arraylen8, i64…⟪┫ + 25 %10 = sub i64 %arraylen8, %smin ⟪┫ + 26 %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1) ⟪┫ + 27 %11 = add nsw i64 %smax, 1 ⟪┫ + 28 %12 = mul nuw nsw i64 %10, %11 ⟪┫ + 29 %umin36 = call i64 @llvm.umin.i64(i64 %umin, i64 %1…⟪┫ + 30 %exit.mainloop.at = 
call i64 @llvm.umin.i64(i64 %um…⟪┫ + 31 %.not = icmp eq i64 %exit.mainloop.at, 0 ⟪┫ + 32 br i1 %.not, label %main.pseudo.exit, label %idxend…⟪┫ + 33 ⟪┫ + 34 idxend21.preheader: ; p…⟪┫ + 35 %min.iters.check = icmp ult i64 %exit.mainloop.at, …⟪┫ + 36 br i1 %min.iters.check, label %scalar.ph, label %ve…⟪┫ + 37 ┃ 9 + 38 vector.memcheck: ; p…⟪┫ + 39 %scevgep = getelementptr i64, i64* %arrayptr2331, i…⟪┫ + 40 %scevgep58 = getelementptr i64, i64* %arrayptr29, i…⟪┫ + 41 %scevgep61 = getelementptr i64, i64* %arrayptr1430,…⟪┫ + 42 %bound0 = icmp ult i64* %arrayptr2331, %scevgep58 ⟪┫ + 43 %bound1 = icmp ult i64* %arrayptr29, %scevgep ⟪┫ + 44 %found.conflict = and i1 %bound0, %bound1 ⟪┫ + 45 %bound063 = icmp ult i64* %arrayptr2331, %scevgep61 ⟪┫ + 46 %bound164 = icmp ult i64* %arrayptr1430, %scevgep ⟪┫ + 47 %found.conflict65 = and i1 %bound063, %bound164 ⟪┫ + 48 %conflict.rdx = or i1 %found.conflict, %found.confl…⟪┫ + 49 br i1 %conflict.rdx, label %scalar.ph, label %vecto…⟪╋⟫ br i1 %min.iters.check, label %scalar.ph, label %ve…10 + ┣⟫L12.lr.ph: ; p…11 + ┣⟫ %5 = bitcast {}* %2 to i64** 12 + ┣⟫ %arrayptr8 = load i64*, i64** %5, align 8 13 + ┣⟫ %6 = bitcast {}* %3 to i64** 14 + ┣⟫ %arrayptr29 = load i64*, i64** %6, align 8 15 + ┣⟫ %7 = bitcast {}* %0 to i64** 16 + ┣⟫ %arrayptr510 = load i64*, i64** %7, align 8 17 + ┣⟫ %min.iters.check = icmp ult i64 %arraylen, 16 18 + 50 ┃ 19 + 51 vector.ph: ; p…⟪╋⟫vector.ph: ; p…20 + 52 %n.vec = and i64 %exit.mainloop.at, 922337203685477…⟪╋⟫ %n.vec = and i64 %arraylen, 9223372036854775792 21 + 53 %ind.end = or i64 %n.vec, 1 ⟪┫ + 54 %broadcast.splatinsert = insertelement <4 x i64> po… ┃ %broadcast.splatinsert = insertelement <4 x i64> po…22 + 55 %broadcast.splat = shufflevector <4 x i64> %broadca… ┃ %broadcast.splat = shufflevector <4 x i64> %broadca…23 + 56 %13 = add nsw i64 %n.vec, -16 ⟪╋⟫ %8 = add nsw i64 %n.vec, -16 24 + 57 %14 = lshr exact i64 %13, 4 ⟪╋⟫ %9 = lshr exact i64 %8, 4 25 + 58 %15 = add nuw nsw i64 %14, 1 ⟪╋⟫ %10 = add nuw nsw i64 
%9, 1 26 + 59 %xtraiter = and i64 %15, 1 ⟪╋⟫ %xtraiter = and i64 %10, 1 27 + 60 %16 = icmp eq i64 %13, 0 ⟪╋⟫ %11 = icmp eq i64 %8, 0 28 + 61 br i1 %16, label %middle.block.unr-lcssa, label %ve…⟪╋⟫ br i1 %11, label %middle.block.unr-lcssa, label %ve…29 + 62 ┃ 30 + 63 vector.ph.new: ; p… ┃ vector.ph.new: ; p…31 + 64 %unroll_iter = and i64 %15, 2305843009213693950 ⟪╋⟫ %unroll_iter = and i64 %10, 2305843009213693950 32 + 65 br label %vector.body ┃ br label %vector.body 33 + 66 ┃ 34 + 67 vector.body: ; p… ┃ vector.body: ; p…35 + 68 %index = phi i64 [ 0, %vector.ph.new ], [ %index.ne… ┃ %index = phi i64 [ 0, %vector.ph.new ], [ %index.ne…36 + 69 %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.ne… ┃ %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.ne…37 + 70 %17 = getelementptr inbounds i64, i64* %arrayptr29,…⟪╋⟫ %12 = getelementptr inbounds i64, i64* %arrayptr8, …38 + 71 %18 = bitcast i64* %17 to <4 x i64>* ⟪╋⟫ %13 = bitcast i64* %12 to <4 x i64>* 39 + 72 %wide.load = load <4 x i64>, <4 x i64>* %18, align …⟪╋⟫ %wide.load = load <4 x i64>, <4 x i64>* %13, align …40 + 73 %19 = getelementptr inbounds i64, i64* %17, i64 4 ⟪╋⟫ %14 = getelementptr inbounds i64, i64* %12, i64 4 41 + 74 %20 = bitcast i64* %19 to <4 x i64>* ⟪╋⟫ %15 = bitcast i64* %14 to <4 x i64>* 42 + 75 %wide.load66 = load <4 x i64>, <4 x i64>* %20, alig…⟪╋⟫ %wide.load13 = load <4 x i64>, <4 x i64>* %15, alig…43 + 76 %21 = getelementptr inbounds i64, i64* %17, i64 8 ⟪╋⟫ %16 = getelementptr inbounds i64, i64* %12, i64 8 44 + 77 %22 = bitcast i64* %21 to <4 x i64>* ⟪╋⟫ %17 = bitcast i64* %16 to <4 x i64>* 45 + 78 %wide.load67 = load <4 x i64>, <4 x i64>* %22, alig…⟪╋⟫ %wide.load14 = load <4 x i64>, <4 x i64>* %17, alig…46 + 79 %23 = getelementptr inbounds i64, i64* %17, i64 12 ⟪╋⟫ %18 = getelementptr inbounds i64, i64* %12, i64 12 47 + 80 %24 = bitcast i64* %23 to <4 x i64>* ⟪╋⟫ %19 = bitcast i64* %18 to <4 x i64>* 48 + 81 %wide.load68 = load <4 x i64>, <4 x i64>* %24, alig…⟪╋⟫ %wide.load15 = load <4 x i64>, <4 
x i64>* %19, alig…49 + 82 %25 = mul <4 x i64> %wide.load, %broadcast.splat ⟪╋⟫ %20 = mul <4 x i64> %wide.load, %broadcast.splat 50 + 83 %26 = mul <4 x i64> %wide.load66, %broadcast.splat ⟪╋⟫ %21 = mul <4 x i64> %wide.load13, %broadcast.splat 51 + 84 %27 = mul <4 x i64> %wide.load67, %broadcast.splat ⟪╋⟫ %22 = mul <4 x i64> %wide.load14, %broadcast.splat 52 + 85 %28 = mul <4 x i64> %wide.load68, %broadcast.splat ⟪╋⟫ %23 = mul <4 x i64> %wide.load15, %broadcast.splat 53 + 86 %29 = getelementptr inbounds i64, i64* %arrayptr143…⟪╋⟫ %24 = getelementptr inbounds i64, i64* %arrayptr29,…54 + 87 %30 = bitcast i64* %29 to <4 x i64>* ⟪╋⟫ %25 = bitcast i64* %24 to <4 x i64>* 55 + 88 %wide.load75 = load <4 x i64>, <4 x i64>* %30, alig…⟪╋⟫ %wide.load22 = load <4 x i64>, <4 x i64>* %25, alig…56 + 89 %31 = getelementptr inbounds i64, i64* %29, i64 4 ⟪╋⟫ %26 = getelementptr inbounds i64, i64* %24, i64 4 57 + 90 %32 = bitcast i64* %31 to <4 x i64>* ⟪╋⟫ %27 = bitcast i64* %26 to <4 x i64>* 58 + 91 %wide.load76 = load <4 x i64>, <4 x i64>* %32, alig…⟪╋⟫ %wide.load23 = load <4 x i64>, <4 x i64>* %27, alig…59 + 92 %33 = getelementptr inbounds i64, i64* %29, i64 8 ⟪╋⟫ %28 = getelementptr inbounds i64, i64* %24, i64 8 60 + 93 %34 = bitcast i64* %33 to <4 x i64>* ⟪╋⟫ %29 = bitcast i64* %28 to <4 x i64>* 61 + 94 %wide.load77 = load <4 x i64>, <4 x i64>* %34, alig…⟪╋⟫ %wide.load24 = load <4 x i64>, <4 x i64>* %29, alig…62 + 95 %35 = getelementptr inbounds i64, i64* %29, i64 12 ⟪╋⟫ %30 = getelementptr inbounds i64, i64* %24, i64 12 63 + 96 %36 = bitcast i64* %35 to <4 x i64>* ⟪╋⟫ %31 = bitcast i64* %30 to <4 x i64>* 64 + 97 %wide.load78 = load <4 x i64>, <4 x i64>* %36, alig…⟪╋⟫ %wide.load25 = load <4 x i64>, <4 x i64>* %31, alig…65 + 98 %37 = add <4 x i64> %wide.load75, %25 ⟪╋⟫ %32 = add <4 x i64> %wide.load22, %20 66 + 99 %38 = add <4 x i64> %wide.load76, %26 ⟪╋⟫ %33 = add <4 x i64> %wide.load23, %21 67 +100 %39 = add <4 x i64> %wide.load77, %27 ⟪╋⟫ %34 = add <4 x i64> %wide.load24, %22 68 
+101 %40 = add <4 x i64> %wide.load78, %28 ⟪╋⟫ %35 = add <4 x i64> %wide.load25, %23 69 +102 %41 = getelementptr inbounds i64, i64* %arrayptr233…⟪╋⟫ %36 = getelementptr inbounds i64, i64* %arrayptr510…70 +103 %42 = bitcast i64* %41 to <4 x i64>* ⟪╋⟫ %37 = bitcast i64* %36 to <4 x i64>* 71 +104 store <4 x i64> %37, <4 x i64>* %42, align 8 ⟪╋⟫ store <4 x i64> %32, <4 x i64>* %37, align 8 72 +105 %43 = getelementptr inbounds i64, i64* %41, i64 4 ⟪╋⟫ %38 = getelementptr inbounds i64, i64* %36, i64 4 73 +106 %44 = bitcast i64* %43 to <4 x i64>* ⟪╋⟫ %39 = bitcast i64* %38 to <4 x i64>* 74 +107 store <4 x i64> %38, <4 x i64>* %44, align 8 ⟪╋⟫ store <4 x i64> %33, <4 x i64>* %39, align 8 75 +108 %45 = getelementptr inbounds i64, i64* %41, i64 8 ⟪╋⟫ %40 = getelementptr inbounds i64, i64* %36, i64 8 76 +109 %46 = bitcast i64* %45 to <4 x i64>* ⟪╋⟫ %41 = bitcast i64* %40 to <4 x i64>* 77 +110 store <4 x i64> %39, <4 x i64>* %46, align 8 ⟪╋⟫ store <4 x i64> %34, <4 x i64>* %41, align 8 78 +111 %47 = getelementptr inbounds i64, i64* %41, i64 12 ⟪╋⟫ %42 = getelementptr inbounds i64, i64* %36, i64 12 79 +112 %48 = bitcast i64* %47 to <4 x i64>* ⟪╋⟫ %43 = bitcast i64* %42 to <4 x i64>* 80 +113 store <4 x i64> %40, <4 x i64>* %48, align 8 ⟪╋⟫ store <4 x i64> %35, <4 x i64>* %43, align 8 81 +114 %index.next = or i64 %index, 16 ┃ %index.next = or i64 %index, 16 82 +115 %49 = getelementptr inbounds i64, i64* %arrayptr29,…⟪╋⟫ %44 = getelementptr inbounds i64, i64* %arrayptr8, …83 +116 %50 = bitcast i64* %49 to <4 x i64>* ⟪╋⟫ %45 = bitcast i64* %44 to <4 x i64>* 84 +117 %wide.load.1 = load <4 x i64>, <4 x i64>* %50, alig…⟪╋⟫ %wide.load.1 = load <4 x i64>, <4 x i64>* %45, alig…85 +118 %51 = getelementptr inbounds i64, i64* %49, i64 4 ⟪╋⟫ %46 = getelementptr inbounds i64, i64* %44, i64 4 86 +119 %52 = bitcast i64* %51 to <4 x i64>* ⟪╋⟫ %47 = bitcast i64* %46 to <4 x i64>* 87 +120 %wide.load66.1 = load <4 x i64>, <4 x i64>* %52, al…⟪╋⟫ %wide.load13.1 = load <4 x i64>, <4 x i64>* %47, al…88 
+121 %53 = getelementptr inbounds i64, i64* %49, i64 8 ⟪╋⟫ %48 = getelementptr inbounds i64, i64* %44, i64 8 89 +122 %54 = bitcast i64* %53 to <4 x i64>* ⟪╋⟫ %49 = bitcast i64* %48 to <4 x i64>* 90 +123 %wide.load67.1 = load <4 x i64>, <4 x i64>* %54, al…⟪╋⟫ %wide.load14.1 = load <4 x i64>, <4 x i64>* %49, al…91 +124 %55 = getelementptr inbounds i64, i64* %49, i64 12 ⟪╋⟫ %50 = getelementptr inbounds i64, i64* %44, i64 12 92 +125 %56 = bitcast i64* %55 to <4 x i64>* ⟪╋⟫ %51 = bitcast i64* %50 to <4 x i64>* 93 +126 %wide.load68.1 = load <4 x i64>, <4 x i64>* %56, al…⟪╋⟫ %wide.load15.1 = load <4 x i64>, <4 x i64>* %51, al…94 +127 %57 = mul <4 x i64> %wide.load.1, %broadcast.splat ⟪╋⟫ %52 = mul <4 x i64> %wide.load.1, %broadcast.splat 95 +128 %58 = mul <4 x i64> %wide.load66.1, %broadcast.spla…⟪╋⟫ %53 = mul <4 x i64> %wide.load13.1, %broadcast.spla…96 +129 %59 = mul <4 x i64> %wide.load67.1, %broadcast.spla…⟪╋⟫ %54 = mul <4 x i64> %wide.load14.1, %broadcast.spla…97 +130 %60 = mul <4 x i64> %wide.load68.1, %broadcast.spla…⟪╋⟫ %55 = mul <4 x i64> %wide.load15.1, %broadcast.spla…98 +131 %61 = getelementptr inbounds i64, i64* %arrayptr143…⟪╋⟫ %56 = getelementptr inbounds i64, i64* %arrayptr29,…99 +132 %62 = bitcast i64* %61 to <4 x i64>* ⟪╋⟫ %57 = bitcast i64* %56 to <4 x i64>* 100 +133 %wide.load75.1 = load <4 x i64>, <4 x i64>* %62, al…⟪╋⟫ %wide.load22.1 = load <4 x i64>, <4 x i64>* %57, al…101 +134 %63 = getelementptr inbounds i64, i64* %61, i64 4 ⟪╋⟫ %58 = getelementptr inbounds i64, i64* %56, i64 4 102 +135 %64 = bitcast i64* %63 to <4 x i64>* ⟪╋⟫ %59 = bitcast i64* %58 to <4 x i64>* 103 +136 %wide.load76.1 = load <4 x i64>, <4 x i64>* %64, al…⟪╋⟫ %wide.load23.1 = load <4 x i64>, <4 x i64>* %59, al…104 +137 %65 = getelementptr inbounds i64, i64* %61, i64 8 ⟪╋⟫ %60 = getelementptr inbounds i64, i64* %56, i64 8 105 +138 %66 = bitcast i64* %65 to <4 x i64>* ⟪╋⟫ %61 = bitcast i64* %60 to <4 x i64>* 106 +139 %wide.load77.1 = load <4 x i64>, <4 x i64>* %66, al…⟪╋⟫ 
%wide.load24.1 = load <4 x i64>, <4 x i64>* %61, al…107 +140 %67 = getelementptr inbounds i64, i64* %61, i64 12 ⟪╋⟫ %62 = getelementptr inbounds i64, i64* %56, i64 12 108 +141 %68 = bitcast i64* %67 to <4 x i64>* ⟪╋⟫ %63 = bitcast i64* %62 to <4 x i64>* 109 +142 %wide.load78.1 = load <4 x i64>, <4 x i64>* %68, al…⟪╋⟫ %wide.load25.1 = load <4 x i64>, <4 x i64>* %63, al…110 +143 %69 = add <4 x i64> %wide.load75.1, %57 ⟪╋⟫ %64 = add <4 x i64> %wide.load22.1, %52 111 +144 %70 = add <4 x i64> %wide.load76.1, %58 ⟪╋⟫ %65 = add <4 x i64> %wide.load23.1, %53 112 +145 %71 = add <4 x i64> %wide.load77.1, %59 ⟪╋⟫ %66 = add <4 x i64> %wide.load24.1, %54 113 +146 %72 = add <4 x i64> %wide.load78.1, %60 ⟪╋⟫ %67 = add <4 x i64> %wide.load25.1, %55 114 +147 %73 = getelementptr inbounds i64, i64* %arrayptr233…⟪╋⟫ %68 = getelementptr inbounds i64, i64* %arrayptr510…115 +148 %74 = bitcast i64* %73 to <4 x i64>* ⟪╋⟫ %69 = bitcast i64* %68 to <4 x i64>* 116 +149 store <4 x i64> %69, <4 x i64>* %74, align 8 ⟪╋⟫ store <4 x i64> %64, <4 x i64>* %69, align 8 117 +150 %75 = getelementptr inbounds i64, i64* %73, i64 4 ⟪╋⟫ %70 = getelementptr inbounds i64, i64* %68, i64 4 118 +151 %76 = bitcast i64* %75 to <4 x i64>* ⟪╋⟫ %71 = bitcast i64* %70 to <4 x i64>* 119 +152 store <4 x i64> %70, <4 x i64>* %76, align 8 ⟪╋⟫ store <4 x i64> %65, <4 x i64>* %71, align 8 120 +153 %77 = getelementptr inbounds i64, i64* %73, i64 8 ⟪╋⟫ %72 = getelementptr inbounds i64, i64* %68, i64 8 121 +154 %78 = bitcast i64* %77 to <4 x i64>* ⟪╋⟫ %73 = bitcast i64* %72 to <4 x i64>* 122 +155 store <4 x i64> %71, <4 x i64>* %78, align 8 ⟪╋⟫ store <4 x i64> %66, <4 x i64>* %73, align 8 123 +156 %79 = getelementptr inbounds i64, i64* %73, i64 12 ⟪╋⟫ %74 = getelementptr inbounds i64, i64* %68, i64 12 124 +157 %80 = bitcast i64* %79 to <4 x i64>* ⟪╋⟫ %75 = bitcast i64* %74 to <4 x i64>* 125 +158 store <4 x i64> %72, <4 x i64>* %80, align 8 ⟪╋⟫ store <4 x i64> %67, <4 x i64>* %75, align 8 126 +159 %index.next.1 = add nuw i64 
%index, 32 ┃ %index.next.1 = add nuw i64 %index, 32 127 +160 %niter.next.1 = add i64 %niter, 2 ┃ %niter.next.1 = add i64 %niter, 2 128 +161 %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_… ┃ %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_…129 +162 br i1 %niter.ncmp.1, label %middle.block.unr-lcssa,… ┃ br i1 %niter.ncmp.1, label %middle.block.unr-lcssa,…130 +163 ┃ 131 +164 middle.block.unr-lcssa: ; p… ┃ middle.block.unr-lcssa: ; p…132 +165 %index.unr = phi i64 [ 0, %vector.ph ], [ %index.ne… ┃ %index.unr = phi i64 [ 0, %vector.ph ], [ %index.ne…133 +166 %lcmp.mod.not = icmp eq i64 %xtraiter, 0 ┃ %lcmp.mod.not = icmp eq i64 %xtraiter, 0 134 +167 br i1 %lcmp.mod.not, label %middle.block, label %ve… ┃ br i1 %lcmp.mod.not, label %middle.block, label %ve…135 +168 ┃ 136 +169 vector.body.epil.preheader: ; p… ┃ vector.body.epil.preheader: ; p…137 +170 %81 = getelementptr inbounds i64, i64* %arrayptr29,…⟪╋⟫ %76 = getelementptr inbounds i64, i64* %arrayptr8, …138 +171 %82 = bitcast i64* %81 to <4 x i64>* ⟪╋⟫ %77 = bitcast i64* %76 to <4 x i64>* 139 +172 %wide.load.epil = load <4 x i64>, <4 x i64>* %82, a…⟪╋⟫ %wide.load.epil = load <4 x i64>, <4 x i64>* %77, a…140 +173 %83 = getelementptr inbounds i64, i64* %81, i64 4 ⟪╋⟫ %78 = getelementptr inbounds i64, i64* %76, i64 4 141 +174 %84 = bitcast i64* %83 to <4 x i64>* ⟪╋⟫ %79 = bitcast i64* %78 to <4 x i64>* 142 +175 %wide.load66.epil = load <4 x i64>, <4 x i64>* %84,…⟪╋⟫ %wide.load13.epil = load <4 x i64>, <4 x i64>* %79,…143 +176 %85 = getelementptr inbounds i64, i64* %81, i64 8 ⟪╋⟫ %80 = getelementptr inbounds i64, i64* %76, i64 8 144 +177 %86 = bitcast i64* %85 to <4 x i64>* ⟪╋⟫ %81 = bitcast i64* %80 to <4 x i64>* 145 +178 %wide.load67.epil = load <4 x i64>, <4 x i64>* %86,…⟪╋⟫ %wide.load14.epil = load <4 x i64>, <4 x i64>* %81,…146 +179 %87 = getelementptr inbounds i64, i64* %81, i64 12 ⟪╋⟫ %82 = getelementptr inbounds i64, i64* %76, i64 12 147 +180 %88 = bitcast i64* %87 to <4 x i64>* ⟪╋⟫ %83 = bitcast i64* %82 to 
<4 x i64>* 148 +181 %wide.load68.epil = load <4 x i64>, <4 x i64>* %88,…⟪╋⟫ %wide.load15.epil = load <4 x i64>, <4 x i64>* %83,…149 +182 %89 = mul <4 x i64> %wide.load.epil, %broadcast.spl…⟪╋⟫ %84 = mul <4 x i64> %wide.load.epil, %broadcast.spl…150 +183 %90 = mul <4 x i64> %wide.load66.epil, %broadcast.s…⟪╋⟫ %85 = mul <4 x i64> %wide.load13.epil, %broadcast.s…151 +184 %91 = mul <4 x i64> %wide.load67.epil, %broadcast.s…⟪╋⟫ %86 = mul <4 x i64> %wide.load14.epil, %broadcast.s…152 +185 %92 = mul <4 x i64> %wide.load68.epil, %broadcast.s…⟪╋⟫ %87 = mul <4 x i64> %wide.load15.epil, %broadcast.s…153 +186 %93 = getelementptr inbounds i64, i64* %arrayptr143…⟪╋⟫ %88 = getelementptr inbounds i64, i64* %arrayptr29,…154 +187 %94 = bitcast i64* %93 to <4 x i64>* ⟪╋⟫ %89 = bitcast i64* %88 to <4 x i64>* 155 +188 %wide.load75.epil = load <4 x i64>, <4 x i64>* %94,…⟪╋⟫ %wide.load22.epil = load <4 x i64>, <4 x i64>* %89,…156 +189 %95 = getelementptr inbounds i64, i64* %93, i64 4 ⟪╋⟫ %90 = getelementptr inbounds i64, i64* %88, i64 4 157 +190 %96 = bitcast i64* %95 to <4 x i64>* ⟪╋⟫ %91 = bitcast i64* %90 to <4 x i64>* 158 +191 %wide.load76.epil = load <4 x i64>, <4 x i64>* %96,…⟪╋⟫ %wide.load23.epil = load <4 x i64>, <4 x i64>* %91,…159 +192 %97 = getelementptr inbounds i64, i64* %93, i64 8 ⟪╋⟫ %92 = getelementptr inbounds i64, i64* %88, i64 8 160 +193 %98 = bitcast i64* %97 to <4 x i64>* ⟪╋⟫ %93 = bitcast i64* %92 to <4 x i64>* 161 +194 %wide.load77.epil = load <4 x i64>, <4 x i64>* %98,…⟪╋⟫ %wide.load24.epil = load <4 x i64>, <4 x i64>* %93,…162 +195 %99 = getelementptr inbounds i64, i64* %93, i64 12 ⟪╋⟫ %94 = getelementptr inbounds i64, i64* %88, i64 12 163 +196 %100 = bitcast i64* %99 to <4 x i64>* ⟪╋⟫ %95 = bitcast i64* %94 to <4 x i64>* 164 +197 %wide.load78.epil = load <4 x i64>, <4 x i64>* %100…⟪╋⟫ %wide.load25.epil = load <4 x i64>, <4 x i64>* %95,…165 +198 %101 = add <4 x i64> %wide.load75.epil, %89 ⟪╋⟫ %96 = add <4 x i64> %wide.load22.epil, %84 166 +199 %102 = add <4 x 
i64> %wide.load76.epil, %90 ⟪╋⟫ %97 = add <4 x i64> %wide.load23.epil, %85 167 +200 %103 = add <4 x i64> %wide.load77.epil, %91 ⟪╋⟫ %98 = add <4 x i64> %wide.load24.epil, %86 168 +201 %104 = add <4 x i64> %wide.load78.epil, %92 ⟪╋⟫ %99 = add <4 x i64> %wide.load25.epil, %87 169 +202 %105 = getelementptr inbounds i64, i64* %arrayptr23…⟪╋⟫ %100 = getelementptr inbounds i64, i64* %arrayptr51…170 +203 %106 = bitcast i64* %105 to <4 x i64>* ⟪╋⟫ %101 = bitcast i64* %100 to <4 x i64>* 171 +204 store <4 x i64> %101, <4 x i64>* %106, align 8 ⟪╋⟫ store <4 x i64> %96, <4 x i64>* %101, align 8 172 +205 %107 = getelementptr inbounds i64, i64* %105, i64 4 ⟪╋⟫ %102 = getelementptr inbounds i64, i64* %100, i64 4 173 +206 %108 = bitcast i64* %107 to <4 x i64>* ⟪╋⟫ %103 = bitcast i64* %102 to <4 x i64>* 174 +207 store <4 x i64> %102, <4 x i64>* %108, align 8 ⟪╋⟫ store <4 x i64> %97, <4 x i64>* %103, align 8 175 +208 %109 = getelementptr inbounds i64, i64* %105, i64 8 ⟪╋⟫ %104 = getelementptr inbounds i64, i64* %100, i64 8 176 +209 %110 = bitcast i64* %109 to <4 x i64>* ⟪╋⟫ %105 = bitcast i64* %104 to <4 x i64>* 177 +210 store <4 x i64> %103, <4 x i64>* %110, align 8 ⟪╋⟫ store <4 x i64> %98, <4 x i64>* %105, align 8 178 +211 %111 = getelementptr inbounds i64, i64* %105, i64 1…⟪╋⟫ %106 = getelementptr inbounds i64, i64* %100, i64 1…179 +212 %112 = bitcast i64* %111 to <4 x i64>* ⟪╋⟫ %107 = bitcast i64* %106 to <4 x i64>* 180 +213 store <4 x i64> %104, <4 x i64>* %112, align 8 ⟪╋⟫ store <4 x i64> %99, <4 x i64>* %107, align 8 181 +214 br label %middle.block ┃ br label %middle.block 182 +215 ┃ 183 +216 middle.block: ; p… ┃ middle.block: ; p…184 +217 %cmp.n = icmp eq i64 %exit.mainloop.at, %n.vec ⟪╋⟫ %cmp.n = icmp eq i64 %arraylen, %n.vec 185 +218 br i1 %cmp.n, label %main.exit.selector, label %sca…⟪┫ +219 ⟪┫ +220 scalar.ph: ; p…⟪┫ +221 %bc.resume.val = phi i64 [ %ind.end, %middle.block …⟪┫ +222 br label %idxend21 ⟪┫ +223 ⟪┫ +224 L31: ; p…⟪┫ +225 ret void ⟪┫ +226 ⟪┫ +227 oob: ; p…⟪┫ +228 
%errorbox = alloca i64, align 8 ⟪┫ +229 store i64 %value_phi3.postloop, i64* %errorbox, ali…⟪┫ +230 call void @ijl_bounds_error_ints({}* %2, i64* nonnu…⟪┫ +231 unreachable ⟪┫ +232 ⟪┫ +233 oob10: ; p…⟪┫ +234 %errorbox11 = alloca i64, align 8 ⟪┫ +235 store i64 %value_phi3.postloop, i64* %errorbox11, a…⟪┫ +236 call void @ijl_bounds_error_ints({}* %3, i64* nonnu…⟪┫ +237 unreachable ⟪┫ +238 ⟪┫ +239 oob19: ; p…⟪┫ +240 %errorbox20 = alloca i64, align 8 ⟪┫ +241 store i64 %value_phi3.postloop, i64* %errorbox20, a…⟪┫ +242 call void @ijl_bounds_error_ints({}* %0, i64* nonnu…⟪┫ +243 unreachable ⟪┫ +244 ⟪┫ +245 idxend21: ; p…⟪┫ +246 %value_phi3 = phi i64 [ %119, %idxend21 ], [ %bc.re…⟪┫ +247 %113 = add nsw i64 %value_phi3, -1 ⟪┫ +248 %114 = getelementptr inbounds i64, i64* %arrayptr29…⟪┫ +249 %arrayref = load i64, i64* %114, align 8 ⟪┫ +250 %115 = mul i64 %arrayref, %1 ⟪┫ +251 %116 = getelementptr inbounds i64, i64* %arrayptr14…⟪┫ +252 %arrayref15 = load i64, i64* %116, align 8 ⟪┫ +253 %117 = add i64 %arrayref15, %115 ⟪┫ +254 %118 = getelementptr inbounds i64, i64* %arrayptr23…⟪┫ +255 store i64 %117, i64* %118, align 8 ⟪┫ +256 %119 = add nuw nsw i64 %value_phi3, 1 ⟪┫ +257 %.not51 = icmp ult i64 %value_phi3, %exit.mainloop.…⟪┫ +258 br i1 %.not51, label %idxend21, label %main.exit.se…⟪┫ +259 ⟪┫ +260 main.exit.selector: ; p…⟪┫ +261 %value_phi3.lcssa = phi i64 [ %exit.mainloop.at, %m…⟪┫ +262 %.lcssa = phi i64 [ %ind.end, %middle.block ], [ %1…⟪┫ +263 %120 = icmp ult i64 %value_phi3.lcssa, %arraylen ⟪┫ +264 br i1 %120, label %main.pseudo.exit, label %L31 ⟪┫ +265 ⟪┫ +266 main.pseudo.exit: ; p…⟪┫ +267 %value_phi3.copy = phi i64 [ 1, %L13.preheader ], […⟪┫ +268 br label %L13.postloop ⟪┫ +269 ⟪┫ +270 L13.postloop: ; p…⟪┫ +271 %value_phi3.postloop = phi i64 [ %127, %idxend21.po…⟪┫ +272 %121 = add i64 %value_phi3.postloop, -1 ⟪┫ +273 %inbounds.postloop = icmp ult i64 %121, %arraylen6 ⟪┫ +274 br i1 %inbounds.postloop, label %idxend.postloop, l…⟪┫ + ┣⟫ br i1 %cmp.n, label %L32, label 
%scalar.ph 186 +275 ┃ 187 +276 idxend.postloop: ; p…⟪┫ +277 %inbounds9.postloop = icmp ult i64 %121, %arraylen8 ⟪┫ +278 br i1 %inbounds9.postloop, label %idxend12.postloop…⟪┫ + ┣⟫scalar.ph: ; p…188 + ┣⟫ %bc.resume.val = phi i64 [ %n.vec, %middle.block ],…189 + ┣⟫ br label %L12 190 +279 ┃ 191 +280 idxend12.postloop: ; p…⟪┫ +281 %inbounds18.postloop = icmp ult i64 %121, %arraylen ⟪┫ +282 br i1 %inbounds18.postloop, label %idxend21.postloo…⟪┫ + ┣⟫L12: ; p…192 + ┣⟫ %value_phi12 = phi i64 [ %bc.resume.val, %scalar.ph…193 + ┣⟫ %108 = getelementptr inbounds i64, i64* %arrayptr8,…194 + ┣⟫ %arrayref = load i64, i64* %108, align 8 195 + ┣⟫ %109 = mul i64 %arrayref, %1 196 + ┣⟫ %110 = getelementptr inbounds i64, i64* %arrayptr29…197 + ┣⟫ %arrayref3 = load i64, i64* %110, align 8 198 + ┣⟫ %111 = add i64 %arrayref3, %109 199 + ┣⟫ %112 = getelementptr inbounds i64, i64* %arrayptr51…200 + ┣⟫ store i64 %111, i64* %112, align 8 201 + ┣⟫ %113 = add nuw nsw i64 %value_phi12, 1 202 + ┣⟫ %exitcond.not = icmp eq i64 %113, %arraylen 203 + ┣⟫ br i1 %exitcond.not, label %L32, label %L12 204 +283 ┃ 205 +284 idxend21.postloop: ; p…⟪┫ +285 %122 = getelementptr inbounds i64, i64* %arrayptr29…⟪┫ +286 %arrayref.postloop = load i64, i64* %122, align 8 ⟪┫ +287 %123 = mul i64 %arrayref.postloop, %1 ⟪┫ +288 %124 = getelementptr inbounds i64, i64* %arrayptr14…⟪┫ +289 %arrayref15.postloop = load i64, i64* %124, align 8 ⟪┫ +290 %125 = add i64 %arrayref15.postloop, %123 ⟪┫ +291 %126 = getelementptr inbounds i64, i64* %arrayptr23…⟪┫ +292 store i64 %125, i64* %126, align 8 ⟪┫ +293 %.not.not32.postloop = icmp eq i64 %value_phi3.post…⟪┫ +294 %127 = add nuw nsw i64 %value_phi3.postloop, 1 ⟪┫ +295 br i1 %.not.not32.postloop, label %L31, label %L13.…⟪┫ + ┣⟫L32: ; p…206 + ┣⟫ ret void 207 +296 } ┃ } 208 +297 ┃ 209 \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index b7f2cb3..1f56189 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,10 +1,184 @@ -using CodeDifferences -using 
using Aqua
using CodeDiffs
using DeepDiffs
using InteractiveUtils
using ReferenceTests
using Test


# Architecture tag interpolated into the names of the LLVM/native reference files below.
const NATIVE_ARCH = InteractiveUtils.sys_arch_category()


"""
    display_str(v; mime=MIME"text/plain"(), compact=false, color=true, columns=nothing)

Render `v` with `Base.show` into a `String`.

The output goes to an `IOBuffer` wrapped in an `IOContext` carrying the `:compact` and
`:color` properties. While showing, the `COLUMNS` environment variable is set to `columns`;
when `columns` is `nothing`, the width of `displaysize(stdout)` is used instead.
If `mime` is `nothing`, the two-argument `show(io, v)` is called, otherwise the
three-argument `show(io, mime, v)`.
"""
function display_str(v; mime=MIME"text/plain"(), compact=false, color=true, columns=nothing)
    # `@something` only exists since Julia 1.7 while Project.toml declares `julia = "1.6"`:
    # use an explicit check, which also keeps the fallback lazily evaluated.
    width = columns === nothing ? displaysize(stdout)[2] : columns
    io = IOBuffer()
    io_ctx = IOContext(io, :compact => compact, :color => color)
    withenv("COLUMNS" => width) do
        if mime === nothing
            Base.show(io_ctx, v)
        else
            Base.show(io_ctx, mime, v)
        end
    end
    return String(take!(io))
end


@testset "CodeDiffs.jl" begin
    @testset "Code quality (Aqua.jl)" begin
        Aqua.test_all(CodeDiffs)
    end

    @testset "AST" begin
        # Identical expressions: no difference expected
        diff = CodeDiffs.compare_ast(:(1+2), :(1+2); color=false, prettify=false, lines=false, alias=false)
        @test CodeDiffs.issame(diff)
        @test diff.before == diff.highlighted_before == "quote\n 1 + 2\nend"

        # A single changed literal: one added and one removed entry
        diff = CodeDiffs.compare_ast(:(1+2), :(1+3); color=false, prettify=false, lines=false, alias=false)
        @test !CodeDiffs.issame(diff)
        @test length(DeepDiffs.added(diff)) == length(DeepDiffs.removed(diff)) == 1

        # Same expression, but with an explicit line number node on one side
        e = quote
            $(LineNumberNode(42, :file))
            1+2
        end
        diff = CodeDiffs.compare_ast(e, :(1+2); color=false, prettify=false, lines=true, alias=false)
        @test !CodeDiffs.issame(diff)
        @test occursin("#= file:42 =#", diff.before)
        diff = CodeDiffs.compare_ast(e, :(1+2); color=false, prettify=true, lines=false, alias=false)
        @test CodeDiffs.issame(diff)
        @test diff == (@code_diff color=false e :(1+2))
    end

    @testset "Basic function" begin
        f1() = 1
        f2() = 2

        @testset "Typed" begin
            diff = CodeDiffs.compare_code_typed(f1, Tuple{}, f1, Tuple{}; color=false)
            @test CodeDiffs.issame(diff)

            diff = CodeDiffs.compare_code_typed(f1, Tuple{}, f2, Tuple{}; color=false)
            @test !CodeDiffs.issame(diff)
            @test length(DeepDiffs.added(diff)) == length(DeepDiffs.removed(diff)) == 1
            @test diff == (@code_diff type=:typed color=false f1() f2())
        end

        @testset "LLVM" begin
            diff = CodeDiffs.compare_code_llvm(f1, Tuple{}, f1, Tuple{}; color=false)
            @test CodeDiffs.issame(diff)
            @test !occursin(r"julia_f1", diff.before)  # LLVM module names should have been cleaned up

            diff = CodeDiffs.compare_code_llvm(f1, Tuple{}, f2, Tuple{}; color=false)
            @test !CodeDiffs.issame(diff)
            @test length(DeepDiffs.added(diff)) == length(DeepDiffs.removed(diff)) == 1 + 2  # ret + name*2
            @test diff == (@code_diff type=:llvm color=false f1() f2())
        end

        @testset "Native" begin
            diff = CodeDiffs.compare_code_native(f1, Tuple{}, f1, Tuple{}; color=false)
            @test CodeDiffs.issame(diff)
            @test !occursin(r"julia_f1", diff.before)  # LLVM module names should have been cleaned up

            diff = CodeDiffs.compare_code_native(f1, Tuple{}, f2, Tuple{}; color=false)
            @test !CodeDiffs.issame(diff)
            @test diff == (@code_diff type=:native color=false f1() f2())
        end
    end

    @testset "Changes" begin
        A = quote
            1 + 2
            f(a, b)
            g(c, d)
            "test"
        end

        B = quote
            1 + 3
            f(a, d)
            g(c, b)
            "test2"
        end

        diff = CodeDiffs.compare_ast(A, B; color=false)
        @test !CodeDiffs.issame(diff)
        # All statements were marked as changed
        @test length(DeepDiffs.added(diff)) == length(DeepDiffs.changed(diff)) == 4
    end

    @testset "Display" begin
        # Compare `f₁(args₁)` against `f₂(args₂)` at each code level and check the rendered
        # diff (120 columns wide) against the reference files stored under `references/`.
        function test_cmp_display(cmp_name, f₁, args₁, f₂, args₂)
            @testset "Typed" begin
                diff = CodeDiffs.compare_code_typed(f₁, args₁, f₂, args₂; color=false)
                @test_reference "references/$(cmp_name).jl_typed" display_str(diff; color=false, columns=120)

                diff = CodeDiffs.compare_code_typed(f₁, args₁, f₂, args₂; color=true)
                @test findfirst(CodeDiffs.ANSI_REGEX, diff.before) === nothing
                @test findfirst(CodeDiffs.ANSI_REGEX, diff.highlighted_before) !== nothing
                @test_reference "references/$(cmp_name)_COLOR.jl_typed" display_str(diff; columns=120)
            end

            @testset "LLVM" begin
                diff = CodeDiffs.compare_code_llvm(f₁, args₁, f₂, args₂; color=false, debuginfo=:none)
                @test_reference "references/$(cmp_name)_$(NATIVE_ARCH).ll" display_str(diff; color=false, columns=120)

                diff = CodeDiffs.compare_code_llvm(f₁, args₁, f₂, args₂; color=true, debuginfo=:none)
                @test findfirst(CodeDiffs.ANSI_REGEX, diff.before) === nothing
                @test findfirst(CodeDiffs.ANSI_REGEX, diff.highlighted_before) !== nothing
                @test_reference "references/$(cmp_name)_$(NATIVE_ARCH)_COLOR.ll" display_str(diff; columns=120)
            end

            @testset "Native" begin
                diff = CodeDiffs.compare_code_native(f₁, args₁, f₂, args₂; color=false, debuginfo=:none)
                @test_reference "references/$(cmp_name)_$(NATIVE_ARCH).S" display_str(diff; color=false, columns=120)

                diff = CodeDiffs.compare_code_native(f₁, args₁, f₂, args₂; color=true, debuginfo=:none)
                @test findfirst(CodeDiffs.ANSI_REGEX, diff.before) === nothing
                @test findfirst(CodeDiffs.ANSI_REGEX, diff.highlighted_before) !== nothing
                @test_reference "references/$(cmp_name)_$(NATIVE_ARCH)_COLOR.S" display_str(diff; columns=120)
            end

            @testset "Line numbers" begin
                diff = CodeDiffs.compare_code_llvm(f₁, args₁, f₂, args₂; color=false, debuginfo=:none)
                withenv("CODE_DIFFS_LINE_NUMBERS" => true) do
                    @test_reference "references/$(cmp_name)_$(NATIVE_ARCH)_LINES.ll" display_str(diff; color=false, columns=120)
                end
            end
        end

        @testset "f1" begin
            f() = 1
            test_cmp_display("f1", f, Tuple{}, f, Tuple{})
        end

        @testset "saxpy" begin
            function saxpy(r, a, x, y)
                for i in eachindex(r)
                    r[i] = a * x[i] + y[i]
                end
            end

            function saxpy_simd(r, a, x, y)
                @inbounds @simd ivdep for i in eachindex(r)
                    r[i] = a * x[i] + y[i]
                end
            end

            saxpy_args = Tuple{Vector{Int}, Int, Vector{Int}, Vector{Int}}
            test_cmp_display("saxpy", saxpy, saxpy_args, saxpy_simd, saxpy_args)
        end
    end

    @testset "LLVM module name" begin
        @test CodeDiffs.replace_llvm_module_name("julia_f_1") == "f"
        if Sys.islinux()
            @eval var"@f"() = 1
            @test occursin(r"julia_f_\d+", @io2str code_native(::IO, var"@f", Tuple{}))
            @test CodeDiffs.replace_llvm_module_name("julia_f_1", "@f") == "f"
        else
            @test CodeDiffs.replace_llvm_module_name("julia_@f_1", "@f") == "@f"
        end
    end
end