diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9a2c504..a2c8687 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -24,7 +24,7 @@ jobs: matrix: version: - '1.10' - - '1.6' + - '1.7' - 'nightly' os: - ubuntu-latest @@ -40,12 +40,18 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: files: lcov.info docs: name: Documentation - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + arch: + - x64 permissions: actions: write # needed to allow julia-actions/cache to proactively delete old caches that it has created contents: write @@ -55,6 +61,7 @@ jobs: - uses: julia-actions/setup-julia@v1 with: version: '1' + arch: ${{ matrix.arch }} - uses: julia-actions/cache@v1 - name: Configure doc environment shell: julia --project=docs --color=yes {0} diff --git a/Project.toml b/Project.toml index ccd951b..0bb1fc7 100644 --- a/Project.toml +++ b/Project.toml @@ -21,7 +21,7 @@ ReferenceTests = "0.10" StringDistances = "0.11" Test = "1" WidthLimitedIO = "1" -julia = "1.6" +julia = "1.7" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" diff --git a/README.md b/README.md index a6bf0b4..5d2e1cf 100644 --- a/README.md +++ b/README.md @@ -9,33 +9,29 @@ Compare code and display the difference in the terminal side-by-side. Supports syntax highlighting. -The [`@code_diff`](@ref) macro is the main entry point. If possible, the code type will be +The `@code_diff` macro is the main entry point. If possible, the code type will be detected automatically, otherwise add e.g. 
`type=:native` for native assembly comparison: ```julia julia> f1(a) = a + 1 f1 (generic function with 1 method) julia> @code_diff type=:llvm debuginfo=:none f1(Int64(1)) f1(Int8(1)) -; Function Attrs: uwtable ┃ ; Function Attrs: uwtable define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f1(i8 signext %0) #0 { top: ┃ top: + ┣⟫ %1 = sext i8 %0 to i64 %1 = add i64 %0, 1 ⟪╋⟫ %2 = add nsw i64 %1, 1 ret i64 %1 ⟪╋⟫ ret i64 %2 - ┣⟫ %1 = sext i8 %0 to i64 } ┃ } - ┃ julia> f2(a) = a - 1 f2 (generic function with 1 method) julia> @code_diff type=:llvm debuginfo=:none f1(1) f2(1) -; Function Attrs: uwtable ┃ ; Function Attrs: uwtable define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f2(i64 signext %0) #0 { top: ┃ top: %1 = add i64 %0, 1 ⟪╋⟫ %1 = add i64 %0, -1 ret i64 %1 ┃ ret i64 %1 } ┃ } - ┃ ``` ## Supported languages diff --git a/docs/make.jl b/docs/make.jl index 923a516..811468f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -3,6 +3,8 @@ using Documenter DocMeta.setdocmeta!(CodeDiffs, :DocTestSetup, :(using CodeDiffs); recursive=true) +can_doctest = Sys.islinux() && Sys.ARCH === :x86_64 + makedocs(; modules=[CodeDiffs], authors="Luc Briand <34173752+Keluaa@users.noreply.github.com> and contributors", @@ -15,6 +17,7 @@ makedocs(; pages=[ "Home" => "index.md", ], + doctest = can_doctest ) deploydocs(; diff --git a/docs/src/index.md b/docs/src/index.md index 76b3772..41cae0b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -23,26 +23,22 @@ julia> f1(a) = a + 1 f1 (generic function with 1 method) julia> @code_diff type=:llvm debuginfo=:none color=false f1(Int64(1)) f1(Int8(1)) -; Function Attrs: uwtable ┃ ; Function Attrs: uwtable define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f1(i8 signext %0) #0 { top: ┃ top: + ┣⟫ %1 = sext i8 %0 to i64 %1 = add i64 %0, 1 ⟪╋⟫ %2 = add nsw i64 %1, 1 ret i64 %1 ⟪╋⟫ ret i64 %2 - ┣⟫ %1 = sext i8 %0 to i64 } ┃ } - ┃ julia> f2(a) = a - 1 f2 (generic function with 1 method) julia> @code_diff type=:llvm debuginfo=:none color=false 
f1(1) f2(1) -; Function Attrs: uwtable ┃ ; Function Attrs: uwtable define i64 @f1(i64 signext %0) #0 { ⟪╋⟫define i64 @f2(i64 signext %0) #0 { top: ┃ top: %1 = add i64 %0, 1 ⟪╋⟫ %1 = add i64 %0, -1 ret i64 %1 ┃ ret i64 %1 } ┃ } - ┃ ``` Setting the environment variable `"CODE_DIFFS_LINE_NUMBERS"` to `true` will display line diff --git a/generated.rb b/generated.rb new file mode 100644 index 0000000..6a501ae --- /dev/null +++ b/generated.rb @@ -0,0 +1 @@ +test/references/** linguist-generated diff --git a/src/CodeDiff.jl b/src/CodeDiff.jl index c344145..57ecccd 100644 --- a/src/CodeDiff.jl +++ b/src/CodeDiff.jl @@ -18,8 +18,8 @@ Fancy REPL output is done with [`side_by_side_diff`](@ref). struct CodeDiff <: DeepDiffs.DeepDiff before::String after::String - changed::Dict{Int, DeepDiffs.StringDiff} - ignore_added::Set{Int} + changed::Dict{Int, Tuple{Vector{Int}, DeepDiffs.StringDiff}} # line idx => (line idxs added before the change, change diff) + ignore_added::Set{Int} # Line idxs which are part of `changed`, including line idxs added before changes diff::DeepDiffs.VectorDiff highlighted_before::String highlighted_after::String @@ -58,11 +58,17 @@ Base.show(io::IO, ::MIME"text/plain", diff::CodeDiff) = side_by_side_diff(io, di function Base.show(io::IO, diff::CodeDiff) xlines = split(diff.before, '\n') ylines = split(diff.after, '\n') - DeepDiffs.visitall(diff.diff) do idx, state, last - if state == :removed + DeepDiffs.visitall(diff) do idx, state, last + if state === :removed printstyled(io, "- ", xlines[idx], color=:red) - elseif state == :added + elseif state === :added printstyled(io, "+ ", ylines[idx], color=:green) + elseif state === :changed + printstyled(io, "~ ", color=:yellow) + io_buf = IOBuffer() + io_ctx = IOContext(io_buf, io) + Base.show(io_ctx, diff.changed[idx][2]) + printstyled(io, String(take!(io_buf))[2:end-1]) # unquote the line diff else print(io, " ", xlines[idx]) end @@ -86,6 +92,7 @@ function optimize_line_changes!(diff::CodeDiff; 
dist=StringDistances.Levenshtein empty!(diff.changed) empty!(diff.ignore_added) previously_removed = Vector{Int}() + added_before = Vector{Int}() removed_start = 1 iadded = 1 @@ -95,20 +102,50 @@ function optimize_line_changes!(diff::CodeDiff; dist=StringDistances.Levenshtein push!(previously_removed, idx) elseif state == :added iadded += 1 + changed = false for (li, removed_line) in enumerate(previously_removed[removed_start:end]) if StringDistances.compare(xlines[removed_line], ylines[idx], dist) ≥ tol - diff.changed[removed_line] = DeepDiffs.deepdiff(xlines[removed_line], ylines[idx]) + # `(lines added before this changed line, change diff)` + diff.changed[removed_line] = (copy(added_before), DeepDiffs.deepdiff(xlines[removed_line], ylines[idx])) + if !isempty(added_before) + push!(diff.ignore_added, added_before...) + empty!(added_before) + end push!(diff.ignore_added, idx) removed_start += li # The next added lines will start from the next removed line + changed = true break end end + !changed && push!(added_before, idx) else # Treat conserved lines as a "reset" point empty!(previously_removed) + empty!(added_before) removed_start = 1 end end return diff end + + +function DeepDiffs.visitall(f, diff::CodeDiff) + DeepDiffs.visitall(diff.diff) do idx, state, last + if state == :removed + if haskey(diff.changed, idx) + added_lines_before, _ = diff.changed[idx] + for line_idx in added_lines_before + f(line_idx, :added, false) + end + f(idx, :changed, last) + else + f(idx, :removed, last) + end + elseif state == :added + idx ∉ diff.ignore_added && f(idx, state, last) + else + f(idx, state, last) + end + end +end diff --git a/src/compare.jl b/src/compare.jl index 3b3b2e2..bd07d10 100644 --- a/src/compare.jl +++ b/src/compare.jl @@ -85,6 +85,13 @@ function compare_code(code₁::AbstractString, code₂::AbstractString, highligh code₂_colored = code₂ end + if endswith(code₁, '\n') && endswith(code₂, '\n') + code₁ = rstrip(==('\n'), code₁) + code₁_colored = rstrip(==('\n'), 
code₁_colored) + code₂ = rstrip(==('\n'), code₂) + code₂_colored = rstrip(==('\n'), code₂_colored) + end + diff = CodeDiff(code₁, code₂, code₁_colored, code₂_colored) optimize_line_changes!(diff) return diff @@ -130,6 +137,13 @@ function compare_show(code₁, code₂; color=true, force_no_ansi=false) code₂_colored = code_str₂ end + if endswith(code_str₁, '\n') && endswith(code_str₂, '\n') + code_str₁ = rstrip(==('\n'), code_str₁) + code₁_colored = rstrip(==('\n'), code₁_colored) + code_str₂ = rstrip(==('\n'), code_str₂) + code₂_colored = rstrip(==('\n'), code₂_colored) + end + diff = CodeDiff(code_str₁, code_str₂, code₁_colored, code₂_colored) optimize_line_changes!(diff) return diff @@ -488,7 +502,8 @@ might be inferred automatically. """ macro code_diff(args...) length(args) < 2 && throw(ArgumentError("@code_diff takes at least 2 arguments")) - options..., code₁, code₂ = args + options = args[1:end-2] + code₁, code₂ = args[end-1:end] options = map(options) do option !(option isa Expr && option.head === :(=)) && diff --git a/src/display.jl b/src/display.jl index 7dc56e1..17f84d8 100644 --- a/src/display.jl +++ b/src/display.jl @@ -128,12 +128,14 @@ Side by side display of a [`CodeDiff`](@ref) to `io` (defaults to `stdout`). environment variable `"CODE_DIFFS_LINE_NUMBERS"`, which itself defaults to `false`. """ function side_by_side_diff(io::IO, diff::CodeDiff; tab_width=4, width=nothing, line_numbers=nothing) - line_numbers = @something line_numbers parse(Bool, get(ENV, "CODE_DIFFS_LINE_NUMBERS", "false")) + line_numbers = !isnothing(line_numbers) ? line_numbers : parse(Bool, get(ENV, "CODE_DIFFS_LINE_NUMBERS", "false")) + + # TODO: `tab_width` shouldn't simply replace '\t' by spaces, but rather pad mod `tab_width` xlines = split(diff.highlighted_before, '\n') ylines = split(diff.highlighted_after, '\n') - width = @something width displaysize(io)[2] + width = !isnothing(width) ? 
width : displaysize(io)[2] if line_numbers max_line = length(xlines) + length(DeepDiffs.added(diff)) line_num_width = length(string(max_line)) @@ -156,42 +158,37 @@ function side_by_side_diff(io::IO, diff::CodeDiff; tab_width=4, width=nothing, l left_line = 1 right_line = 1 - DeepDiffs.visitall(diff.diff) do idx, state, last - if line_numbers - if state !== :added - line_num = lpad(string(left_line), line_num_width) - printstyled(io, line_num, ' '; color=:light_black) - left_line += 1 - end - end - - right_printed = true - if state == :removed - if haskey(diff.changed, idx) - line_diff = diff.changed[idx] - print_columns_change(io, column_width, line_diff, xlines[idx], - sep_changed_to, empty_column, tab) - else - print_columns(io, column_width, xlines[idx], sep_removed, "", empty_column, tab) - right_printed = false - end - elseif state == :added - if idx ∈ diff.ignore_added - return - else - printstyled(io, empty_line_num) - print_columns(io, column_width, "", sep_added, ylines[idx], empty_column, tab) - end + function print_line_num(side) + !line_numbers && return + if side === :left + line_num = lpad(string(left_line), line_num_width) + printstyled(io, line_num, ' '; color=:light_black) + left_line += 1 else - print_columns(io, column_width, xlines[idx], sep_same, xlines[idx], empty_column, tab) - end - - if line_numbers && right_printed line_num = rpad(string(right_line), line_num_width) printstyled(io, line_num; color=:light_black) right_line += 1 end + end + DeepDiffs.visitall(diff) do idx, state, last + if state === :removed + print_line_num(:left) + print_columns(io, column_width, xlines[idx], sep_removed, "", empty_column, tab) + elseif state === :added + printstyled(io, empty_line_num) + print_columns(io, column_width, "", sep_added, ylines[idx], empty_column, tab) + print_line_num(:right) + elseif state === :changed + print_line_num(:left) + _, line_diff = diff.changed[idx] + print_columns_change(io, column_width, line_diff, xlines[idx], sep_changed_to, 
empty_column, tab) + print_line_num(:right) + else + print_line_num(:left) + print_columns(io, column_width, xlines[idx], sep_same, xlines[idx], empty_column, tab) + print_line_num(:right) + end !last && println(io) end end diff --git a/test/references/a_vs_b.jl_ast b/test/references/a_vs_b.jl_ast new file mode 100644 index 0000000..08586fa --- /dev/null +++ b/test/references/a_vs_b.jl_ast @@ -0,0 +1,8 @@ +quote ┃ quote + ┣⟫ println("B") + 1 + 2 ⟪╋⟫ 1 + 3 + f(a, b) ⟪╋⟫ f(a, d) + g(c, d) ⟪╋⟫ g(c, b) + ┣⟫ h(x, y) + "test" ⟪╋⟫ "test2" +end ┃ end \ No newline at end of file diff --git a/test/references/a_vs_b_COLOR.jl_ast b/test/references/a_vs_b_COLOR.jl_ast new file mode 100644 index 0000000..4ab7dac --- /dev/null +++ b/test/references/a_vs_b_COLOR.jl_ast @@ -0,0 +1,8 @@ + begin  ┃  begin +  ┣⟫ println("B") + 1 + 2 ⟪╋⟫ 1 + 3 + f(a, b) ⟪╋⟫ f(a, d) + g(c, d) ⟪╋⟫ g(c, b) +  ┣⟫ h(x, y) + "test" ⟪╋⟫ "test2" + end  ┃  end \ No newline at end of file diff --git a/test/references/a_vs_b_LINES.jl_ast b/test/references/a_vs_b_LINES.jl_ast new file mode 100644 index 0000000..0286d58 --- /dev/null +++ b/test/references/a_vs_b_LINES.jl_ast @@ -0,0 +1,8 @@ + 1 quote ┃ quote 1 + ┣⟫ println("B") 2 + 2 1 + 2 ⟪╋⟫ 1 + 3 3 + 3 f(a, b) ⟪╋⟫ f(a, d) 4 + 4 g(c, d) ⟪╋⟫ g(c, b) 5 + ┣⟫ h(x, y) 6 + 5 "test" ⟪╋⟫ "test2" 7 + 6 end ┃ end 8 \ No newline at end of file diff --git a/test/references/a_vs_b_PRINT.jl_ast b/test/references/a_vs_b_PRINT.jl_ast new file mode 100644 index 0000000..9485283 --- /dev/null +++ b/test/references/a_vs_b_PRINT.jl_ast @@ -0,0 +1,8 @@ + quote ++ println("B") +~ 1 + {-2-}{+3+} +~ f(a, {-b-}{+d+}) +~ g(c, {-d-}{+b+}) ++ h(x, y) +~ "test{+2+}" + end \ No newline at end of file diff --git a/test/references/f1.jl_typed b/test/references/f1.jl_typed deleted file mode 100644 index ebb3c0b..0000000 --- a/test/references/f1.jl_typed +++ /dev/null @@ -1,3 +0,0 @@ -CodeInfo( ┃ CodeInfo( -1 ─ return 1 ┃ 1 ─ return 1 -) => Int64 ┃ ) => Int64 \ No newline at end of file diff 
--git a/test/references/f1_COLOR.jl_typed b/test/references/f1_COLOR.jl_typed deleted file mode 100644 index 8138db6..0000000 --- a/test/references/f1_COLOR.jl_typed +++ /dev/null @@ -1,3 +0,0 @@ -CodeInfo(  ┃ CodeInfo( -1 ─ return 1  ┃ 1 ─ return 1 -) => Int64  ┃ ) => Int64 \ No newline at end of file diff --git a/test/references/f1_x86.S b/test/references/f1_x86.S deleted file mode 100644 index 6f4fe17..0000000 --- a/test/references/f1_x86.S +++ /dev/null @@ -1,22 +0,0 @@ - .text ┃ .text - .file "f" ┃ .file "f" - .globl f # -- Begin function f ┃ .globl f # -- Begin function f - .p2align 4, 0x90 ┃ .p2align 4, 0x90 - .type f,@function ┃ .type f,@function -f: # @f ┃ f: # @f - .cfi_startproc ┃ .cfi_startproc -# %bb.0: # %top ┃ # %bb.0: # %top - push rbp ┃ push rbp - .cfi_def_cfa_offset 16 ┃ .cfi_def_cfa_offset 16 - .cfi_offset rbp, -16 ┃ .cfi_offset rbp, -16 - mov rbp, rsp ┃ mov rbp, rsp - .cfi_def_cfa_register rbp ┃ .cfi_def_cfa_register rbp - mov eax, 1 ┃ mov eax, 1 - pop rbp ┃ pop rbp - ret ┃ ret -.Lfunc_end0: ┃ .Lfunc_end0: - .size f, .Lfunc_end0-f ┃ .size f, .Lfunc_end0-f - .cfi_endproc ┃ .cfi_endproc - # -- End function ┃ # -- End function - .section ".note.GNU-stack","",@progbits ┃ .section ".note.GNU-stack","",@progbits - ┃ \ No newline at end of file diff --git a/test/references/f1_x86.ll b/test/references/f1_x86.ll deleted file mode 100644 index acd1c92..0000000 --- a/test/references/f1_x86.ll +++ /dev/null @@ -1,6 +0,0 @@ -; Function Attrs: uwtable ┃ ; Function Attrs: uwtable -define i64 @f() #0 { ┃ define i64 @f() #0 { -top: ┃ top: - ret i64 1 ┃ ret i64 1 -} ┃ } - ┃ \ No newline at end of file diff --git a/test/references/f1_x86_COLOR.S b/test/references/f1_x86_COLOR.S deleted file mode 100644 index aa34020..0000000 --- a/test/references/f1_x86_COLOR.S +++ /dev/null @@ -1,22 +0,0 @@ - .text  ┃  .text - .file "f"  ┃  .file "f" - .globl f # -- Begin function f  ┃  .globl f # -- Begin function f - .p2align 4, 0x90  ┃  .p2align 4, 0x90 - .type f,@function  ┃  
.type f,@function -f: # @f  ┃ f: # @f - .cfi_startproc  ┃  .cfi_startproc -# %bb.0: # %top  ┃ # %bb.0: # %top - push rbp  ┃  push rbp - .cfi_def_cfa_offset 16  ┃  .cfi_def_cfa_offset 16 - .cfi_offset rbp, -16  ┃  .cfi_offset rbp, -16 - mov rbp, rsp  ┃  mov rbp, rsp - .cfi_def_cfa_register rbp  ┃  .cfi_def_cfa_register rbp - mov eax, 1  ┃  mov eax, 1 - pop rbp  ┃  pop rbp - ret  ┃  ret -.Lfunc_end0:  ┃ .Lfunc_end0: - .size f, .Lfunc_end0-f  ┃  .size f, .Lfunc_end0-f - .cfi_endproc  ┃  .cfi_endproc - # -- End function  ┃  # -- End function - .section ".note.GNU-stack","",@progbits  ┃  .section ".note.GNU-stack","",@progbits -  ┃  \ No newline at end of file diff --git a/test/references/f1_x86_COLOR.ll b/test/references/f1_x86_COLOR.ll deleted file mode 100644 index 56346e4..0000000 --- a/test/references/f1_x86_COLOR.ll +++ /dev/null @@ -1,6 +0,0 @@ -; Function Attrs: uwtable  ┃ ; Function Attrs: uwtable -define i64 @f() #0 {  ┃ define i64 @f() #0 { -top:  ┃ top: - ret i64 1  ┃  ret i64 1 -}  ┃ } -  ┃  \ No newline at end of file diff --git a/test/references/f1_x86_LINES.ll b/test/references/f1_x86_LINES.ll deleted file mode 100644 index 1eb9589..0000000 --- a/test/references/f1_x86_LINES.ll +++ /dev/null @@ -1,6 +0,0 @@ -1 ; Function Attrs: uwtable ┃ ; Function Attrs: uwtable 1 -2 define i64 @f() #0 { ┃ define i64 @f() #0 { 2 -3 top: ┃ top: 3 -4 ret i64 1 ┃ ret i64 1 4 -5 } ┃ } 5 -6 ┃ 6 \ No newline at end of file diff --git a/test/references/saxpy.jl_typed b/test/references/saxpy.jl_typed deleted file mode 100644 index 858a51c..0000000 --- a/test/references/saxpy.jl_typed +++ /dev/null @@ -1,55 +0,0 @@ -CodeInfo( ┃ CodeInfo( -1 ── %1 = Base.arraysize(r, 1)::Int64 ┃ 1 ── %1 = Base.arraysize(r, 1)::Int64 -│ %2 = Base.slt_int(%1, 0)::Bool ┃ │ %2 = Base.slt_int(%1, 0)::Bool -│ %3 = Core.ifelse(%2, 0, %1)::Int64 ┃ │ %3 = Core.ifelse(%2, 0, %1)::Int64 -│ %4 = Base.slt_int(%3, 1)::Bool ⟪┫ -└─── goto #3 if not %4 ⟪╋⟫└─── goto #13 if not true -2 ── goto #4 ⟪┫ -3 ── goto #4 
⟪┫ -4 ┄─ %8 = φ (#2 => true, #3 => false)::Bool ⟪┫ -│ %9 = φ (#3 => 1)::Int64 ⟪┫ -│ %10 = φ (#3 => 1)::Int64 ⟪┫ -│ %11 = Base.not_int(%8)::Bool ⟪┫ -└─── goto #10 if not %11 ⟪╋⟫└─── goto #12 if not %6 -5 ┄─ %13 = φ (#4 => %9, #9 => %25)::Int64 ⟪╋⟫4 ┄─ %9 = φ (#3 => 0, #11 => %28)::Int64 -│ %14 = φ (#4 => %10, #9 => %26)::Int64 ⟪┫ -│ %15 = Base.arrayref(true, x, %13)::Int64 ⟪┫ -│ %16 = Base.mul_int(a, %15)::Int64 ⟪┫ -│ %17 = Base.arrayref(true, y, %13)::Int64 ⟪┫ -│ %18 = Base.add_int(%16, %17)::Int64 ⟪┫ -│ Base.arrayset(true, r, %18, %13)::Vector{Int64…⟪┫ -│ %20 = (%14 === %3)::Bool ⟪┫ -└─── goto #7 if not %20 ⟪┫ -6 ── goto #8 ⟪┫ -7 ── %23 = Base.add_int(%14, 1)::Int64 ⟪┫ -└─── goto #8 ⟪┫ -8 ┄─ %25 = φ (#7 => %23)::Int64 ⟪┫ -│ %26 = φ (#7 => %23)::Int64 ⟪┫ -│ %27 = φ (#6 => true, #7 => false)::Bool ⟪┫ -│ %28 = Base.not_int(%27)::Bool ⟪╋⟫│ %10 = Base.slt_int(%9, %3)::Bool -└─── goto #10 if not %28 ⟪╋⟫└─── goto #12 if not %10 -9 ── goto #5 ⟪╋⟫7 ── goto #9 -10 ┄ return nothing ⟪╋⟫13 ┄ return Main.nothing - ┣⟫│ %4 = %new(Base.OneTo{Int64}, %3)::Base.OneTo{Int64} - ┣⟫2 ── %6 = Base.slt_int(0, %3)::Bool - ┣⟫3 ── nothing::Nothing - ┣⟫5 ── %12 = Base.add_int(%9, 1)::Int64 - ┣⟫└─── goto #9 if not false - ┣⟫6 ── %14 = Base.slt_int(0, %12)::Bool - ┣⟫│ %15 = Base.sle_int(%12, %3)::Bool - ┣⟫│ %16 = Base.and_int(%14, %15)::Bool - ┣⟫└─── goto #8 if not %16 - ┣⟫8 ── invoke Base.throw_boundserror(%4::Base.OneTo{I… - ┣⟫└─── unreachable - ┣⟫9 ┄─ goto #10 - ┣⟫10 ─ goto #11 - ┣⟫11 ─ %23 = Base.arrayref(false, x, %12)::Int64 - ┣⟫│ %24 = Base.mul_int(a, %23)::Int64 - ┣⟫│ %25 = Base.arrayref(false, y, %12)::Int64 - ┣⟫│ %26 = Base.add_int(%24, %25)::Int64 - ┣⟫│ Base.arrayset(false, r, %26, %12)::Vector{Int6… - ┣⟫│ %28 = Base.add_int(%9, 1)::Int64 - ┣⟫│ $(Expr(:loopinfo, Symbol("julia.simdloop"), Sy… - ┣⟫└─── goto #4 - ┣⟫12 ┄ nothing::Nothing -) => Nothing ┃ ) => Nothing \ No newline at end of file diff --git a/test/references/saxpy_COLOR.jl_typed b/test/references/saxpy_COLOR.jl_typed 
deleted file mode 100644 index 3ffce3a..0000000 --- a/test/references/saxpy_COLOR.jl_typed +++ /dev/null @@ -1,55 +0,0 @@ -CodeInfo(  ┃ CodeInfo( -1 ── %1 = Base.arraysize(r, 1)::Int64  ┃ 1 ── %1 = Base.arraysize(r, 1)::Int64 -│  %2 = Base.slt_int(%1, 0)::Bool  ┃ │  %2 = Base.slt_int(%1, 0)::Bool -│  %3 = Core.ifelse(%2, 0, %1)::Int64  ┃ │  %3 = Core.ifelse(%2, 0, %1)::Int64 -│  %4 = Base.slt_int(%3, 1)::Bool ⟪┫  -└─── goto #3 if not %4 ⟪╋⟫└─── goto #13 if not true -2 ── goto #4 ⟪┫  -3 ── goto #4 ⟪┫  -4 ┄─ %8 = φ (#2 => true, #3 => false)::Bool ⟪┫  -│  %9 = φ (#3 => 1)::Int64 ⟪┫  -│  %10 = φ (#3 => 1)::Int64 ⟪┫  -│  %11 = Base.not_int(%8)::Bool ⟪┫  -└─── goto #10 if not %11 ⟪╋⟫└─── goto #12 if not %6 -5 ┄─ %13 = φ (#4 => %9, #9 => %25)::Int64 ⟪╋⟫4 ┄─ %9  = φ (#3 => 0, #11 => %28)::Int64 -│  %14 = φ (#4 => %10, #9 => %26)::Int64 ⟪┫  -│  %15 = Base.arrayref(true, x, %13)::Int64 ⟪┫  -│  %16 = Base.mul_int(a, %15)::Int64 ⟪┫  -│  %17 = Base.arrayref(true, y, %13)::Int64 ⟪┫  -│  %18 = Base.add_int(%16, %17)::Int64 ⟪┫  -│  Base.arrayset(true, r, %18, %13)::Vector{Int64…⟪┫  -│  %20 = (%14 === %3)::Bool ⟪┫  -└─── goto #7 if not %20 ⟪┫  -6 ── goto #8 ⟪┫  -7 ── %23 = Base.add_int(%14, 1)::Int64 ⟪┫  -└─── goto #8 ⟪┫  -8 ┄─ %25 = φ (#7 => %23)::Int64 ⟪┫  -│  %26 = φ (#7 => %23)::Int64 ⟪┫  -│  %27 = φ (#6 => true, #7 => false)::Bool ⟪┫  -│ %28 = Base.not_int(%27)::Bool ⟪╋⟫│ %10 = Base.slt_int(%9, %3)::Bool -└─── goto #10 if not %28 ⟪╋⟫└─── goto #12 if not %10 -9 ──  goto #5 ⟪╋⟫7 ──  goto #9 -10 ┄  return nothing ⟪╋⟫13 ┄  return Main.nothing -  ┣⟫│  %4 = %new(Base.OneTo{Int64}, %3)::Base.OneTo{Int64} -  ┣⟫2 ── %6 = Base.slt_int(0, %3)::Bool -  ┣⟫3 ── nothing::Nothing -  ┣⟫5 ── %12 = Base.add_int(%9, 1)::Int64 -  ┣⟫└─── goto #9 if not false -  ┣⟫6 ── %14 = Base.slt_int(0, %12)::Bool -  ┣⟫│  %15 = Base.sle_int(%12, %3)::Bool -  ┣⟫│  %16 = Base.and_int(%14, %15)::Bool -  ┣⟫└─── goto #8 if not %16 -  ┣⟫8 ── invoke Base.throw_boundserror(%4::Base.OneTo{I… -  ┣⟫└─── unreachable -  ┣⟫9 
┄─ goto #10 -  ┣⟫10 ─ goto #11 -  ┣⟫11 ─ %23 = Base.arrayref(false, x, %12)::Int64 -  ┣⟫│  %24 = Base.mul_int(a, %23)::Int64 -  ┣⟫│  %25 = Base.arrayref(false, y, %12)::Int64 -  ┣⟫│  %26 = Base.add_int(%24, %25)::Int64 -  ┣⟫│  Base.arrayset(false, r, %26, %12)::Vector{Int6… -  ┣⟫│  %28 = Base.add_int(%9, 1)::Int64 -  ┣⟫│  $(Expr(:loopinfo, Symbol("julia.simdloop"), Sy… -  ┣⟫└─── goto #4 -  ┣⟫12 ┄ nothing::Nothing -) => Nothing  ┃ ) => Nothing \ No newline at end of file diff --git a/test/references/saxpy_x86.S b/test/references/saxpy_x86.S deleted file mode 100644 index 91d1cac..0000000 --- a/test/references/saxpy_x86.S +++ /dev/null @@ -1,379 +0,0 @@ - .text ┃ .text - .file "saxpy" ⟪╋⟫ .file "saxpy_simd" - .globl saxpy # -- Begin function sa…⟪╋⟫ .globl saxpy_simd # -- Begin function sa… - .p2align 4, 0x90 ┃ .p2align 4, 0x90 - .type saxpy,@function ⟪╋⟫ .type saxpy_simd,@function -saxpy: # @saxpy ⟪╋⟫saxpy_simd: # @saxpy_simd - .cfi_startproc ┃ .cfi_startproc -# %bb.0: # %top ┃ # %bb.0: # %top - push rbp ┃ push rbp - .cfi_def_cfa_offset 16 ┃ .cfi_def_cfa_offset 16 - .cfi_offset rbp, -16 ┃ .cfi_offset rbp, -16 - mov rbp, rsp ┃ mov rbp, rsp - .cfi_def_cfa_register rbp ┃ .cfi_def_cfa_register rbp - push r15 ⟪┫ - push r14 ⟪┫ - push r13 ⟪┫ - push r12 ⟪┫ - push rsi ┃ push rsi - push rdi ┃ push rdi - push rbx ⟪┫ - sub rsp, 72 ⟪╋⟫ sub rsp, 32 - vmovdqa xmmword ptr [rbp - 80], xmm7 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 32], xmm7 # 16-byte … - vmovdqa xmmword ptr [rbp - 96], xmm6 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 48], xmm6 # 16-byte … - .cfi_offset rbx, -72 ⟪╋⟫ .cfi_offset rdi, -32 - .cfi_offset rdi, -64 ⟪╋⟫ .cfi_offset rsi, -24 - .cfi_offset rsi, -56 ⟪╋⟫ .cfi_offset xmm6, -64 - .cfi_offset r12, -48 ⟪╋⟫ .cfi_offset xmm7, -48 - .cfi_offset r13, -40 ⟪┫ - .cfi_offset r14, -32 ⟪┫ - .cfi_offset r15, -24 ⟪┫ - .cfi_offset xmm6, -112 ⟪┫ - .cfi_offset xmm7, -96 ⟪┫ - mov r13, r8 ⟪┫ - mov r8, qword ptr [rcx + 8] ⟪╋⟫ mov r10, qword ptr [rcx + 8] - test r8, r8 ⟪╋⟫ test r10, r10 
- je .LBB0_15 ⟪╋⟫ je .LBB0_11 -# %bb.1: # %L13.preheader ⟪╋⟫# %bb.1: # %L12.lr.ph - mov r15, qword ptr [r13] ⟪╋⟫ mov r8, qword ptr [r8] - mov r11, qword ptr [r13 + 8] ⟪╋⟫ mov r9, qword ptr [r9] - mov r10, qword ptr [r9] ⟪╋⟫ mov r11, qword ptr [rcx] - mov r14, qword ptr [r9 + 8] ⟪┫ - mov qword ptr [rbp - 120], rcx # 8-byte Spill ⟪┫ - mov rbx, qword ptr [rcx] ⟪┫ - cmp r11, r8 ⟪╋⟫ cmp r10, 16 - mov rax, r8 ⟪┫ - cmovb rax, r11 ⟪┫ - mov rsi, r14 ⟪┫ - sar rsi, 63 ⟪┫ - and rsi, r14 ⟪┫ - mov rdi, r14 ⟪┫ - sub rdi, rsi ⟪┫ - test rsi, rsi ⟪┫ - mov r12, -1 ⟪┫ - cmovns r12, rsi ⟪┫ - inc r12 ⟪┫ - imul r12, rdi ⟪┫ - cmp rax, r12 ⟪┫ - cmovb r12, rax ⟪┫ - cmp r12, r8 ⟪┫ - cmovae r12, r8 ⟪┫ - mov edi, 1 ⟪┫ - test r12, r12 ⟪┫ - je .LBB0_2 ⟪╋⟫ jae .LBB0_3 -# %bb.3: # %idxend21.prehe…⟪┫ - cmp r12, 16 ⟪┫ - jae .LBB0_4 ⟪┫ -.LBB0_6: # %scalar.ph ⟪┫ - dec rdi ⟪┫ - mov rax, rdi ⟪┫ - .p2align 4, 0x90 ⟪┫ -.LBB0_7: # %idxend21 ⟪┫ - # =>This Inner Lo…⟪┫ - mov rcx, qword ptr [r15 + 8*rax] ⟪┫ - imul rcx, rdx ⟪┫ - add rcx, qword ptr [r10 + 8*rax] ⟪┫ - mov qword ptr [rbx + 8*rax], rcx ⟪┫ - inc rax ⟪┫ - cmp rax, r12 ⟪┫ - jb .LBB0_7 ⟪┫ -# %bb.8: # %main.exit.sele…⟪┫ - lea rdi, [rax + 1] ⟪┫ -.LBB0_9: # %main.exit.sele…⟪┫ - cmp rax, r8 ⟪┫ - mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫ - jae .LBB0_15 ⟪┫ - ┣⟫# %bb.2: - ┣⟫ xor eax, eax - jmp .LBB0_10 ┃ jmp .LBB0_10 -.LBB0_2: ⟪┫ - mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫ -.LBB0_10: # %main.pseudo.ex…⟪╋⟫.LBB0_3: # %vector.ph - lea rax, [r8 + 1] ⟪┫ - .p2align 4, 0x90 ⟪┫ -.LBB0_11: # %L13.postloop ⟪┫ - # =>This Inner Lo…⟪┫ - lea rsi, [rdi - 1] ⟪┫ - cmp rsi, r11 ⟪┫ - jae .LBB0_25 ⟪┫ -# %bb.12: # %idxend.postloo…⟪┫ - # in Loop: Head…⟪┫ - cmp rsi, r14 ⟪┫ - jae .LBB0_26 ⟪┫ -# %bb.13: # %idxend12.postl…⟪┫ - # in Loop: Head…⟪┫ - cmp rsi, r8 ⟪┫ - jae .LBB0_27 ⟪┫ -# %bb.14: # %idxend21.postl…⟪┫ - # in Loop: Head…⟪┫ - mov rsi, qword ptr [r15 + 8*rdi - 8] ⟪┫ - imul rsi, rdx ⟪┫ - add rsi, qword ptr [r10 + 8*rdi - 8] ⟪┫ - mov qword ptr [rbx + 8*rdi - 8], rsi 
⟪┫ - inc rdi ⟪┫ - cmp rax, rdi ⟪┫ - jne .LBB0_11 ⟪┫ -.LBB0_15: # %L31 ⟪┫ - vmovaps xmm6, xmmword ptr [rbp - 96] # 16-byte …⟪┫ - vmovaps xmm7, xmmword ptr [rbp - 80] # 16-byte …⟪┫ - lea rsp, [rbp - 56] ⟪┫ - pop rbx ⟪┫ - pop rdi ⟪┫ - pop rsi ⟪┫ - pop r12 ⟪┫ - pop r13 ⟪┫ - pop r14 ⟪┫ - pop r15 ⟪┫ - pop rbp ⟪┫ - vzeroupper ⟪┫ - ret ⟪┫ -.LBB0_4: # %vector.memchec…⟪┫ - mov qword ptr [rbp - 104], r13 # 8-byte Spill ⟪┫ - mov qword ptr [rbp - 112], r9 # 8-byte Spill ⟪┫ - lea rsi, [rbx + 8*r12] ⟪┫ - lea rax, [r15 + 8*r12] ⟪┫ - lea r13, [r10 + 8*r12] ⟪┫ - cmp rbx, rax ⟪┫ - setb r9b ⟪┫ - cmp r15, rsi ⟪┫ - setb cl ⟪┫ - cmp rbx, r13 ⟪┫ - setb al ⟪┫ - cmp r10, rsi ⟪┫ - setb sil ⟪┫ - test r9b, cl ⟪┫ - jne .LBB0_5 ⟪┫ -# %bb.16: # %vector.memchec…⟪┫ - and al, sil ⟪┫ - mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫ - mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫ - jne .LBB0_6 ⟪┫ -# %bb.17: # %vector.ph ⟪┫ - movabs rsi, 9223372036854775792 ⟪┫ - and rsi, r12 ⟪┫ - ┣⟫ mov rax, r10 - ┣⟫ and rax, -16 - vmovq xmm0, rdx ┃ vmovq xmm0, rdx - vpbroadcastq ymm0, xmm0 ┃ vpbroadcastq ymm0, xmm0 - lea rcx, [rsi - 16] ⟪╋⟫ lea rcx, [rax - 16] - mov rax, rcx ⟪╋⟫ mov rsi, rcx - shr rax, 4 ⟪╋⟫ shr rsi, 4 - inc rax ⟪╋⟫ inc rsi - vpsrlq ymm1, ymm0, 32 ┃ vpsrlq ymm1, ymm0, 32 - test rcx, rcx ┃ test rcx, rcx - je .LBB0_18 ⟪╋⟫ je .LBB0_4 -# %bb.19: # %vector.ph.new ⟪╋⟫# %bb.5: # %vector.ph.new - mov r13, rax ⟪┫ - and r13, -2 ⟪╋⟫ and rdi, -2 - xor edi, edi ⟪┫ - ┣⟫ mov rdi, rsi - ┣⟫ xor ecx, ecx - .p2align 4, 0x90 ┃ .p2align 4, 0x90 -.LBB0_20: # %vector.body ⟪╋⟫.LBB0_6: # %vector.body - # =>This Inner Lo… ┃ # =>This Inner Lo… - vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] - vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] - vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] - vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] - vpmuludq ymm6, 
ymm2, ymm1 ┃ vpmuludq ymm6, ymm2, ymm1 - vpsrlq ymm7, ymm2, 32 ┃ vpsrlq ymm7, ymm2, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm2, ymm2, ymm0 ┃ vpmuludq ymm2, ymm2, ymm0 - vpaddq ymm2, ymm2, ymm6 ┃ vpaddq ymm2, ymm2, ymm6 - vpmuludq ymm6, ymm3, ymm1 ┃ vpmuludq ymm6, ymm3, ymm1 - vpsrlq ymm7, ymm3, 32 ┃ vpsrlq ymm7, ymm3, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm3, ymm3, ymm0 ┃ vpmuludq ymm3, ymm3, ymm0 - vpaddq ymm3, ymm3, ymm6 ┃ vpaddq ymm3, ymm3, ymm6 - vpmuludq ymm6, ymm4, ymm1 ┃ vpmuludq ymm6, ymm4, ymm1 - vpsrlq ymm7, ymm4, 32 ┃ vpsrlq ymm7, ymm4, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm4, ymm4, ymm0 ┃ vpmuludq ymm4, ymm4, ymm0 - vpaddq ymm4, ymm4, ymm6 ┃ vpaddq ymm4, ymm4, ymm6 - vpmuludq ymm6, ymm5, ymm1 ┃ vpmuludq ymm6, ymm5, ymm1 - vpsrlq ymm7, ymm5, 32 ┃ vpsrlq ymm7, ymm5, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm5, ymm5, ymm0 ┃ vpmuludq ymm5, ymm5, ymm0 - vpaddq ymm5, ymm5, ymm6 ┃ vpaddq ymm5, ymm5, ymm6 - vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx] - vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 32] - vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 64] - vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 96] - vmovdqu ymmword ptr [rbx + 8*rdi], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm2 - vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm3 
⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm3 - vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm4 - vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm5 - vmovdqu ymm2, ymmword ptr [r15 + 8*rdi + 128] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx + 128] - vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 160] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 160] - vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 192] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 192] - vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 224] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 224] - vpmuludq ymm6, ymm2, ymm1 ┃ vpmuludq ymm6, ymm2, ymm1 - vpsrlq ymm7, ymm2, 32 ┃ vpsrlq ymm7, ymm2, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm2, ymm2, ymm0 ┃ vpmuludq ymm2, ymm2, ymm0 - vpaddq ymm2, ymm2, ymm6 ┃ vpaddq ymm2, ymm2, ymm6 - vpmuludq ymm6, ymm3, ymm1 ┃ vpmuludq ymm6, ymm3, ymm1 - vpsrlq ymm7, ymm3, 32 ┃ vpsrlq ymm7, ymm3, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm3, ymm3, ymm0 ┃ vpmuludq ymm3, ymm3, ymm0 - vpaddq ymm3, ymm3, ymm6 ┃ vpaddq ymm3, ymm3, ymm6 - vpmuludq ymm6, ymm4, ymm1 ┃ vpmuludq ymm6, ymm4, ymm1 - vpsrlq ymm7, ymm4, 32 ┃ vpsrlq ymm7, ymm4, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm4, ymm4, ymm0 ┃ vpmuludq ymm4, ymm4, ymm0 - vpaddq ymm4, ymm4, ymm6 ┃ vpaddq ymm4, ymm4, ymm6 - vpmuludq ymm6, ymm5, ymm1 ┃ vpmuludq ymm6, ymm5, ymm1 - vpsrlq ymm7, ymm5, 32 ┃ vpsrlq ymm7, ymm5, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - 
vpmuludq ymm5, ymm5, ymm0 ┃ vpmuludq ymm5, ymm5, ymm0 - vpaddq ymm5, ymm5, ymm6 ┃ vpaddq ymm5, ymm5, ymm6 - vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi + 128] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx + 128] - vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 160] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 160] - vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 192] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 192] - vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 224] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 224] - vmovdqu ymmword ptr [rbx + 8*rdi + 128], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 128], ymm2 - vmovdqu ymmword ptr [rbx + 8*rdi + 160], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 160], ymm3 - vmovdqu ymmword ptr [rbx + 8*rdi + 192], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 192], ymm4 - vmovdqu ymmword ptr [rbx + 8*rdi + 224], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 224], ymm5 - add rdi, 32 ⟪╋⟫ add rcx, 32 - add r13, -2 ⟪╋⟫ add rdi, -2 - jne .LBB0_20 ⟪╋⟫ jne .LBB0_6 - jmp .LBB0_21 ⟪┫ -.LBB0_5: ⟪┫ - mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫ - mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫ - jmp .LBB0_6 ⟪┫ -.LBB0_18: ⟪┫ - xor edi, edi ⟪┫ -.LBB0_21: # %middle.block.u…⟪╋⟫# %bb.7: # %middle.block.u… - test al, 1 ⟪╋⟫ test sil, 1 - mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫ - je .LBB0_23 ⟪╋⟫ je .LBB0_9 -# %bb.22: # %vector.body.ep…⟪╋⟫.LBB0_8: # %vector.body.ep… - vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] - vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] - vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] - vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] - vpmuludq ymm6, ymm2, ymm1 ┃ vpmuludq ymm6, ymm2, ymm1 - vpsrlq ymm7, ymm2, 32 ┃ vpsrlq ymm7, ymm2, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, 
ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm2, ymm2, ymm0 ┃ vpmuludq ymm2, ymm2, ymm0 - vpaddq ymm2, ymm2, ymm6 ┃ vpaddq ymm2, ymm2, ymm6 - vpmuludq ymm6, ymm3, ymm1 ┃ vpmuludq ymm6, ymm3, ymm1 - vpsrlq ymm7, ymm3, 32 ┃ vpsrlq ymm7, ymm3, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm3, ymm3, ymm0 ┃ vpmuludq ymm3, ymm3, ymm0 - vpaddq ymm3, ymm3, ymm6 ┃ vpaddq ymm3, ymm3, ymm6 - vpmuludq ymm6, ymm4, ymm1 ┃ vpmuludq ymm6, ymm4, ymm1 - vpsrlq ymm7, ymm4, 32 ┃ vpsrlq ymm7, ymm4, 32 - vpmuludq ymm7, ymm7, ymm0 ┃ vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7 ┃ vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32 ┃ vpsllq ymm6, ymm6, 32 - vpmuludq ymm4, ymm4, ymm0 ┃ vpmuludq ymm4, ymm4, ymm0 - vpaddq ymm4, ymm4, ymm6 ┃ vpaddq ymm4, ymm4, ymm6 - vpmuludq ymm1, ymm5, ymm1 ┃ vpmuludq ymm1, ymm5, ymm1 - vpsrlq ymm6, ymm5, 32 ┃ vpsrlq ymm6, ymm5, 32 - vpmuludq ymm6, ymm6, ymm0 ┃ vpmuludq ymm6, ymm6, ymm0 - vpaddq ymm1, ymm1, ymm6 ┃ vpaddq ymm1, ymm1, ymm6 - vpsllq ymm1, ymm1, 32 ┃ vpsllq ymm1, ymm1, 32 - vpmuludq ymm0, ymm5, ymm0 ┃ vpmuludq ymm0, ymm5, ymm0 - vpaddq ymm0, ymm0, ymm1 ┃ vpaddq ymm0, ymm0, ymm1 - vpaddq ymm1, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm1, ymm2, ymmword ptr [r9 + 8*rcx] - vpaddq ymm2, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm2, ymm3, ymmword ptr [r9 + 8*rcx + 32] - vpaddq ymm3, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm3, ymm4, ymmword ptr [r9 + 8*rcx + 64] - vpaddq ymm0, ymm0, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm0, ymm0, ymmword ptr [r9 + 8*rcx + 96] - vmovdqu ymmword ptr [rbx + 8*rdi], ymm1 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm1 - vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm2 - vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm3 - vmovdqu ymmword ptr [rbx + 8*rdi + 
96], ymm0 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm0 -.LBB0_23: # %middle.block ⟪╋⟫.LBB0_9: # %middle.block - lea rdi, [rsi + 1] ⟪┫ - cmp r12, rsi ⟪╋⟫ cmp r10, rax - jne .LBB0_6 ⟪╋⟫ je .LBB0_11 -# %bb.24: ⟪┫ - mov rax, r12 ⟪┫ - jmp .LBB0_9 ⟪┫ -.LBB0_25: # %oob ⟪╋⟫.LBB0_10: # %L12 - mov eax, 16 ⟪┫ - movabs r11, offset ___chkstk_ms ⟪┫ - call r11 ⟪┫ - sub rsp, rax ⟪┫ - mov rdx, rsp ⟪┫ - mov qword ptr [rdx], rdi ⟪┫ - sub rsp, 32 ⟪┫ - movabs rax, offset ijl_bounds_error_ints ⟪┫ - mov r8d, 1 ⟪┫ - mov rcx, r13 ⟪┫ - vzeroupper ⟪┫ - call rax ⟪┫ -.LBB0_26: # %oob10 ⟪╋⟫.LBB0_11: # %L32 - mov eax, 16 ⟪┫ - movabs r11, offset ___chkstk_ms ⟪┫ - call r11 ⟪┫ - sub rsp, rax ⟪┫ - mov rdx, rsp ⟪┫ - mov qword ptr [rdx], rdi ⟪┫ - sub rsp, 32 ⟪╋⟫ add rsp, 32 - movabs rax, offset ijl_bounds_error_ints ⟪┫ - mov r8d, 1 ⟪┫ - mov rcx, r9 ⟪┫ - vzeroupper ⟪┫ - call rax ⟪┫ -.LBB0_27: # %oob19 ⟪┫ - mov eax, 16 ⟪┫ - movabs r11, offset ___chkstk_ms ⟪┫ - call r11 ⟪┫ - sub rsp, rax ⟪┫ - mov rdx, rsp ⟪┫ - mov qword ptr [rdx], rdi ⟪┫ - sub rsp, 32 ⟪┫ - movabs rax, offset ijl_bounds_error_ints ⟪┫ - mov r8d, 1 ⟪┫ - ┣⟫ .p2align 4, 0x90 - ┣⟫ # =>This Inner Lo… - ┣⟫ mov rcx, qword ptr [r8 + 8*rax] - ┣⟫ imul rcx, rdx - ┣⟫ add rcx, qword ptr [r9 + 8*rax] - ┣⟫ mov qword ptr [r11 + 8*rax], rcx - ┣⟫ inc rax - ┣⟫ cmp r10, rax - ┣⟫ jne .LBB0_10 - ┣⟫ vmovaps xmm6, xmmword ptr [rbp - 48] # 16-byte … - ┣⟫ vmovaps xmm7, xmmword ptr [rbp - 32] # 16-byte … - ┣⟫ pop rdi - ┣⟫ pop rsi - ┣⟫ pop rbp - vzeroupper ┃ vzeroupper - call rax ⟪┫ - ┣⟫ ret - ┣⟫.LBB0_4: - ┣⟫ xor ecx, ecx - ┣⟫ test sil, 1 - ┣⟫ je .LBB0_9 - ┣⟫ jmp .LBB0_8 -.Lfunc_end0: ┃ .Lfunc_end0: - .size saxpy, .Lfunc_end0-saxpy ⟪╋⟫ .size saxpy_simd, .Lfunc_end0-saxpy_simd - .cfi_endproc ┃ .cfi_endproc - # -- End function ┃ # -- End function - .section ".note.GNU-stack","",@progbits ┃ .section ".note.GNU-stack","",@progbits - ┃ \ No newline at end of file diff --git a/test/references/saxpy_x86.ll b/test/references/saxpy_x86.ll deleted file mode 100644 index 
0e90e52..0000000 --- a/test/references/saxpy_x86.ll +++ /dev/null @@ -1,324 +0,0 @@ -; Function Attrs: uwtable ┃ ; Function Attrs: uwtable -define void @saxpy({}* noundef nonnull align 16 dereferen…⟪╋⟫define void @saxpy_simd({}* noundef nonnull align 16 dere… -top: ┃ top: - %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* ┃ %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* - %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… ┃ %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… - %arraylen = load i64, i64* %arraylen_ptr, align 8 ┃ %arraylen = load i64, i64* %arraylen_ptr, align 8 - %.not.not = icmp eq i64 %arraylen, 0 ⟪╋⟫ %.not = icmp eq i64 %arraylen, 0 - br i1 %.not.not, label %L31, label %L13.preheader ⟪╋⟫ br i1 %.not, label %L32, label %L12.lr.ph - ⟪┫ -L13.preheader: ; preds…⟪┫ - %5 = bitcast {}* %2 to { i8*, i64, i16, i16, i32 }* ⟪┫ - %arraylen_ptr5 = getelementptr inbounds { i8*, i64, i16…⟪┫ - %arraylen6 = load i64, i64* %arraylen_ptr5, align 8 ⟪┫ - %6 = bitcast {}* %3 to { i8*, i64, i16, i16, i32 }* ⟪┫ - %arraylen_ptr7 = getelementptr inbounds { i8*, i64, i16…⟪┫ - %arraylen8 = load i64, i64* %arraylen_ptr7, align 8 ⟪┫ - %7 = bitcast {}* %2 to i64** ⟪┫ - %arrayptr29 = load i64*, i64** %7, align 8 ⟪┫ - %8 = bitcast {}* %3 to i64** ⟪┫ - %arrayptr1430 = load i64*, i64** %8, align 8 ⟪┫ - %9 = bitcast {}* %0 to i64** ⟪┫ - %arrayptr2331 = load i64*, i64** %9, align 8 ⟪┫ - %umin = call i64 @llvm.umin.i64(i64 %arraylen6, i64 %ar…⟪┫ - %smin = call i64 @llvm.smin.i64(i64 %arraylen8, i64 0) ⟪┫ - %10 = sub i64 %arraylen8, %smin ⟪┫ - %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1) ⟪┫ - %11 = add nsw i64 %smax, 1 ⟪┫ - %12 = mul nuw nsw i64 %10, %11 ⟪┫ - %umin36 = call i64 @llvm.umin.i64(i64 %umin, i64 %12) ⟪┫ - %exit.mainloop.at = call i64 @llvm.umin.i64(i64 %umin36…⟪┫ - %.not = icmp eq i64 %exit.mainloop.at, 0 ⟪┫ - br i1 %.not, label %main.pseudo.exit, label %idxend21.p…⟪┫ - ⟪┫ -idxend21.preheader: ; preds…⟪┫ - %min.iters.check = icmp ult i64 
%exit.mainloop.at, 16 ⟪┫ - br i1 %min.iters.check, label %scalar.ph, label %vector…⟪┫ - ┃ -vector.memcheck: ; preds…⟪┫ - %scevgep = getelementptr i64, i64* %arrayptr2331, i64 %…⟪┫ - %scevgep58 = getelementptr i64, i64* %arrayptr29, i64 %…⟪┫ - %scevgep61 = getelementptr i64, i64* %arrayptr1430, i64…⟪┫ - %bound0 = icmp ult i64* %arrayptr2331, %scevgep58 ⟪┫ - %bound1 = icmp ult i64* %arrayptr29, %scevgep ⟪┫ - %found.conflict = and i1 %bound0, %bound1 ⟪┫ - %bound063 = icmp ult i64* %arrayptr2331, %scevgep61 ⟪┫ - %bound164 = icmp ult i64* %arrayptr1430, %scevgep ⟪┫ - %found.conflict65 = and i1 %bound063, %bound164 ⟪┫ - %conflict.rdx = or i1 %found.conflict, %found.conflict6…⟪┫ - br i1 %conflict.rdx, label %scalar.ph, label %vector.ph ⟪╋⟫ br i1 %min.iters.check, label %scalar.ph, label %vector… - ┣⟫L12.lr.ph: ; preds… - ┣⟫ %5 = bitcast {}* %2 to i64** - ┣⟫ %arrayptr8 = load i64*, i64** %5, align 8 - ┣⟫ %6 = bitcast {}* %3 to i64** - ┣⟫ %arrayptr29 = load i64*, i64** %6, align 8 - ┣⟫ %7 = bitcast {}* %0 to i64** - ┣⟫ %arrayptr510 = load i64*, i64** %7, align 8 - ┣⟫ %min.iters.check = icmp ult i64 %arraylen, 16 - ┃ -vector.ph: ; preds…⟪╋⟫vector.ph: ; preds… - %n.vec = and i64 %exit.mainloop.at, 9223372036854775792 ⟪╋⟫ %n.vec = and i64 %arraylen, 9223372036854775792 - %ind.end = or i64 %n.vec, 1 ⟪┫ - %broadcast.splatinsert = insertelement <4 x i64> poison… ┃ %broadcast.splatinsert = insertelement <4 x i64> poison… - %broadcast.splat = shufflevector <4 x i64> %broadcast.s… ┃ %broadcast.splat = shufflevector <4 x i64> %broadcast.s… - %13 = add nsw i64 %n.vec, -16 ⟪╋⟫ %8 = add nsw i64 %n.vec, -16 - %14 = lshr exact i64 %13, 4 ⟪╋⟫ %9 = lshr exact i64 %8, 4 - %15 = add nuw nsw i64 %14, 1 ⟪╋⟫ %10 = add nuw nsw i64 %9, 1 - %xtraiter = and i64 %15, 1 ⟪╋⟫ %xtraiter = and i64 %10, 1 - %16 = icmp eq i64 %13, 0 ⟪╋⟫ %11 = icmp eq i64 %8, 0 - br i1 %16, label %middle.block.unr-lcssa, label %vector…⟪╋⟫ br i1 %11, label %middle.block.unr-lcssa, label %vector… - ┃ -vector.ph.new: ; preds… ┃ 
vector.ph.new: ; preds… - %unroll_iter = and i64 %15, 2305843009213693950 ⟪╋⟫ %unroll_iter = and i64 %10, 2305843009213693950 - br label %vector.body ┃ br label %vector.body - ┃ -vector.body: ; preds… ┃ vector.body: ; preds… - %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… ┃ %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… - %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… ┃ %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… - %17 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %12 = getelementptr inbounds i64, i64* %arrayptr8, i64 … - %18 = bitcast i64* %17 to <4 x i64>* ⟪╋⟫ %13 = bitcast i64* %12 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %18, align 8 ⟪╋⟫ %wide.load = load <4 x i64>, <4 x i64>* %13, align 8 - %19 = getelementptr inbounds i64, i64* %17, i64 4 ⟪╋⟫ %14 = getelementptr inbounds i64, i64* %12, i64 4 - %20 = bitcast i64* %19 to <4 x i64>* ⟪╋⟫ %15 = bitcast i64* %14 to <4 x i64>* - %wide.load66 = load <4 x i64>, <4 x i64>* %20, align 8 ⟪╋⟫ %wide.load13 = load <4 x i64>, <4 x i64>* %15, align 8 - %21 = getelementptr inbounds i64, i64* %17, i64 8 ⟪╋⟫ %16 = getelementptr inbounds i64, i64* %12, i64 8 - %22 = bitcast i64* %21 to <4 x i64>* ⟪╋⟫ %17 = bitcast i64* %16 to <4 x i64>* - %wide.load67 = load <4 x i64>, <4 x i64>* %22, align 8 ⟪╋⟫ %wide.load14 = load <4 x i64>, <4 x i64>* %17, align 8 - %23 = getelementptr inbounds i64, i64* %17, i64 12 ⟪╋⟫ %18 = getelementptr inbounds i64, i64* %12, i64 12 - %24 = bitcast i64* %23 to <4 x i64>* ⟪╋⟫ %19 = bitcast i64* %18 to <4 x i64>* - %wide.load68 = load <4 x i64>, <4 x i64>* %24, align 8 ⟪╋⟫ %wide.load15 = load <4 x i64>, <4 x i64>* %19, align 8 - %25 = mul <4 x i64> %wide.load, %broadcast.splat ⟪╋⟫ %20 = mul <4 x i64> %wide.load, %broadcast.splat - %26 = mul <4 x i64> %wide.load66, %broadcast.splat ⟪╋⟫ %21 = mul <4 x i64> %wide.load13, %broadcast.splat - %27 = mul <4 x i64> %wide.load67, %broadcast.splat ⟪╋⟫ %22 = mul <4 x i64> %wide.load14, %broadcast.splat 
- %28 = mul <4 x i64> %wide.load68, %broadcast.splat ⟪╋⟫ %23 = mul <4 x i64> %wide.load15, %broadcast.splat - %29 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %24 = getelementptr inbounds i64, i64* %arrayptr29, i64… - %30 = bitcast i64* %29 to <4 x i64>* ⟪╋⟫ %25 = bitcast i64* %24 to <4 x i64>* - %wide.load75 = load <4 x i64>, <4 x i64>* %30, align 8 ⟪╋⟫ %wide.load22 = load <4 x i64>, <4 x i64>* %25, align 8 - %31 = getelementptr inbounds i64, i64* %29, i64 4 ⟪╋⟫ %26 = getelementptr inbounds i64, i64* %24, i64 4 - %32 = bitcast i64* %31 to <4 x i64>* ⟪╋⟫ %27 = bitcast i64* %26 to <4 x i64>* - %wide.load76 = load <4 x i64>, <4 x i64>* %32, align 8 ⟪╋⟫ %wide.load23 = load <4 x i64>, <4 x i64>* %27, align 8 - %33 = getelementptr inbounds i64, i64* %29, i64 8 ⟪╋⟫ %28 = getelementptr inbounds i64, i64* %24, i64 8 - %34 = bitcast i64* %33 to <4 x i64>* ⟪╋⟫ %29 = bitcast i64* %28 to <4 x i64>* - %wide.load77 = load <4 x i64>, <4 x i64>* %34, align 8 ⟪╋⟫ %wide.load24 = load <4 x i64>, <4 x i64>* %29, align 8 - %35 = getelementptr inbounds i64, i64* %29, i64 12 ⟪╋⟫ %30 = getelementptr inbounds i64, i64* %24, i64 12 - %36 = bitcast i64* %35 to <4 x i64>* ⟪╋⟫ %31 = bitcast i64* %30 to <4 x i64>* - %wide.load78 = load <4 x i64>, <4 x i64>* %36, align 8 ⟪╋⟫ %wide.load25 = load <4 x i64>, <4 x i64>* %31, align 8 - %37 = add <4 x i64> %wide.load75, %25 ⟪╋⟫ %32 = add <4 x i64> %wide.load22, %20 - %38 = add <4 x i64> %wide.load76, %26 ⟪╋⟫ %33 = add <4 x i64> %wide.load23, %21 - %39 = add <4 x i64> %wide.load77, %27 ⟪╋⟫ %34 = add <4 x i64> %wide.load24, %22 - %40 = add <4 x i64> %wide.load78, %28 ⟪╋⟫ %35 = add <4 x i64> %wide.load25, %23 - %41 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %36 = getelementptr inbounds i64, i64* %arrayptr510, i6… - %42 = bitcast i64* %41 to <4 x i64>* ⟪╋⟫ %37 = bitcast i64* %36 to <4 x i64>* - store <4 x i64> %37, <4 x i64>* %42, align 8 ⟪╋⟫ store <4 x i64> %32, <4 x i64>* %37, align 8 - %43 = getelementptr inbounds i64, i64* %41, 
i64 4 ⟪╋⟫ %38 = getelementptr inbounds i64, i64* %36, i64 4 - %44 = bitcast i64* %43 to <4 x i64>* ⟪╋⟫ %39 = bitcast i64* %38 to <4 x i64>* - store <4 x i64> %38, <4 x i64>* %44, align 8 ⟪╋⟫ store <4 x i64> %33, <4 x i64>* %39, align 8 - %45 = getelementptr inbounds i64, i64* %41, i64 8 ⟪╋⟫ %40 = getelementptr inbounds i64, i64* %36, i64 8 - %46 = bitcast i64* %45 to <4 x i64>* ⟪╋⟫ %41 = bitcast i64* %40 to <4 x i64>* - store <4 x i64> %39, <4 x i64>* %46, align 8 ⟪╋⟫ store <4 x i64> %34, <4 x i64>* %41, align 8 - %47 = getelementptr inbounds i64, i64* %41, i64 12 ⟪╋⟫ %42 = getelementptr inbounds i64, i64* %36, i64 12 - %48 = bitcast i64* %47 to <4 x i64>* ⟪╋⟫ %43 = bitcast i64* %42 to <4 x i64>* - store <4 x i64> %40, <4 x i64>* %48, align 8 ⟪╋⟫ store <4 x i64> %35, <4 x i64>* %43, align 8 - %index.next = or i64 %index, 16 ┃ %index.next = or i64 %index, 16 - %49 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %44 = getelementptr inbounds i64, i64* %arrayptr8, i64 … - %50 = bitcast i64* %49 to <4 x i64>* ⟪╋⟫ %45 = bitcast i64* %44 to <4 x i64>* - %wide.load.1 = load <4 x i64>, <4 x i64>* %50, align 8 ⟪╋⟫ %wide.load.1 = load <4 x i64>, <4 x i64>* %45, align 8 - %51 = getelementptr inbounds i64, i64* %49, i64 4 ⟪╋⟫ %46 = getelementptr inbounds i64, i64* %44, i64 4 - %52 = bitcast i64* %51 to <4 x i64>* ⟪╋⟫ %47 = bitcast i64* %46 to <4 x i64>* - %wide.load66.1 = load <4 x i64>, <4 x i64>* %52, align …⟪╋⟫ %wide.load13.1 = load <4 x i64>, <4 x i64>* %47, align … - %53 = getelementptr inbounds i64, i64* %49, i64 8 ⟪╋⟫ %48 = getelementptr inbounds i64, i64* %44, i64 8 - %54 = bitcast i64* %53 to <4 x i64>* ⟪╋⟫ %49 = bitcast i64* %48 to <4 x i64>* - %wide.load67.1 = load <4 x i64>, <4 x i64>* %54, align …⟪╋⟫ %wide.load14.1 = load <4 x i64>, <4 x i64>* %49, align … - %55 = getelementptr inbounds i64, i64* %49, i64 12 ⟪╋⟫ %50 = getelementptr inbounds i64, i64* %44, i64 12 - %56 = bitcast i64* %55 to <4 x i64>* ⟪╋⟫ %51 = bitcast i64* %50 to <4 x i64>* - %wide.load68.1 
= load <4 x i64>, <4 x i64>* %56, align …⟪╋⟫ %wide.load15.1 = load <4 x i64>, <4 x i64>* %51, align … - %57 = mul <4 x i64> %wide.load.1, %broadcast.splat ⟪╋⟫ %52 = mul <4 x i64> %wide.load.1, %broadcast.splat - %58 = mul <4 x i64> %wide.load66.1, %broadcast.splat ⟪╋⟫ %53 = mul <4 x i64> %wide.load13.1, %broadcast.splat - %59 = mul <4 x i64> %wide.load67.1, %broadcast.splat ⟪╋⟫ %54 = mul <4 x i64> %wide.load14.1, %broadcast.splat - %60 = mul <4 x i64> %wide.load68.1, %broadcast.splat ⟪╋⟫ %55 = mul <4 x i64> %wide.load15.1, %broadcast.splat - %61 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %56 = getelementptr inbounds i64, i64* %arrayptr29, i64… - %62 = bitcast i64* %61 to <4 x i64>* ⟪╋⟫ %57 = bitcast i64* %56 to <4 x i64>* - %wide.load75.1 = load <4 x i64>, <4 x i64>* %62, align …⟪╋⟫ %wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align … - %63 = getelementptr inbounds i64, i64* %61, i64 4 ⟪╋⟫ %58 = getelementptr inbounds i64, i64* %56, i64 4 - %64 = bitcast i64* %63 to <4 x i64>* ⟪╋⟫ %59 = bitcast i64* %58 to <4 x i64>* - %wide.load76.1 = load <4 x i64>, <4 x i64>* %64, align …⟪╋⟫ %wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align … - %65 = getelementptr inbounds i64, i64* %61, i64 8 ⟪╋⟫ %60 = getelementptr inbounds i64, i64* %56, i64 8 - %66 = bitcast i64* %65 to <4 x i64>* ⟪╋⟫ %61 = bitcast i64* %60 to <4 x i64>* - %wide.load77.1 = load <4 x i64>, <4 x i64>* %66, align …⟪╋⟫ %wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align … - %67 = getelementptr inbounds i64, i64* %61, i64 12 ⟪╋⟫ %62 = getelementptr inbounds i64, i64* %56, i64 12 - %68 = bitcast i64* %67 to <4 x i64>* ⟪╋⟫ %63 = bitcast i64* %62 to <4 x i64>* - %wide.load78.1 = load <4 x i64>, <4 x i64>* %68, align …⟪╋⟫ %wide.load25.1 = load <4 x i64>, <4 x i64>* %63, align … - %69 = add <4 x i64> %wide.load75.1, %57 ⟪╋⟫ %64 = add <4 x i64> %wide.load22.1, %52 - %70 = add <4 x i64> %wide.load76.1, %58 ⟪╋⟫ %65 = add <4 x i64> %wide.load23.1, %53 - %71 = add <4 x i64> %wide.load77.1, %59 ⟪╋⟫ 
%66 = add <4 x i64> %wide.load24.1, %54 - %72 = add <4 x i64> %wide.load78.1, %60 ⟪╋⟫ %67 = add <4 x i64> %wide.load25.1, %55 - %73 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %68 = getelementptr inbounds i64, i64* %arrayptr510, i6… - %74 = bitcast i64* %73 to <4 x i64>* ⟪╋⟫ %69 = bitcast i64* %68 to <4 x i64>* - store <4 x i64> %69, <4 x i64>* %74, align 8 ⟪╋⟫ store <4 x i64> %64, <4 x i64>* %69, align 8 - %75 = getelementptr inbounds i64, i64* %73, i64 4 ⟪╋⟫ %70 = getelementptr inbounds i64, i64* %68, i64 4 - %76 = bitcast i64* %75 to <4 x i64>* ⟪╋⟫ %71 = bitcast i64* %70 to <4 x i64>* - store <4 x i64> %70, <4 x i64>* %76, align 8 ⟪╋⟫ store <4 x i64> %65, <4 x i64>* %71, align 8 - %77 = getelementptr inbounds i64, i64* %73, i64 8 ⟪╋⟫ %72 = getelementptr inbounds i64, i64* %68, i64 8 - %78 = bitcast i64* %77 to <4 x i64>* ⟪╋⟫ %73 = bitcast i64* %72 to <4 x i64>* - store <4 x i64> %71, <4 x i64>* %78, align 8 ⟪╋⟫ store <4 x i64> %66, <4 x i64>* %73, align 8 - %79 = getelementptr inbounds i64, i64* %73, i64 12 ⟪╋⟫ %74 = getelementptr inbounds i64, i64* %68, i64 12 - %80 = bitcast i64* %79 to <4 x i64>* ⟪╋⟫ %75 = bitcast i64* %74 to <4 x i64>* - store <4 x i64> %72, <4 x i64>* %80, align 8 ⟪╋⟫ store <4 x i64> %67, <4 x i64>* %75, align 8 - %index.next.1 = add nuw i64 %index, 32 ┃ %index.next.1 = add nuw i64 %index, 32 - %niter.next.1 = add i64 %niter, 2 ┃ %niter.next.1 = add i64 %niter, 2 - %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter ┃ %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter - br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… ┃ br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… - ┃ -middle.block.unr-lcssa: ; preds… ┃ middle.block.unr-lcssa: ; preds… - %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… ┃ %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 ┃ %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %middle.block, label %vector… ┃ br 
i1 %lcmp.mod.not, label %middle.block, label %vector… - ┃ -vector.body.epil.preheader: ; preds… ┃ vector.body.epil.preheader: ; preds… - %81 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %76 = getelementptr inbounds i64, i64* %arrayptr8, i64 … - %82 = bitcast i64* %81 to <4 x i64>* ⟪╋⟫ %77 = bitcast i64* %76 to <4 x i64>* - %wide.load.epil = load <4 x i64>, <4 x i64>* %82, align…⟪╋⟫ %wide.load.epil = load <4 x i64>, <4 x i64>* %77, align… - %83 = getelementptr inbounds i64, i64* %81, i64 4 ⟪╋⟫ %78 = getelementptr inbounds i64, i64* %76, i64 4 - %84 = bitcast i64* %83 to <4 x i64>* ⟪╋⟫ %79 = bitcast i64* %78 to <4 x i64>* - %wide.load66.epil = load <4 x i64>, <4 x i64>* %84, ali…⟪╋⟫ %wide.load13.epil = load <4 x i64>, <4 x i64>* %79, ali… - %85 = getelementptr inbounds i64, i64* %81, i64 8 ⟪╋⟫ %80 = getelementptr inbounds i64, i64* %76, i64 8 - %86 = bitcast i64* %85 to <4 x i64>* ⟪╋⟫ %81 = bitcast i64* %80 to <4 x i64>* - %wide.load67.epil = load <4 x i64>, <4 x i64>* %86, ali…⟪╋⟫ %wide.load14.epil = load <4 x i64>, <4 x i64>* %81, ali… - %87 = getelementptr inbounds i64, i64* %81, i64 12 ⟪╋⟫ %82 = getelementptr inbounds i64, i64* %76, i64 12 - %88 = bitcast i64* %87 to <4 x i64>* ⟪╋⟫ %83 = bitcast i64* %82 to <4 x i64>* - %wide.load68.epil = load <4 x i64>, <4 x i64>* %88, ali…⟪╋⟫ %wide.load15.epil = load <4 x i64>, <4 x i64>* %83, ali… - %89 = mul <4 x i64> %wide.load.epil, %broadcast.splat ⟪╋⟫ %84 = mul <4 x i64> %wide.load.epil, %broadcast.splat - %90 = mul <4 x i64> %wide.load66.epil, %broadcast.splat ⟪╋⟫ %85 = mul <4 x i64> %wide.load13.epil, %broadcast.splat - %91 = mul <4 x i64> %wide.load67.epil, %broadcast.splat ⟪╋⟫ %86 = mul <4 x i64> %wide.load14.epil, %broadcast.splat - %92 = mul <4 x i64> %wide.load68.epil, %broadcast.splat ⟪╋⟫ %87 = mul <4 x i64> %wide.load15.epil, %broadcast.splat - %93 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %88 = getelementptr inbounds i64, i64* %arrayptr29, i64… - %94 = bitcast i64* %93 to <4 x i64>* ⟪╋⟫ 
%89 = bitcast i64* %88 to <4 x i64>* - %wide.load75.epil = load <4 x i64>, <4 x i64>* %94, ali…⟪╋⟫ %wide.load22.epil = load <4 x i64>, <4 x i64>* %89, ali… - %95 = getelementptr inbounds i64, i64* %93, i64 4 ⟪╋⟫ %90 = getelementptr inbounds i64, i64* %88, i64 4 - %96 = bitcast i64* %95 to <4 x i64>* ⟪╋⟫ %91 = bitcast i64* %90 to <4 x i64>* - %wide.load76.epil = load <4 x i64>, <4 x i64>* %96, ali…⟪╋⟫ %wide.load23.epil = load <4 x i64>, <4 x i64>* %91, ali… - %97 = getelementptr inbounds i64, i64* %93, i64 8 ⟪╋⟫ %92 = getelementptr inbounds i64, i64* %88, i64 8 - %98 = bitcast i64* %97 to <4 x i64>* ⟪╋⟫ %93 = bitcast i64* %92 to <4 x i64>* - %wide.load77.epil = load <4 x i64>, <4 x i64>* %98, ali…⟪╋⟫ %wide.load24.epil = load <4 x i64>, <4 x i64>* %93, ali… - %99 = getelementptr inbounds i64, i64* %93, i64 12 ⟪╋⟫ %94 = getelementptr inbounds i64, i64* %88, i64 12 - %100 = bitcast i64* %99 to <4 x i64>* ⟪╋⟫ %95 = bitcast i64* %94 to <4 x i64>* - %wide.load78.epil = load <4 x i64>, <4 x i64>* %100, al…⟪╋⟫ %wide.load25.epil = load <4 x i64>, <4 x i64>* %95, ali… - %101 = add <4 x i64> %wide.load75.epil, %89 ⟪╋⟫ %96 = add <4 x i64> %wide.load22.epil, %84 - %102 = add <4 x i64> %wide.load76.epil, %90 ⟪╋⟫ %97 = add <4 x i64> %wide.load23.epil, %85 - %103 = add <4 x i64> %wide.load77.epil, %91 ⟪╋⟫ %98 = add <4 x i64> %wide.load24.epil, %86 - %104 = add <4 x i64> %wide.load78.epil, %92 ⟪╋⟫ %99 = add <4 x i64> %wide.load25.epil, %87 - %105 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪╋⟫ %100 = getelementptr inbounds i64, i64* %arrayptr510, i… - %106 = bitcast i64* %105 to <4 x i64>* ⟪╋⟫ %101 = bitcast i64* %100 to <4 x i64>* - store <4 x i64> %101, <4 x i64>* %106, align 8 ⟪╋⟫ store <4 x i64> %96, <4 x i64>* %101, align 8 - %107 = getelementptr inbounds i64, i64* %105, i64 4 ⟪╋⟫ %102 = getelementptr inbounds i64, i64* %100, i64 4 - %108 = bitcast i64* %107 to <4 x i64>* ⟪╋⟫ %103 = bitcast i64* %102 to <4 x i64>* - store <4 x i64> %102, <4 x i64>* %108, align 8 ⟪╋⟫ 
store <4 x i64> %97, <4 x i64>* %103, align 8 - %109 = getelementptr inbounds i64, i64* %105, i64 8 ⟪╋⟫ %104 = getelementptr inbounds i64, i64* %100, i64 8 - %110 = bitcast i64* %109 to <4 x i64>* ⟪╋⟫ %105 = bitcast i64* %104 to <4 x i64>* - store <4 x i64> %103, <4 x i64>* %110, align 8 ⟪╋⟫ store <4 x i64> %98, <4 x i64>* %105, align 8 - %111 = getelementptr inbounds i64, i64* %105, i64 12 ⟪╋⟫ %106 = getelementptr inbounds i64, i64* %100, i64 12 - %112 = bitcast i64* %111 to <4 x i64>* ⟪╋⟫ %107 = bitcast i64* %106 to <4 x i64>* - store <4 x i64> %104, <4 x i64>* %112, align 8 ⟪╋⟫ store <4 x i64> %99, <4 x i64>* %107, align 8 - br label %middle.block ┃ br label %middle.block - ┃ -middle.block: ; preds… ┃ middle.block: ; preds… - %cmp.n = icmp eq i64 %exit.mainloop.at, %n.vec ⟪╋⟫ %cmp.n = icmp eq i64 %arraylen, %n.vec - br i1 %cmp.n, label %main.exit.selector, label %scalar.…⟪┫ - ⟪┫ -scalar.ph: ; preds…⟪┫ - %bc.resume.val = phi i64 [ %ind.end, %middle.block ], […⟪┫ - br label %idxend21 ⟪┫ - ⟪┫ -L31: ; preds…⟪┫ - ret void ⟪┫ - ⟪┫ -oob: ; preds…⟪┫ - %errorbox = alloca i64, align 8 ⟪┫ - store i64 %value_phi3.postloop, i64* %errorbox, align 8 ⟪┫ - call void @ijl_bounds_error_ints({}* %2, i64* nonnull %…⟪┫ - unreachable ⟪┫ - ⟪┫ -oob10: ; preds…⟪┫ - %errorbox11 = alloca i64, align 8 ⟪┫ - store i64 %value_phi3.postloop, i64* %errorbox11, align…⟪┫ - call void @ijl_bounds_error_ints({}* %3, i64* nonnull %…⟪┫ - unreachable ⟪┫ - ⟪┫ -oob19: ; preds…⟪┫ - %errorbox20 = alloca i64, align 8 ⟪┫ - store i64 %value_phi3.postloop, i64* %errorbox20, align…⟪┫ - call void @ijl_bounds_error_ints({}* %0, i64* nonnull %…⟪┫ - unreachable ⟪┫ - ⟪┫ -idxend21: ; preds…⟪┫ - %value_phi3 = phi i64 [ %119, %idxend21 ], [ %bc.resume…⟪┫ - %113 = add nsw i64 %value_phi3, -1 ⟪┫ - %114 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫ - %arrayref = load i64, i64* %114, align 8 ⟪┫ - %115 = mul i64 %arrayref, %1 ⟪┫ - %116 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫ - %arrayref15 = load i64, 
i64* %116, align 8 ⟪┫ - %117 = add i64 %arrayref15, %115 ⟪┫ - %118 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫ - store i64 %117, i64* %118, align 8 ⟪┫ - %119 = add nuw nsw i64 %value_phi3, 1 ⟪┫ - %.not51 = icmp ult i64 %value_phi3, %exit.mainloop.at ⟪┫ - br i1 %.not51, label %idxend21, label %main.exit.select…⟪┫ - ⟪┫ -main.exit.selector: ; preds…⟪┫ - %value_phi3.lcssa = phi i64 [ %exit.mainloop.at, %middl…⟪┫ - %.lcssa = phi i64 [ %ind.end, %middle.block ], [ %119, …⟪┫ - %120 = icmp ult i64 %value_phi3.lcssa, %arraylen ⟪┫ - br i1 %120, label %main.pseudo.exit, label %L31 ⟪┫ - ⟪┫ -main.pseudo.exit: ; preds…⟪┫ - %value_phi3.copy = phi i64 [ 1, %L13.preheader ], [ %.l…⟪┫ - br label %L13.postloop ⟪┫ - ⟪┫ -L13.postloop: ; preds…⟪┫ - %value_phi3.postloop = phi i64 [ %127, %idxend21.postlo…⟪┫ - %121 = add i64 %value_phi3.postloop, -1 ⟪┫ - %inbounds.postloop = icmp ult i64 %121, %arraylen6 ⟪┫ - br i1 %inbounds.postloop, label %idxend.postloop, label…⟪┫ - ┣⟫ br i1 %cmp.n, label %L32, label %scalar.ph - ┃ -idxend.postloop: ; preds…⟪┫ - %inbounds9.postloop = icmp ult i64 %121, %arraylen8 ⟪┫ - br i1 %inbounds9.postloop, label %idxend12.postloop, la…⟪┫ - ┣⟫scalar.ph: ; preds… - ┣⟫ %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0… - ┣⟫ br label %L12 - ┃ -idxend12.postloop: ; preds…⟪┫ - %inbounds18.postloop = icmp ult i64 %121, %arraylen ⟪┫ - br i1 %inbounds18.postloop, label %idxend21.postloop, l…⟪┫ - ┣⟫L12: ; preds… - ┣⟫ %value_phi12 = phi i64 [ %bc.resume.val, %scalar.ph ], … - ┣⟫ %108 = getelementptr inbounds i64, i64* %arrayptr8, i64… - ┣⟫ %arrayref = load i64, i64* %108, align 8 - ┣⟫ %109 = mul i64 %arrayref, %1 - ┣⟫ %110 = getelementptr inbounds i64, i64* %arrayptr29, i6… - ┣⟫ %arrayref3 = load i64, i64* %110, align 8 - ┣⟫ %111 = add i64 %arrayref3, %109 - ┣⟫ %112 = getelementptr inbounds i64, i64* %arrayptr510, i… - ┣⟫ store i64 %111, i64* %112, align 8 - ┣⟫ %113 = add nuw nsw i64 %value_phi12, 1 - ┣⟫ %exitcond.not = icmp eq i64 %113, %arraylen - ┣⟫ br i1 
%exitcond.not, label %L32, label %L12 - ┃ -idxend21.postloop: ; preds…⟪┫ - %122 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫ - %arrayref.postloop = load i64, i64* %122, align 8 ⟪┫ - %123 = mul i64 %arrayref.postloop, %1 ⟪┫ - %124 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫ - %arrayref15.postloop = load i64, i64* %124, align 8 ⟪┫ - %125 = add i64 %arrayref15.postloop, %123 ⟪┫ - %126 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫ - store i64 %125, i64* %126, align 8 ⟪┫ - %.not.not32.postloop = icmp eq i64 %value_phi3.postloop…⟪┫ - %127 = add nuw nsw i64 %value_phi3.postloop, 1 ⟪┫ - br i1 %.not.not32.postloop, label %L31, label %L13.post…⟪┫ - ┣⟫L32: ; preds… - ┣⟫ ret void -} ┃ } - ┃ \ No newline at end of file diff --git a/test/references/saxpy_x86_COLOR.S b/test/references/saxpy_x86_COLOR.S deleted file mode 100644 index 2cd93b2..0000000 --- a/test/references/saxpy_x86_COLOR.S +++ /dev/null @@ -1,379 +0,0 @@ - .text  ┃  .text - .file "saxpy" ⟪╋⟫ .file "saxpy_simd" - .globl saxpy  # -- Begin function sa…⟪╋⟫ .globl saxpy_simd # -- Begin function sa… - .p2align 4, 0x90  ┃  .p2align 4, 0x90 - .type saxpy,@function ⟪╋⟫ .type saxpy_simd,@function -saxpy:  # @saxpy ⟪╋⟫saxpy_simd: # @saxpy_simd - .cfi_startproc  ┃  .cfi_startproc -# %bb.0: # %top  ┃ # %bb.0: # %top - push rbp  ┃  push rbp - .cfi_def_cfa_offset 16  ┃  .cfi_def_cfa_offset 16 - .cfi_offset rbp, -16  ┃  .cfi_offset rbp, -16 - mov rbp, rsp  ┃  mov rbp, rsp - .cfi_def_cfa_register rbp  ┃  .cfi_def_cfa_register rbp - push r15 ⟪┫  - push r14 ⟪┫  - push r13 ⟪┫  - push r12 ⟪┫  - push rsi  ┃  push rsi - push rdi  ┃  push rdi - push rbx ⟪┫  - sub rsp, 72 ⟪╋⟫ sub rsp, 32 - vmovdqa xmmword ptr [rbp - 80], xmm7 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 32], xmm7 # 16-byte … - vmovdqa xmmword ptr [rbp - 96], xmm6 # 16-byte …⟪╋⟫ vmovdqa xmmword ptr [rbp - 48], xmm6 # 16-byte … - .cfi_offset rbx, -72 ⟪╋⟫ .cfi_offset rdi, -32 - .cfi_offset rdi, -64 ⟪╋⟫ .cfi_offset rsi, -24 - .cfi_offset rsi, -56 ⟪╋⟫ 
.cfi_offset xmm6, -64 - .cfi_offset r12, -48 ⟪╋⟫ .cfi_offset xmm7, -48 - .cfi_offset r13, -40 ⟪┫  - .cfi_offset r14, -32 ⟪┫  - .cfi_offset r15, -24 ⟪┫  - .cfi_offset xmm6, -112 ⟪┫  - .cfi_offset xmm7, -96 ⟪┫  - mov r13, r8 ⟪┫  - mov r8, qword ptr [rcx + 8] ⟪╋⟫ mov r10, qword ptr [rcx + 8] - test r8, r8 ⟪╋⟫ test r10, r10 - je .LBB0_15 ⟪╋⟫ je .LBB0_11 -# %bb.1: # %L13.preheader ⟪╋⟫# %bb.1: # %L12.lr.ph - mov r15, qword ptr [r13] ⟪╋⟫ mov r8, qword ptr [r8] - mov r11, qword ptr [r13 + 8] ⟪╋⟫ mov r9, qword ptr [r9] - mov r10, qword ptr [r9] ⟪╋⟫ mov r11, qword ptr [rcx] - mov r14, qword ptr [r9 + 8] ⟪┫  - mov qword ptr [rbp - 120], rcx # 8-byte Spill ⟪┫  - mov rbx, qword ptr [rcx] ⟪┫  - cmp r11, r8 ⟪╋⟫ cmp r10, 16 - mov rax, r8 ⟪┫  - cmovb rax, r11 ⟪┫  - mov rsi, r14 ⟪┫  - sar rsi, 63 ⟪┫  - and rsi, r14 ⟪┫  - mov rdi, r14 ⟪┫  - sub rdi, rsi ⟪┫  - test rsi, rsi ⟪┫  - mov r12, -1 ⟪┫  - cmovns r12, rsi ⟪┫  - inc r12 ⟪┫  - imul r12, rdi ⟪┫  - cmp rax, r12 ⟪┫  - cmovb r12, rax ⟪┫  - cmp r12, r8 ⟪┫  - cmovae r12, r8 ⟪┫  - mov edi, 1 ⟪┫  - test r12, r12 ⟪┫  - je .LBB0_2 ⟪╋⟫ jae .LBB0_3 -# %bb.3: # %idxend21.prehe…⟪┫  - cmp r12, 16 ⟪┫  - jae .LBB0_4 ⟪┫  -.LBB0_6: # %scalar.ph ⟪┫  - dec rdi ⟪┫  - mov rax, rdi ⟪┫  - .p2align 4, 0x90 ⟪┫  -.LBB0_7: # %idxend21 ⟪┫  - # =>This Inner Lo…⟪┫  - mov rcx, qword ptr [r15 + 8*rax] ⟪┫  - imul rcx, rdx ⟪┫  - add rcx, qword ptr [r10 + 8*rax] ⟪┫  - mov qword ptr [rbx + 8*rax], rcx ⟪┫  - inc rax ⟪┫  - cmp rax, r12 ⟪┫  - jb .LBB0_7 ⟪┫  -# %bb.8: # %main.exit.sele…⟪┫  - lea rdi, [rax + 1] ⟪┫  -.LBB0_9: # %main.exit.sele…⟪┫  - cmp rax, r8 ⟪┫  - mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫  - jae .LBB0_15 ⟪┫  -  ┣⟫# %bb.2: -  ┣⟫ xor eax, eax - jmp .LBB0_10  ┃  jmp .LBB0_10 -.LBB0_2: ⟪┫  - mov rcx, qword ptr [rbp - 120] # 8-byte Reloa…⟪┫  -.LBB0_10: # %main.pseudo.ex…⟪╋⟫.LBB0_3:  # %vector.ph - lea rax, [r8 + 1] ⟪┫  - .p2align 4, 0x90 ⟪┫  -.LBB0_11: # %L13.postloop ⟪┫  - # =>This Inner Lo…⟪┫  - lea rsi, [rdi - 1] ⟪┫  - cmp rsi, r11 ⟪┫  - jae 
.LBB0_25 ⟪┫  -# %bb.12: # %idxend.postloo…⟪┫  - # in Loop: Head…⟪┫  - cmp rsi, r14 ⟪┫  - jae .LBB0_26 ⟪┫  -# %bb.13: # %idxend12.postl…⟪┫  - # in Loop: Head…⟪┫  - cmp rsi, r8 ⟪┫  - jae .LBB0_27 ⟪┫  -# %bb.14: # %idxend21.postl…⟪┫  - # in Loop: Head…⟪┫  - mov rsi, qword ptr [r15 + 8*rdi - 8] ⟪┫  - imul rsi, rdx ⟪┫  - add rsi, qword ptr [r10 + 8*rdi - 8] ⟪┫  - mov qword ptr [rbx + 8*rdi - 8], rsi ⟪┫  - inc rdi ⟪┫  - cmp rax, rdi ⟪┫  - jne .LBB0_11 ⟪┫  -.LBB0_15: # %L31 ⟪┫  - vmovaps xmm6, xmmword ptr [rbp - 96] # 16-byte …⟪┫  - vmovaps xmm7, xmmword ptr [rbp - 80] # 16-byte …⟪┫  - lea rsp, [rbp - 56] ⟪┫  - pop rbx ⟪┫  - pop rdi ⟪┫  - pop rsi ⟪┫  - pop r12 ⟪┫  - pop r13 ⟪┫  - pop r14 ⟪┫  - pop r15 ⟪┫  - pop rbp ⟪┫  - vzeroupper ⟪┫  - ret ⟪┫  -.LBB0_4: # %vector.memchec…⟪┫  - mov qword ptr [rbp - 104], r13 # 8-byte Spill ⟪┫  - mov qword ptr [rbp - 112], r9 # 8-byte Spill ⟪┫  - lea rsi, [rbx + 8*r12] ⟪┫  - lea rax, [r15 + 8*r12] ⟪┫  - lea r13, [r10 + 8*r12] ⟪┫  - cmp rbx, rax ⟪┫  - setb r9b ⟪┫  - cmp r15, rsi ⟪┫  - setb cl ⟪┫  - cmp rbx, r13 ⟪┫  - setb al ⟪┫  - cmp r10, rsi ⟪┫  - setb sil ⟪┫  - test r9b, cl ⟪┫  - jne .LBB0_5 ⟪┫  -# %bb.16: # %vector.memchec…⟪┫  - and al, sil ⟪┫  - mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫  - mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫  - jne .LBB0_6 ⟪┫  -# %bb.17: # %vector.ph ⟪┫  - movabs rsi, 9223372036854775792 ⟪┫  - and rsi, r12 ⟪┫  -  ┣⟫ mov rax, r10 -  ┣⟫ and rax, -16 - vmovq xmm0, rdx  ┃  vmovq xmm0, rdx - vpbroadcastq ymm0, xmm0  ┃  vpbroadcastq ymm0, xmm0 - lea rcx, [rsi - 16] ⟪╋⟫ lea rcx, [rax - 16] - mov rax, rcx ⟪╋⟫ mov rsi, rcx - shr rax, 4 ⟪╋⟫ shr rsi, 4 - inc rax ⟪╋⟫ inc rsi - vpsrlq ymm1, ymm0, 32  ┃  vpsrlq ymm1, ymm0, 32 - test rcx, rcx  ┃  test rcx, rcx - je .LBB0_18 ⟪╋⟫ je .LBB0_4 -# %bb.19: # %vector.ph.new ⟪╋⟫# %bb.5:  # %vector.ph.new - mov r13, rax ⟪┫  - and r13, -2 ⟪╋⟫ and rdi, -2 - xor edi, edi ⟪┫  -  ┣⟫ mov rdi, rsi -  ┣⟫ xor ecx, ecx - .p2align 4, 0x90  ┃  .p2align 4, 0x90 -.LBB0_20: # %vector.body 
⟪╋⟫.LBB0_6:  # %vector.body - # =>This Inner Lo… ┃  # =>This Inner Lo… - vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] - vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] - vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] - vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] - vpmuludq ymm6, ymm2, ymm1  ┃  vpmuludq ymm6, ymm2, ymm1 - vpsrlq ymm7, ymm2, 32  ┃  vpsrlq ymm7, ymm2, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm2, ymm2, ymm0  ┃  vpmuludq ymm2, ymm2, ymm0 - vpaddq ymm2, ymm2, ymm6  ┃  vpaddq ymm2, ymm2, ymm6 - vpmuludq ymm6, ymm3, ymm1  ┃  vpmuludq ymm6, ymm3, ymm1 - vpsrlq ymm7, ymm3, 32  ┃  vpsrlq ymm7, ymm3, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm3, ymm3, ymm0  ┃  vpmuludq ymm3, ymm3, ymm0 - vpaddq ymm3, ymm3, ymm6  ┃  vpaddq ymm3, ymm3, ymm6 - vpmuludq ymm6, ymm4, ymm1  ┃  vpmuludq ymm6, ymm4, ymm1 - vpsrlq ymm7, ymm4, 32  ┃  vpsrlq ymm7, ymm4, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm4, ymm4, ymm0  ┃  vpmuludq ymm4, ymm4, ymm0 - vpaddq ymm4, ymm4, ymm6  ┃  vpaddq ymm4, ymm4, ymm6 - vpmuludq ymm6, ymm5, ymm1  ┃  vpmuludq ymm6, ymm5, ymm1 - vpsrlq ymm7, ymm5, 32  ┃  vpsrlq ymm7, ymm5, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm5, ymm5, ymm0  ┃  vpmuludq ymm5, ymm5, ymm0 - vpaddq ymm5, ymm5, ymm6  ┃  vpaddq ymm5, ymm5, ymm6 - vpaddq ymm2, ymm2, ymmword 
ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx] - vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 32] - vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 64] - vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 96] - vmovdqu ymmword ptr [rbx + 8*rdi], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm2 - vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm3 - vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm4 - vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm5 - vmovdqu ymm2, ymmword ptr [r15 + 8*rdi + 128] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx + 128] - vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 160] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 160] - vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 192] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 192] - vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 224] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 224] - vpmuludq ymm6, ymm2, ymm1  ┃  vpmuludq ymm6, ymm2, ymm1 - vpsrlq ymm7, ymm2, 32  ┃  vpsrlq ymm7, ymm2, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm2, ymm2, ymm0  ┃  vpmuludq ymm2, ymm2, ymm0 - vpaddq ymm2, ymm2, ymm6  ┃  vpaddq ymm2, ymm2, ymm6 - vpmuludq ymm6, ymm3, ymm1  ┃  vpmuludq ymm6, ymm3, ymm1 - vpsrlq ymm7, ymm3, 32  ┃  vpsrlq ymm7, ymm3, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm3, ymm3, ymm0  ┃  vpmuludq ymm3, ymm3, ymm0 - vpaddq ymm3, ymm3, ymm6  ┃  vpaddq ymm3, ymm3, ymm6 - vpmuludq ymm6, ymm4, ymm1  ┃  vpmuludq ymm6, ymm4, ymm1 - vpsrlq ymm7, ymm4, 32  ┃  vpsrlq 
ymm7, ymm4, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm4, ymm4, ymm0  ┃  vpmuludq ymm4, ymm4, ymm0 - vpaddq ymm4, ymm4, ymm6  ┃  vpaddq ymm4, ymm4, ymm6 - vpmuludq ymm6, ymm5, ymm1  ┃  vpmuludq ymm6, ymm5, ymm1 - vpsrlq ymm7, ymm5, 32  ┃  vpsrlq ymm7, ymm5, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm5, ymm5, ymm0  ┃  vpmuludq ymm5, ymm5, ymm0 - vpaddq ymm5, ymm5, ymm6  ┃  vpaddq ymm5, ymm5, ymm6 - vpaddq ymm2, ymm2, ymmword ptr [r10 + 8*rdi + 128] ⟪╋⟫ vpaddq ymm2, ymm2, ymmword ptr [r9 + 8*rcx + 128] - vpaddq ymm3, ymm3, ymmword ptr [r10 + 8*rdi + 160] ⟪╋⟫ vpaddq ymm3, ymm3, ymmword ptr [r9 + 8*rcx + 160] - vpaddq ymm4, ymm4, ymmword ptr [r10 + 8*rdi + 192] ⟪╋⟫ vpaddq ymm4, ymm4, ymmword ptr [r9 + 8*rcx + 192] - vpaddq ymm5, ymm5, ymmword ptr [r10 + 8*rdi + 224] ⟪╋⟫ vpaddq ymm5, ymm5, ymmword ptr [r9 + 8*rcx + 224] - vmovdqu ymmword ptr [rbx + 8*rdi + 128], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 128], ymm2 - vmovdqu ymmword ptr [rbx + 8*rdi + 160], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 160], ymm3 - vmovdqu ymmword ptr [rbx + 8*rdi + 192], ymm4 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 192], ymm4 - vmovdqu ymmword ptr [rbx + 8*rdi + 224], ymm5 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 224], ymm5 - add rdi, 32 ⟪╋⟫ add rcx, 32 - add r13, -2 ⟪╋⟫ add rdi, -2 - jne .LBB0_20 ⟪╋⟫ jne .LBB0_6 - jmp .LBB0_21 ⟪┫  -.LBB0_5: ⟪┫  - mov r9, qword ptr [rbp - 112] # 8-byte Reloa…⟪┫  - mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫  - jmp .LBB0_6 ⟪┫  -.LBB0_18: ⟪┫  - xor edi, edi ⟪┫  -.LBB0_21: # %middle.block.u…⟪╋⟫# %bb.7:  # %middle.block.u… - test al, 1 ⟪╋⟫ test sil, 1 - mov r13, qword ptr [rbp - 104] # 8-byte Reloa…⟪┫  - je .LBB0_23 ⟪╋⟫ je .LBB0_9 -# %bb.22: # %vector.body.ep…⟪╋⟫.LBB0_8:  # 
%vector.body.ep… - vmovdqu ymm2, ymmword ptr [r15 + 8*rdi] ⟪╋⟫ vmovdqu ymm2, ymmword ptr [r8 + 8*rcx] - vmovdqu ymm3, ymmword ptr [r15 + 8*rdi + 32] ⟪╋⟫ vmovdqu ymm3, ymmword ptr [r8 + 8*rcx + 32] - vmovdqu ymm4, ymmword ptr [r15 + 8*rdi + 64] ⟪╋⟫ vmovdqu ymm4, ymmword ptr [r8 + 8*rcx + 64] - vmovdqu ymm5, ymmword ptr [r15 + 8*rdi + 96] ⟪╋⟫ vmovdqu ymm5, ymmword ptr [r8 + 8*rcx + 96] - vpmuludq ymm6, ymm2, ymm1  ┃  vpmuludq ymm6, ymm2, ymm1 - vpsrlq ymm7, ymm2, 32  ┃  vpsrlq ymm7, ymm2, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm2, ymm2, ymm0  ┃  vpmuludq ymm2, ymm2, ymm0 - vpaddq ymm2, ymm2, ymm6  ┃  vpaddq ymm2, ymm2, ymm6 - vpmuludq ymm6, ymm3, ymm1  ┃  vpmuludq ymm6, ymm3, ymm1 - vpsrlq ymm7, ymm3, 32  ┃  vpsrlq ymm7, ymm3, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm3, ymm3, ymm0  ┃  vpmuludq ymm3, ymm3, ymm0 - vpaddq ymm3, ymm3, ymm6  ┃  vpaddq ymm3, ymm3, ymm6 - vpmuludq ymm6, ymm4, ymm1  ┃  vpmuludq ymm6, ymm4, ymm1 - vpsrlq ymm7, ymm4, 32  ┃  vpsrlq ymm7, ymm4, 32 - vpmuludq ymm7, ymm7, ymm0  ┃  vpmuludq ymm7, ymm7, ymm0 - vpaddq ymm6, ymm6, ymm7  ┃  vpaddq ymm6, ymm6, ymm7 - vpsllq ymm6, ymm6, 32  ┃  vpsllq ymm6, ymm6, 32 - vpmuludq ymm4, ymm4, ymm0  ┃  vpmuludq ymm4, ymm4, ymm0 - vpaddq ymm4, ymm4, ymm6  ┃  vpaddq ymm4, ymm4, ymm6 - vpmuludq ymm1, ymm5, ymm1  ┃  vpmuludq ymm1, ymm5, ymm1 - vpsrlq ymm6, ymm5, 32  ┃  vpsrlq ymm6, ymm5, 32 - vpmuludq ymm6, ymm6, ymm0  ┃  vpmuludq ymm6, ymm6, ymm0 - vpaddq ymm1, ymm1, ymm6  ┃  vpaddq ymm1, ymm1, ymm6 - vpsllq ymm1, ymm1, 32  ┃  vpsllq ymm1, ymm1, 32 - vpmuludq ymm0, ymm5, ymm0  ┃  vpmuludq ymm0, ymm5, ymm0 - vpaddq ymm0, ymm0, ymm1  ┃  vpaddq ymm0, ymm0, ymm1 - vpaddq ymm1, ymm2, ymmword ptr [r10 + 8*rdi] ⟪╋⟫ vpaddq ymm1, ymm2, ymmword ptr 
[r9 + 8*rcx] - vpaddq ymm2, ymm3, ymmword ptr [r10 + 8*rdi + 32] ⟪╋⟫ vpaddq ymm2, ymm3, ymmword ptr [r9 + 8*rcx + 32] - vpaddq ymm3, ymm4, ymmword ptr [r10 + 8*rdi + 64] ⟪╋⟫ vpaddq ymm3, ymm4, ymmword ptr [r9 + 8*rcx + 64] - vpaddq ymm0, ymm0, ymmword ptr [r10 + 8*rdi + 96] ⟪╋⟫ vpaddq ymm0, ymm0, ymmword ptr [r9 + 8*rcx + 96] - vmovdqu ymmword ptr [rbx + 8*rdi], ymm1 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx], ymm1 - vmovdqu ymmword ptr [rbx + 8*rdi + 32], ymm2 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 32], ymm2 - vmovdqu ymmword ptr [rbx + 8*rdi + 64], ymm3 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 64], ymm3 - vmovdqu ymmword ptr [rbx + 8*rdi + 96], ymm0 ⟪╋⟫ vmovdqu ymmword ptr [r11 + 8*rcx + 96], ymm0 -.LBB0_23: # %middle.block ⟪╋⟫.LBB0_9:  # %middle.block - lea rdi, [rsi + 1] ⟪┫  - cmp r12, rsi ⟪╋⟫ cmp r10, rax - jne .LBB0_6 ⟪╋⟫ je .LBB0_11 -# %bb.24: ⟪┫  - mov rax, r12 ⟪┫  - jmp .LBB0_9 ⟪┫  -.LBB0_25: # %oob ⟪╋⟫.LBB0_10: # %L12 - mov eax, 16 ⟪┫  - movabs r11, offset ___chkstk_ms ⟪┫  - call r11 ⟪┫  - sub rsp, rax ⟪┫  - mov rdx, rsp ⟪┫  - mov qword ptr [rdx], rdi ⟪┫  - sub rsp, 32 ⟪┫  - movabs rax, offset ijl_bounds_error_ints ⟪┫  - mov r8d, 1 ⟪┫  - mov rcx, r13 ⟪┫  - vzeroupper ⟪┫  - call rax ⟪┫  -.LBB0_26: # %oob10 ⟪╋⟫.LBB0_11: # %L32 - mov eax, 16 ⟪┫  - movabs r11, offset ___chkstk_ms ⟪┫  - call r11 ⟪┫  - sub rsp, rax ⟪┫  - mov rdx, rsp ⟪┫  - mov qword ptr [rdx], rdi ⟪┫  - sub rsp, 32 ⟪╋⟫ add rsp, 32 - movabs rax, offset ijl_bounds_error_ints ⟪┫  - mov r8d, 1 ⟪┫  - mov rcx, r9 ⟪┫  - vzeroupper ⟪┫  - call rax ⟪┫  -.LBB0_27: # %oob19 ⟪┫  - mov eax, 16 ⟪┫  - movabs r11, offset ___chkstk_ms ⟪┫  - call r11 ⟪┫  - sub rsp, rax ⟪┫  - mov rdx, rsp ⟪┫  - mov qword ptr [rdx], rdi ⟪┫  - sub rsp, 32 ⟪┫  - movabs rax, offset ijl_bounds_error_ints ⟪┫  - mov r8d, 1 ⟪┫  -  ┣⟫ .p2align 4, 0x90 -  ┣⟫ # =>This Inner Lo… -  ┣⟫ mov rcx, qword ptr [r8 + 8*rax] -  ┣⟫ imul rcx, rdx -  ┣⟫ add rcx, qword ptr [r9 + 8*rax] -  ┣⟫ mov qword ptr [r11 + 8*rax], rcx -  ┣⟫ inc rax -  ┣⟫ cmp r10, rax -  ┣⟫ 
jne .LBB0_10 -  ┣⟫ vmovaps xmm6, xmmword ptr [rbp - 48] # 16-byte … -  ┣⟫ vmovaps xmm7, xmmword ptr [rbp - 32] # 16-byte … -  ┣⟫ pop rdi -  ┣⟫ pop rsi -  ┣⟫ pop rbp - vzeroupper  ┃  vzeroupper - call rax ⟪┫  -  ┣⟫ ret -  ┣⟫.LBB0_4: -  ┣⟫ xor ecx, ecx -  ┣⟫ test sil, 1 -  ┣⟫ je .LBB0_9 -  ┣⟫ jmp .LBB0_8 -.Lfunc_end0:  ┃ .Lfunc_end0: - .size saxpy, .Lfunc_end0-saxpy ⟪╋⟫ .size saxpy_simd, .Lfunc_end0-saxpy_simd - .cfi_endproc  ┃  .cfi_endproc - # -- End function  ┃  # -- End function - .section ".note.GNU-stack","",@progbits  ┃  .section ".note.GNU-stack","",@progbits -  ┃  \ No newline at end of file diff --git a/test/references/saxpy_x86_COLOR.ll b/test/references/saxpy_x86_COLOR.ll deleted file mode 100644 index 7b7329a..0000000 --- a/test/references/saxpy_x86_COLOR.ll +++ /dev/null @@ -1,324 +0,0 @@ -; Function Attrs: uwtable  ┃ ; Function Attrs: uwtable -define void @saxpy({}* noundef nonnull align 16 dereferen…⟪╋⟫define void @saxpy_simd({}* noundef nonnull align 16 dere… -top:  ┃ top: - %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }*  ┃  %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* - %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… ┃  %arraylen_ptr = getelementptr inbounds { i8*, i64, i16,… - %arraylen = load i64, i64* %arraylen_ptr, align 8  ┃  %arraylen = load i64, i64* %arraylen_ptr, align 8 - %.not.not = icmp eq i64 %arraylen, 0 ⟪╋⟫ %.not = icmp eq i64 %arraylen, 0 - br i1 %.not.not, label %L31, label %L13.preheader ⟪╋⟫ br i1 %.not, label %L32, label %L12.lr.ph - ⟪┫  -L13.preheader: ; preds…⟪┫  - %5 = bitcast {}* %2 to { i8*, i64, i16, i16, i32 }* ⟪┫  - %arraylen_ptr5 = getelementptr inbounds { i8*, i64, i16…⟪┫  - %arraylen6 = load i64, i64* %arraylen_ptr5, align 8 ⟪┫  - %6 = bitcast {}* %3 to { i8*, i64, i16, i16, i32 }* ⟪┫  - %arraylen_ptr7 = getelementptr inbounds { i8*, i64, i16…⟪┫  - %arraylen8 = load i64, i64* %arraylen_ptr7, align 8 ⟪┫  - %7 = bitcast {}* %2 to i64** ⟪┫  - %arrayptr29 = load i64*, i64** %7, align 8 ⟪┫  - %8 = 
bitcast {}* %3 to i64** ⟪┫  - %arrayptr1430 = load i64*, i64** %8, align 8 ⟪┫  - %9 = bitcast {}* %0 to i64** ⟪┫  - %arrayptr2331 = load i64*, i64** %9, align 8 ⟪┫  - %umin = call i64 @llvm.umin.i64(i64 %arraylen6, i64 %ar…⟪┫  - %smin = call i64 @llvm.smin.i64(i64 %arraylen8, i64 0) ⟪┫  - %10 = sub i64 %arraylen8, %smin ⟪┫  - %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1) ⟪┫  - %11 = add nsw i64 %smax, 1 ⟪┫  - %12 = mul nuw nsw i64 %10, %11 ⟪┫  - %umin36 = call i64 @llvm.umin.i64(i64 %umin, i64 %12) ⟪┫  - %exit.mainloop.at = call i64 @llvm.umin.i64(i64 %umin36…⟪┫  - %.not = icmp eq i64 %exit.mainloop.at, 0 ⟪┫  - br i1 %.not, label %main.pseudo.exit, label %idxend21.p…⟪┫  - ⟪┫  -idxend21.preheader: ; preds…⟪┫  - %min.iters.check = icmp ult i64 %exit.mainloop.at, 16 ⟪┫  - br i1 %min.iters.check, label %scalar.ph, label %vector…⟪┫  -  ┃  -vector.memcheck: ; preds…⟪┫  - %scevgep = getelementptr i64, i64* %arrayptr2331, i64 %…⟪┫  - %scevgep58 = getelementptr i64, i64* %arrayptr29, i64 %…⟪┫  - %scevgep61 = getelementptr i64, i64* %arrayptr1430, i64…⟪┫  - %bound0 = icmp ult i64* %arrayptr2331, %scevgep58 ⟪┫  - %bound1 = icmp ult i64* %arrayptr29, %scevgep ⟪┫  - %found.conflict = and i1 %bound0, %bound1 ⟪┫  - %bound063 = icmp ult i64* %arrayptr2331, %scevgep61 ⟪┫  - %bound164 = icmp ult i64* %arrayptr1430, %scevgep ⟪┫  - %found.conflict65 = and i1 %bound063, %bound164 ⟪┫  - %conflict.rdx = or i1 %found.conflict, %found.conflict6…⟪┫  - br i1 %conflict.rdx, label %scalar.ph, label %vector.ph ⟪╋⟫ br i1 %min.iters.check, label %scalar.ph, label %vector… -  ┣⟫L12.lr.ph: ; preds… -  ┣⟫ %5 = bitcast {}* %2 to i64** -  ┣⟫ %arrayptr8 = load i64*, i64** %5, align 8 -  ┣⟫ %6 = bitcast {}* %3 to i64** -  ┣⟫ %arrayptr29 = load i64*, i64** %6, align 8 -  ┣⟫ %7 = bitcast {}* %0 to i64** -  ┣⟫ %arrayptr510 = load i64*, i64** %7, align 8 -  ┣⟫ %min.iters.check = icmp ult i64 %arraylen, 16 -  ┃  -vector.ph: ; preds…⟪╋⟫vector.ph: ; preds… - %n.vec = and i64 %exit.mainloop.at, 
9223372036854775792 ⟪╋⟫ %n.vec = and i64 %arraylen, 9223372036854775792 - %ind.end = or i64 %n.vec, 1 ⟪┫  - %broadcast.splatinsert = insertelement <4 x i64> poison… ┃  %broadcast.splatinsert = insertelement <4 x i64> poison… - %broadcast.splat = shufflevector <4 x i64> %broadcast.s… ┃  %broadcast.splat = shufflevector <4 x i64> %broadcast.s… - %13 = add nsw i64 %n.vec, -16 ⟪╋⟫ %8 = add nsw i64 %n.vec, -16 - %14 = lshr exact i64 %13, 4 ⟪╋⟫ %9 = lshr exact i64 %8, 4 - %15 = add nuw nsw i64 %14, 1 ⟪╋⟫ %10 = add nuw nsw i64 %9, 1 - %xtraiter = and i64 %15, 1 ⟪╋⟫ %xtraiter = and i64 %10, 1 - %16 = icmp eq i64 %13, 0 ⟪╋⟫ %11 = icmp eq i64 %8, 0 - br i1 %16, label %middle.block.unr-lcssa, label %vector…⟪╋⟫ br i1 %11, label %middle.block.unr-lcssa, label %vector… -  ┃  -vector.ph.new: ; preds… ┃ vector.ph.new: ; preds… - %unroll_iter = and i64 %15, 2305843009213693950 ⟪╋⟫ %unroll_iter = and i64 %10, 2305843009213693950 - br label %vector.body  ┃  br label %vector.body -  ┃  -vector.body: ; preds… ┃ vector.body: ; preds… - %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… ┃  %index = phi i64 [ 0, %vector.ph.new ], [ %index.next.1… - %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… ┃  %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.next.1… - %17 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %12 = getelementptr inbounds i64, i64* %arrayptr8, i64 … - %18 = bitcast i64* %17 to <4 x i64>* ⟪╋⟫ %13 = bitcast i64* %12 to <4 x i64>* - %wide.load = load <4 x i64>, <4 x i64>* %18, align 8 ⟪╋⟫ %wide.load = load <4 x i64>, <4 x i64>* %13, align 8 - %19 = getelementptr inbounds i64, i64* %17, i64 4 ⟪╋⟫ %14 = getelementptr inbounds i64, i64* %12, i64 4 - %20 = bitcast i64* %19 to <4 x i64>* ⟪╋⟫ %15 = bitcast i64* %14 to <4 x i64>* - %wide.load66 = load <4 x i64>, <4 x i64>* %20, align 8 ⟪╋⟫ %wide.load13 = load <4 x i64>, <4 x i64>* %15, align 8 - %21 = getelementptr inbounds i64, i64* %17, i64 8 ⟪╋⟫ %16 = getelementptr inbounds i64, i64* %12, i64 8 - %22 = 
bitcast i64* %21 to <4 x i64>* ⟪╋⟫ %17 = bitcast i64* %16 to <4 x i64>* - %wide.load67 = load <4 x i64>, <4 x i64>* %22, align 8 ⟪╋⟫ %wide.load14 = load <4 x i64>, <4 x i64>* %17, align 8 - %23 = getelementptr inbounds i64, i64* %17, i64 12 ⟪╋⟫ %18 = getelementptr inbounds i64, i64* %12, i64 12 - %24 = bitcast i64* %23 to <4 x i64>* ⟪╋⟫ %19 = bitcast i64* %18 to <4 x i64>* - %wide.load68 = load <4 x i64>, <4 x i64>* %24, align 8 ⟪╋⟫ %wide.load15 = load <4 x i64>, <4 x i64>* %19, align 8 - %25 = mul <4 x i64> %wide.load, %broadcast.splat ⟪╋⟫ %20 = mul <4 x i64> %wide.load, %broadcast.splat - %26 = mul <4 x i64> %wide.load66, %broadcast.splat ⟪╋⟫ %21 = mul <4 x i64> %wide.load13, %broadcast.splat - %27 = mul <4 x i64> %wide.load67, %broadcast.splat ⟪╋⟫ %22 = mul <4 x i64> %wide.load14, %broadcast.splat - %28 = mul <4 x i64> %wide.load68, %broadcast.splat ⟪╋⟫ %23 = mul <4 x i64> %wide.load15, %broadcast.splat - %29 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %24 = getelementptr inbounds i64, i64* %arrayptr29, i64… - %30 = bitcast i64* %29 to <4 x i64>* ⟪╋⟫ %25 = bitcast i64* %24 to <4 x i64>* - %wide.load75 = load <4 x i64>, <4 x i64>* %30, align 8 ⟪╋⟫ %wide.load22 = load <4 x i64>, <4 x i64>* %25, align 8 - %31 = getelementptr inbounds i64, i64* %29, i64 4 ⟪╋⟫ %26 = getelementptr inbounds i64, i64* %24, i64 4 - %32 = bitcast i64* %31 to <4 x i64>* ⟪╋⟫ %27 = bitcast i64* %26 to <4 x i64>* - %wide.load76 = load <4 x i64>, <4 x i64>* %32, align 8 ⟪╋⟫ %wide.load23 = load <4 x i64>, <4 x i64>* %27, align 8 - %33 = getelementptr inbounds i64, i64* %29, i64 8 ⟪╋⟫ %28 = getelementptr inbounds i64, i64* %24, i64 8 - %34 = bitcast i64* %33 to <4 x i64>* ⟪╋⟫ %29 = bitcast i64* %28 to <4 x i64>* - %wide.load77 = load <4 x i64>, <4 x i64>* %34, align 8 ⟪╋⟫ %wide.load24 = load <4 x i64>, <4 x i64>* %29, align 8 - %35 = getelementptr inbounds i64, i64* %29, i64 12 ⟪╋⟫ %30 = getelementptr inbounds i64, i64* %24, i64 12 - %36 = bitcast i64* %35 to <4 x i64>* ⟪╋⟫ %31 = 
bitcast i64* %30 to <4 x i64>* - %wide.load78 = load <4 x i64>, <4 x i64>* %36, align 8 ⟪╋⟫ %wide.load25 = load <4 x i64>, <4 x i64>* %31, align 8 - %37 = add <4 x i64> %wide.load75, %25 ⟪╋⟫ %32 = add <4 x i64> %wide.load22, %20 - %38 = add <4 x i64> %wide.load76, %26 ⟪╋⟫ %33 = add <4 x i64> %wide.load23, %21 - %39 = add <4 x i64> %wide.load77, %27 ⟪╋⟫ %34 = add <4 x i64> %wide.load24, %22 - %40 = add <4 x i64> %wide.load78, %28 ⟪╋⟫ %35 = add <4 x i64> %wide.load25, %23 - %41 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %36 = getelementptr inbounds i64, i64* %arrayptr510, i6… - %42 = bitcast i64* %41 to <4 x i64>* ⟪╋⟫ %37 = bitcast i64* %36 to <4 x i64>* - store <4 x i64> %37, <4 x i64>* %42, align 8 ⟪╋⟫ store <4 x i64> %32, <4 x i64>* %37, align 8 - %43 = getelementptr inbounds i64, i64* %41, i64 4 ⟪╋⟫ %38 = getelementptr inbounds i64, i64* %36, i64 4 - %44 = bitcast i64* %43 to <4 x i64>* ⟪╋⟫ %39 = bitcast i64* %38 to <4 x i64>* - store <4 x i64> %38, <4 x i64>* %44, align 8 ⟪╋⟫ store <4 x i64> %33, <4 x i64>* %39, align 8 - %45 = getelementptr inbounds i64, i64* %41, i64 8 ⟪╋⟫ %40 = getelementptr inbounds i64, i64* %36, i64 8 - %46 = bitcast i64* %45 to <4 x i64>* ⟪╋⟫ %41 = bitcast i64* %40 to <4 x i64>* - store <4 x i64> %39, <4 x i64>* %46, align 8 ⟪╋⟫ store <4 x i64> %34, <4 x i64>* %41, align 8 - %47 = getelementptr inbounds i64, i64* %41, i64 12 ⟪╋⟫ %42 = getelementptr inbounds i64, i64* %36, i64 12 - %48 = bitcast i64* %47 to <4 x i64>* ⟪╋⟫ %43 = bitcast i64* %42 to <4 x i64>* - store <4 x i64> %40, <4 x i64>* %48, align 8 ⟪╋⟫ store <4 x i64> %35, <4 x i64>* %43, align 8 - %index.next = or i64 %index, 16  ┃  %index.next = or i64 %index, 16 - %49 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %44 = getelementptr inbounds i64, i64* %arrayptr8, i64 … - %50 = bitcast i64* %49 to <4 x i64>* ⟪╋⟫ %45 = bitcast i64* %44 to <4 x i64>* - %wide.load.1 = load <4 x i64>, <4 x i64>* %50, align 8 ⟪╋⟫ %wide.load.1 = load <4 x i64>, <4 x i64>* %45, 
align 8 - %51 = getelementptr inbounds i64, i64* %49, i64 4 ⟪╋⟫ %46 = getelementptr inbounds i64, i64* %44, i64 4 - %52 = bitcast i64* %51 to <4 x i64>* ⟪╋⟫ %47 = bitcast i64* %46 to <4 x i64>* - %wide.load66.1 = load <4 x i64>, <4 x i64>* %52, align …⟪╋⟫ %wide.load13.1 = load <4 x i64>, <4 x i64>* %47, align … - %53 = getelementptr inbounds i64, i64* %49, i64 8 ⟪╋⟫ %48 = getelementptr inbounds i64, i64* %44, i64 8 - %54 = bitcast i64* %53 to <4 x i64>* ⟪╋⟫ %49 = bitcast i64* %48 to <4 x i64>* - %wide.load67.1 = load <4 x i64>, <4 x i64>* %54, align …⟪╋⟫ %wide.load14.1 = load <4 x i64>, <4 x i64>* %49, align … - %55 = getelementptr inbounds i64, i64* %49, i64 12 ⟪╋⟫ %50 = getelementptr inbounds i64, i64* %44, i64 12 - %56 = bitcast i64* %55 to <4 x i64>* ⟪╋⟫ %51 = bitcast i64* %50 to <4 x i64>* - %wide.load68.1 = load <4 x i64>, <4 x i64>* %56, align …⟪╋⟫ %wide.load15.1 = load <4 x i64>, <4 x i64>* %51, align … - %57 = mul <4 x i64> %wide.load.1, %broadcast.splat ⟪╋⟫ %52 = mul <4 x i64> %wide.load.1, %broadcast.splat - %58 = mul <4 x i64> %wide.load66.1, %broadcast.splat ⟪╋⟫ %53 = mul <4 x i64> %wide.load13.1, %broadcast.splat - %59 = mul <4 x i64> %wide.load67.1, %broadcast.splat ⟪╋⟫ %54 = mul <4 x i64> %wide.load14.1, %broadcast.splat - %60 = mul <4 x i64> %wide.load68.1, %broadcast.splat ⟪╋⟫ %55 = mul <4 x i64> %wide.load15.1, %broadcast.splat - %61 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %56 = getelementptr inbounds i64, i64* %arrayptr29, i64… - %62 = bitcast i64* %61 to <4 x i64>* ⟪╋⟫ %57 = bitcast i64* %56 to <4 x i64>* - %wide.load75.1 = load <4 x i64>, <4 x i64>* %62, align …⟪╋⟫ %wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align … - %63 = getelementptr inbounds i64, i64* %61, i64 4 ⟪╋⟫ %58 = getelementptr inbounds i64, i64* %56, i64 4 - %64 = bitcast i64* %63 to <4 x i64>* ⟪╋⟫ %59 = bitcast i64* %58 to <4 x i64>* - %wide.load76.1 = load <4 x i64>, <4 x i64>* %64, align …⟪╋⟫ %wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align … - %65 = 
getelementptr inbounds i64, i64* %61, i64 8 ⟪╋⟫ %60 = getelementptr inbounds i64, i64* %56, i64 8 - %66 = bitcast i64* %65 to <4 x i64>* ⟪╋⟫ %61 = bitcast i64* %60 to <4 x i64>* - %wide.load77.1 = load <4 x i64>, <4 x i64>* %66, align …⟪╋⟫ %wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align … - %67 = getelementptr inbounds i64, i64* %61, i64 12 ⟪╋⟫ %62 = getelementptr inbounds i64, i64* %56, i64 12 - %68 = bitcast i64* %67 to <4 x i64>* ⟪╋⟫ %63 = bitcast i64* %62 to <4 x i64>* - %wide.load78.1 = load <4 x i64>, <4 x i64>* %68, align …⟪╋⟫ %wide.load25.1 = load <4 x i64>, <4 x i64>* %63, align … - %69 = add <4 x i64> %wide.load75.1, %57 ⟪╋⟫ %64 = add <4 x i64> %wide.load22.1, %52 - %70 = add <4 x i64> %wide.load76.1, %58 ⟪╋⟫ %65 = add <4 x i64> %wide.load23.1, %53 - %71 = add <4 x i64> %wide.load77.1, %59 ⟪╋⟫ %66 = add <4 x i64> %wide.load24.1, %54 - %72 = add <4 x i64> %wide.load78.1, %60 ⟪╋⟫ %67 = add <4 x i64> %wide.load25.1, %55 - %73 = getelementptr inbounds i64, i64* %arrayptr2331, i…⟪╋⟫ %68 = getelementptr inbounds i64, i64* %arrayptr510, i6… - %74 = bitcast i64* %73 to <4 x i64>* ⟪╋⟫ %69 = bitcast i64* %68 to <4 x i64>* - store <4 x i64> %69, <4 x i64>* %74, align 8 ⟪╋⟫ store <4 x i64> %64, <4 x i64>* %69, align 8 - %75 = getelementptr inbounds i64, i64* %73, i64 4 ⟪╋⟫ %70 = getelementptr inbounds i64, i64* %68, i64 4 - %76 = bitcast i64* %75 to <4 x i64>* ⟪╋⟫ %71 = bitcast i64* %70 to <4 x i64>* - store <4 x i64> %70, <4 x i64>* %76, align 8 ⟪╋⟫ store <4 x i64> %65, <4 x i64>* %71, align 8 - %77 = getelementptr inbounds i64, i64* %73, i64 8 ⟪╋⟫ %72 = getelementptr inbounds i64, i64* %68, i64 8 - %78 = bitcast i64* %77 to <4 x i64>* ⟪╋⟫ %73 = bitcast i64* %72 to <4 x i64>* - store <4 x i64> %71, <4 x i64>* %78, align 8 ⟪╋⟫ store <4 x i64> %66, <4 x i64>* %73, align 8 - %79 = getelementptr inbounds i64, i64* %73, i64 12 ⟪╋⟫ %74 = getelementptr inbounds i64, i64* %68, i64 12 - %80 = bitcast i64* %79 to <4 x i64>* ⟪╋⟫ %75 = bitcast i64* %74 to <4 x i64>* - 
store <4 x i64> %72, <4 x i64>* %80, align 8 ⟪╋⟫ store <4 x i64> %67, <4 x i64>* %75, align 8 - %index.next.1 = add nuw i64 %index, 32  ┃  %index.next.1 = add nuw i64 %index, 32 - %niter.next.1 = add i64 %niter, 2  ┃  %niter.next.1 = add i64 %niter, 2 - %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter  ┃  %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter - br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… ┃  br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, lab… -  ┃  -middle.block.unr-lcssa: ; preds… ┃ middle.block.unr-lcssa: ; preds… - %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… ┃  %index.unr = phi i64 [ 0, %vector.ph ], [ %index.next.1… - %lcmp.mod.not = icmp eq i64 %xtraiter, 0  ┃  %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %middle.block, label %vector… ┃  br i1 %lcmp.mod.not, label %middle.block, label %vector… -  ┃  -vector.body.epil.preheader: ; preds… ┃ vector.body.epil.preheader: ; preds… - %81 = getelementptr inbounds i64, i64* %arrayptr29, i64…⟪╋⟫ %76 = getelementptr inbounds i64, i64* %arrayptr8, i64 … - %82 = bitcast i64* %81 to <4 x i64>* ⟪╋⟫ %77 = bitcast i64* %76 to <4 x i64>* - %wide.load.epil = load <4 x i64>, <4 x i64>* %82, align…⟪╋⟫ %wide.load.epil = load <4 x i64>, <4 x i64>* %77, align… - %83 = getelementptr inbounds i64, i64* %81, i64 4 ⟪╋⟫ %78 = getelementptr inbounds i64, i64* %76, i64 4 - %84 = bitcast i64* %83 to <4 x i64>* ⟪╋⟫ %79 = bitcast i64* %78 to <4 x i64>* - %wide.load66.epil = load <4 x i64>, <4 x i64>* %84, ali…⟪╋⟫ %wide.load13.epil = load <4 x i64>, <4 x i64>* %79, ali… - %85 = getelementptr inbounds i64, i64* %81, i64 8 ⟪╋⟫ %80 = getelementptr inbounds i64, i64* %76, i64 8 - %86 = bitcast i64* %85 to <4 x i64>* ⟪╋⟫ %81 = bitcast i64* %80 to <4 x i64>* - %wide.load67.epil = load <4 x i64>, <4 x i64>* %86, ali…⟪╋⟫ %wide.load14.epil = load <4 x i64>, <4 x i64>* %81, ali… - %87 = getelementptr inbounds i64, i64* %81, i64 12 ⟪╋⟫ %82 = getelementptr inbounds i64, i64* 
%76, i64 12 - %88 = bitcast i64* %87 to <4 x i64>* ⟪╋⟫ %83 = bitcast i64* %82 to <4 x i64>* - %wide.load68.epil = load <4 x i64>, <4 x i64>* %88, ali…⟪╋⟫ %wide.load15.epil = load <4 x i64>, <4 x i64>* %83, ali… - %89 = mul <4 x i64> %wide.load.epil, %broadcast.splat ⟪╋⟫ %84 = mul <4 x i64> %wide.load.epil, %broadcast.splat - %90 = mul <4 x i64> %wide.load66.epil, %broadcast.splat ⟪╋⟫ %85 = mul <4 x i64> %wide.load13.epil, %broadcast.splat - %91 = mul <4 x i64> %wide.load67.epil, %broadcast.splat ⟪╋⟫ %86 = mul <4 x i64> %wide.load14.epil, %broadcast.splat - %92 = mul <4 x i64> %wide.load68.epil, %broadcast.splat ⟪╋⟫ %87 = mul <4 x i64> %wide.load15.epil, %broadcast.splat - %93 = getelementptr inbounds i64, i64* %arrayptr1430, i…⟪╋⟫ %88 = getelementptr inbounds i64, i64* %arrayptr29, i64… - %94 = bitcast i64* %93 to <4 x i64>* ⟪╋⟫ %89 = bitcast i64* %88 to <4 x i64>* - %wide.load75.epil = load <4 x i64>, <4 x i64>* %94, ali…⟪╋⟫ %wide.load22.epil = load <4 x i64>, <4 x i64>* %89, ali… - %95 = getelementptr inbounds i64, i64* %93, i64 4 ⟪╋⟫ %90 = getelementptr inbounds i64, i64* %88, i64 4 - %96 = bitcast i64* %95 to <4 x i64>* ⟪╋⟫ %91 = bitcast i64* %90 to <4 x i64>* - %wide.load76.epil = load <4 x i64>, <4 x i64>* %96, ali…⟪╋⟫ %wide.load23.epil = load <4 x i64>, <4 x i64>* %91, ali… - %97 = getelementptr inbounds i64, i64* %93, i64 8 ⟪╋⟫ %92 = getelementptr inbounds i64, i64* %88, i64 8 - %98 = bitcast i64* %97 to <4 x i64>* ⟪╋⟫ %93 = bitcast i64* %92 to <4 x i64>* - %wide.load77.epil = load <4 x i64>, <4 x i64>* %98, ali…⟪╋⟫ %wide.load24.epil = load <4 x i64>, <4 x i64>* %93, ali… - %99 = getelementptr inbounds i64, i64* %93, i64 12 ⟪╋⟫ %94 = getelementptr inbounds i64, i64* %88, i64 12 - %100 = bitcast i64* %99 to <4 x i64>* ⟪╋⟫ %95 = bitcast i64* %94 to <4 x i64>* - %wide.load78.epil = load <4 x i64>, <4 x i64>* %100, al…⟪╋⟫ %wide.load25.epil = load <4 x i64>, <4 x i64>* %95, ali… - %101 = add <4 x i64> %wide.load75.epil, %89 ⟪╋⟫ %96 = add <4 x i64> 
%wide.load22.epil, %84 - %102 = add <4 x i64> %wide.load76.epil, %90 ⟪╋⟫ %97 = add <4 x i64> %wide.load23.epil, %85 - %103 = add <4 x i64> %wide.load77.epil, %91 ⟪╋⟫ %98 = add <4 x i64> %wide.load24.epil, %86 - %104 = add <4 x i64> %wide.load78.epil, %92 ⟪╋⟫ %99 = add <4 x i64> %wide.load25.epil, %87 - %105 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪╋⟫ %100 = getelementptr inbounds i64, i64* %arrayptr510, i… - %106 = bitcast i64* %105 to <4 x i64>* ⟪╋⟫ %101 = bitcast i64* %100 to <4 x i64>* - store <4 x i64> %101, <4 x i64>* %106, align 8 ⟪╋⟫ store <4 x i64> %96, <4 x i64>* %101, align 8 - %107 = getelementptr inbounds i64, i64* %105, i64 4 ⟪╋⟫ %102 = getelementptr inbounds i64, i64* %100, i64 4 - %108 = bitcast i64* %107 to <4 x i64>* ⟪╋⟫ %103 = bitcast i64* %102 to <4 x i64>* - store <4 x i64> %102, <4 x i64>* %108, align 8 ⟪╋⟫ store <4 x i64> %97, <4 x i64>* %103, align 8 - %109 = getelementptr inbounds i64, i64* %105, i64 8 ⟪╋⟫ %104 = getelementptr inbounds i64, i64* %100, i64 8 - %110 = bitcast i64* %109 to <4 x i64>* ⟪╋⟫ %105 = bitcast i64* %104 to <4 x i64>* - store <4 x i64> %103, <4 x i64>* %110, align 8 ⟪╋⟫ store <4 x i64> %98, <4 x i64>* %105, align 8 - %111 = getelementptr inbounds i64, i64* %105, i64 12 ⟪╋⟫ %106 = getelementptr inbounds i64, i64* %100, i64 12 - %112 = bitcast i64* %111 to <4 x i64>* ⟪╋⟫ %107 = bitcast i64* %106 to <4 x i64>* - store <4 x i64> %104, <4 x i64>* %112, align 8 ⟪╋⟫ store <4 x i64> %99, <4 x i64>* %107, align 8 - br label %middle.block  ┃  br label %middle.block -  ┃  -middle.block: ; preds… ┃ middle.block: ; preds… - %cmp.n = icmp eq i64 %exit.mainloop.at, %n.vec ⟪╋⟫ %cmp.n = icmp eq i64 %arraylen, %n.vec - br i1 %cmp.n, label %main.exit.selector, label %scalar.…⟪┫  - ⟪┫  -scalar.ph: ; preds…⟪┫  - %bc.resume.val = phi i64 [ %ind.end, %middle.block ], […⟪┫  - br label %idxend21 ⟪┫  - ⟪┫  -L31: ; preds…⟪┫  - ret void ⟪┫  - ⟪┫  -oob: ; preds…⟪┫  - %errorbox = alloca i64, align 8 ⟪┫  - store i64 %value_phi3.postloop, 
i64* %errorbox, align 8 ⟪┫  - call void @ijl_bounds_error_ints({}* %2, i64* nonnull %…⟪┫  - unreachable ⟪┫  - ⟪┫  -oob10: ; preds…⟪┫  - %errorbox11 = alloca i64, align 8 ⟪┫  - store i64 %value_phi3.postloop, i64* %errorbox11, align…⟪┫  - call void @ijl_bounds_error_ints({}* %3, i64* nonnull %…⟪┫  - unreachable ⟪┫  - ⟪┫  -oob19: ; preds…⟪┫  - %errorbox20 = alloca i64, align 8 ⟪┫  - store i64 %value_phi3.postloop, i64* %errorbox20, align…⟪┫  - call void @ijl_bounds_error_ints({}* %0, i64* nonnull %…⟪┫  - unreachable ⟪┫  - ⟪┫  -idxend21: ; preds…⟪┫  - %value_phi3 = phi i64 [ %119, %idxend21 ], [ %bc.resume…⟪┫  - %113 = add nsw i64 %value_phi3, -1 ⟪┫  - %114 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫  - %arrayref = load i64, i64* %114, align 8 ⟪┫  - %115 = mul i64 %arrayref, %1 ⟪┫  - %116 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫  - %arrayref15 = load i64, i64* %116, align 8 ⟪┫  - %117 = add i64 %arrayref15, %115 ⟪┫  - %118 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫  - store i64 %117, i64* %118, align 8 ⟪┫  - %119 = add nuw nsw i64 %value_phi3, 1 ⟪┫  - %.not51 = icmp ult i64 %value_phi3, %exit.mainloop.at ⟪┫  - br i1 %.not51, label %idxend21, label %main.exit.select…⟪┫  - ⟪┫  -main.exit.selector: ; preds…⟪┫  - %value_phi3.lcssa = phi i64 [ %exit.mainloop.at, %middl…⟪┫  - %.lcssa = phi i64 [ %ind.end, %middle.block ], [ %119, …⟪┫  - %120 = icmp ult i64 %value_phi3.lcssa, %arraylen ⟪┫  - br i1 %120, label %main.pseudo.exit, label %L31 ⟪┫  - ⟪┫  -main.pseudo.exit: ; preds…⟪┫  - %value_phi3.copy = phi i64 [ 1, %L13.preheader ], [ %.l…⟪┫  - br label %L13.postloop ⟪┫  - ⟪┫  -L13.postloop: ; preds…⟪┫  - %value_phi3.postloop = phi i64 [ %127, %idxend21.postlo…⟪┫  - %121 = add i64 %value_phi3.postloop, -1 ⟪┫  - %inbounds.postloop = icmp ult i64 %121, %arraylen6 ⟪┫  - br i1 %inbounds.postloop, label %idxend.postloop, label…⟪┫  -  ┣⟫ br i1 %cmp.n, label %L32, label %scalar.ph -  ┃  -idxend.postloop: ; preds…⟪┫  - %inbounds9.postloop = icmp ult 
i64 %121, %arraylen8 ⟪┫  - br i1 %inbounds9.postloop, label %idxend12.postloop, la…⟪┫  -  ┣⟫scalar.ph: ; preds… -  ┣⟫ %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0… -  ┣⟫ br label %L12 -  ┃  -idxend12.postloop: ; preds…⟪┫  - %inbounds18.postloop = icmp ult i64 %121, %arraylen ⟪┫  - br i1 %inbounds18.postloop, label %idxend21.postloop, l…⟪┫  -  ┣⟫L12: ; preds… -  ┣⟫ %value_phi12 = phi i64 [ %bc.resume.val, %scalar.ph ], … -  ┣⟫ %108 = getelementptr inbounds i64, i64* %arrayptr8, i64… -  ┣⟫ %arrayref = load i64, i64* %108, align 8 -  ┣⟫ %109 = mul i64 %arrayref, %1 -  ┣⟫ %110 = getelementptr inbounds i64, i64* %arrayptr29, i6… -  ┣⟫ %arrayref3 = load i64, i64* %110, align 8 -  ┣⟫ %111 = add i64 %arrayref3, %109 -  ┣⟫ %112 = getelementptr inbounds i64, i64* %arrayptr510, i… -  ┣⟫ store i64 %111, i64* %112, align 8 -  ┣⟫ %113 = add nuw nsw i64 %value_phi12, 1 -  ┣⟫ %exitcond.not = icmp eq i64 %113, %arraylen -  ┣⟫ br i1 %exitcond.not, label %L32, label %L12 -  ┃  -idxend21.postloop: ; preds…⟪┫  - %122 = getelementptr inbounds i64, i64* %arrayptr29, i6…⟪┫  - %arrayref.postloop = load i64, i64* %122, align 8 ⟪┫  - %123 = mul i64 %arrayref.postloop, %1 ⟪┫  - %124 = getelementptr inbounds i64, i64* %arrayptr1430, …⟪┫  - %arrayref15.postloop = load i64, i64* %124, align 8 ⟪┫  - %125 = add i64 %arrayref15.postloop, %123 ⟪┫  - %126 = getelementptr inbounds i64, i64* %arrayptr2331, …⟪┫  - store i64 %125, i64* %126, align 8 ⟪┫  - %.not.not32.postloop = icmp eq i64 %value_phi3.postloop…⟪┫  - %127 = add nuw nsw i64 %value_phi3.postloop, 1 ⟪┫  - br i1 %.not.not32.postloop, label %L31, label %L13.post…⟪┫  -  ┣⟫L32: ; preds… -  ┣⟫ ret void -}  ┃ } -  ┃  \ No newline at end of file diff --git a/test/references/saxpy_x86_LINES.ll b/test/references/saxpy_x86_LINES.ll deleted file mode 100644 index 737fb07..0000000 --- a/test/references/saxpy_x86_LINES.ll +++ /dev/null @@ -1,324 +0,0 @@ - 1 ; Function Attrs: uwtable ┃ ; Function Attrs: uwtable 1 - 2 define void @saxpy({}* 
noundef nonnull align 16 deref…⟪╋⟫define void @saxpy_simd({}* noundef nonnull align 16 …2 - 3 top: ┃ top: 3 - 4 %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* ┃ %4 = bitcast {}* %0 to { i8*, i64, i16, i16, i32 }* 4 - 5 %arraylen_ptr = getelementptr inbounds { i8*, i64, … ┃ %arraylen_ptr = getelementptr inbounds { i8*, i64, …5 - 6 %arraylen = load i64, i64* %arraylen_ptr, align 8 ┃ %arraylen = load i64, i64* %arraylen_ptr, align 8 6 - 7 %.not.not = icmp eq i64 %arraylen, 0 ⟪╋⟫ %.not = icmp eq i64 %arraylen, 0 7 - 8 br i1 %.not.not, label %L31, label %L13.preheader ⟪╋⟫ br i1 %.not, label %L32, label %L12.lr.ph 8 - 9 ⟪┫ - 10 L13.preheader: ; p…⟪┫ - 11 %5 = bitcast {}* %2 to { i8*, i64, i16, i16, i32 }* ⟪┫ - 12 %arraylen_ptr5 = getelementptr inbounds { i8*, i64,…⟪┫ - 13 %arraylen6 = load i64, i64* %arraylen_ptr5, align 8 ⟪┫ - 14 %6 = bitcast {}* %3 to { i8*, i64, i16, i16, i32 }* ⟪┫ - 15 %arraylen_ptr7 = getelementptr inbounds { i8*, i64,…⟪┫ - 16 %arraylen8 = load i64, i64* %arraylen_ptr7, align 8 ⟪┫ - 17 %7 = bitcast {}* %2 to i64** ⟪┫ - 18 %arrayptr29 = load i64*, i64** %7, align 8 ⟪┫ - 19 %8 = bitcast {}* %3 to i64** ⟪┫ - 20 %arrayptr1430 = load i64*, i64** %8, align 8 ⟪┫ - 21 %9 = bitcast {}* %0 to i64** ⟪┫ - 22 %arrayptr2331 = load i64*, i64** %9, align 8 ⟪┫ - 23 %umin = call i64 @llvm.umin.i64(i64 %arraylen6, i64…⟪┫ - 24 %smin = call i64 @llvm.smin.i64(i64 %arraylen8, i64…⟪┫ - 25 %10 = sub i64 %arraylen8, %smin ⟪┫ - 26 %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1) ⟪┫ - 27 %11 = add nsw i64 %smax, 1 ⟪┫ - 28 %12 = mul nuw nsw i64 %10, %11 ⟪┫ - 29 %umin36 = call i64 @llvm.umin.i64(i64 %umin, i64 %1…⟪┫ - 30 %exit.mainloop.at = call i64 @llvm.umin.i64(i64 %um…⟪┫ - 31 %.not = icmp eq i64 %exit.mainloop.at, 0 ⟪┫ - 32 br i1 %.not, label %main.pseudo.exit, label %idxend…⟪┫ - 33 ⟪┫ - 34 idxend21.preheader: ; p…⟪┫ - 35 %min.iters.check = icmp ult i64 %exit.mainloop.at, …⟪┫ - 36 br i1 %min.iters.check, label %scalar.ph, label %ve…⟪┫ - 37 ┃ 9 - 38 vector.memcheck: ; 
p…⟪┫ - 39 %scevgep = getelementptr i64, i64* %arrayptr2331, i…⟪┫ - 40 %scevgep58 = getelementptr i64, i64* %arrayptr29, i…⟪┫ - 41 %scevgep61 = getelementptr i64, i64* %arrayptr1430,…⟪┫ - 42 %bound0 = icmp ult i64* %arrayptr2331, %scevgep58 ⟪┫ - 43 %bound1 = icmp ult i64* %arrayptr29, %scevgep ⟪┫ - 44 %found.conflict = and i1 %bound0, %bound1 ⟪┫ - 45 %bound063 = icmp ult i64* %arrayptr2331, %scevgep61 ⟪┫ - 46 %bound164 = icmp ult i64* %arrayptr1430, %scevgep ⟪┫ - 47 %found.conflict65 = and i1 %bound063, %bound164 ⟪┫ - 48 %conflict.rdx = or i1 %found.conflict, %found.confl…⟪┫ - 49 br i1 %conflict.rdx, label %scalar.ph, label %vecto…⟪╋⟫ br i1 %min.iters.check, label %scalar.ph, label %ve…10 - ┣⟫L12.lr.ph: ; p…11 - ┣⟫ %5 = bitcast {}* %2 to i64** 12 - ┣⟫ %arrayptr8 = load i64*, i64** %5, align 8 13 - ┣⟫ %6 = bitcast {}* %3 to i64** 14 - ┣⟫ %arrayptr29 = load i64*, i64** %6, align 8 15 - ┣⟫ %7 = bitcast {}* %0 to i64** 16 - ┣⟫ %arrayptr510 = load i64*, i64** %7, align 8 17 - ┣⟫ %min.iters.check = icmp ult i64 %arraylen, 16 18 - 50 ┃ 19 - 51 vector.ph: ; p…⟪╋⟫vector.ph: ; p…20 - 52 %n.vec = and i64 %exit.mainloop.at, 922337203685477…⟪╋⟫ %n.vec = and i64 %arraylen, 9223372036854775792 21 - 53 %ind.end = or i64 %n.vec, 1 ⟪┫ - 54 %broadcast.splatinsert = insertelement <4 x i64> po… ┃ %broadcast.splatinsert = insertelement <4 x i64> po…22 - 55 %broadcast.splat = shufflevector <4 x i64> %broadca… ┃ %broadcast.splat = shufflevector <4 x i64> %broadca…23 - 56 %13 = add nsw i64 %n.vec, -16 ⟪╋⟫ %8 = add nsw i64 %n.vec, -16 24 - 57 %14 = lshr exact i64 %13, 4 ⟪╋⟫ %9 = lshr exact i64 %8, 4 25 - 58 %15 = add nuw nsw i64 %14, 1 ⟪╋⟫ %10 = add nuw nsw i64 %9, 1 26 - 59 %xtraiter = and i64 %15, 1 ⟪╋⟫ %xtraiter = and i64 %10, 1 27 - 60 %16 = icmp eq i64 %13, 0 ⟪╋⟫ %11 = icmp eq i64 %8, 0 28 - 61 br i1 %16, label %middle.block.unr-lcssa, label %ve…⟪╋⟫ br i1 %11, label %middle.block.unr-lcssa, label %ve…29 - 62 ┃ 30 - 63 vector.ph.new: ; p… ┃ vector.ph.new: ; p…31 - 64 %unroll_iter = and 
i64 %15, 2305843009213693950 ⟪╋⟫ %unroll_iter = and i64 %10, 2305843009213693950 32 - 65 br label %vector.body ┃ br label %vector.body 33 - 66 ┃ 34 - 67 vector.body: ; p… ┃ vector.body: ; p…35 - 68 %index = phi i64 [ 0, %vector.ph.new ], [ %index.ne… ┃ %index = phi i64 [ 0, %vector.ph.new ], [ %index.ne…36 - 69 %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.ne… ┃ %niter = phi i64 [ 0, %vector.ph.new ], [ %niter.ne…37 - 70 %17 = getelementptr inbounds i64, i64* %arrayptr29,…⟪╋⟫ %12 = getelementptr inbounds i64, i64* %arrayptr8, …38 - 71 %18 = bitcast i64* %17 to <4 x i64>* ⟪╋⟫ %13 = bitcast i64* %12 to <4 x i64>* 39 - 72 %wide.load = load <4 x i64>, <4 x i64>* %18, align …⟪╋⟫ %wide.load = load <4 x i64>, <4 x i64>* %13, align …40 - 73 %19 = getelementptr inbounds i64, i64* %17, i64 4 ⟪╋⟫ %14 = getelementptr inbounds i64, i64* %12, i64 4 41 - 74 %20 = bitcast i64* %19 to <4 x i64>* ⟪╋⟫ %15 = bitcast i64* %14 to <4 x i64>* 42 - 75 %wide.load66 = load <4 x i64>, <4 x i64>* %20, alig…⟪╋⟫ %wide.load13 = load <4 x i64>, <4 x i64>* %15, alig…43 - 76 %21 = getelementptr inbounds i64, i64* %17, i64 8 ⟪╋⟫ %16 = getelementptr inbounds i64, i64* %12, i64 8 44 - 77 %22 = bitcast i64* %21 to <4 x i64>* ⟪╋⟫ %17 = bitcast i64* %16 to <4 x i64>* 45 - 78 %wide.load67 = load <4 x i64>, <4 x i64>* %22, alig…⟪╋⟫ %wide.load14 = load <4 x i64>, <4 x i64>* %17, alig…46 - 79 %23 = getelementptr inbounds i64, i64* %17, i64 12 ⟪╋⟫ %18 = getelementptr inbounds i64, i64* %12, i64 12 47 - 80 %24 = bitcast i64* %23 to <4 x i64>* ⟪╋⟫ %19 = bitcast i64* %18 to <4 x i64>* 48 - 81 %wide.load68 = load <4 x i64>, <4 x i64>* %24, alig…⟪╋⟫ %wide.load15 = load <4 x i64>, <4 x i64>* %19, alig…49 - 82 %25 = mul <4 x i64> %wide.load, %broadcast.splat ⟪╋⟫ %20 = mul <4 x i64> %wide.load, %broadcast.splat 50 - 83 %26 = mul <4 x i64> %wide.load66, %broadcast.splat ⟪╋⟫ %21 = mul <4 x i64> %wide.load13, %broadcast.splat 51 - 84 %27 = mul <4 x i64> %wide.load67, %broadcast.splat ⟪╋⟫ %22 = mul <4 x i64> 
%wide.load14, %broadcast.splat 52 - 85 %28 = mul <4 x i64> %wide.load68, %broadcast.splat ⟪╋⟫ %23 = mul <4 x i64> %wide.load15, %broadcast.splat 53 - 86 %29 = getelementptr inbounds i64, i64* %arrayptr143…⟪╋⟫ %24 = getelementptr inbounds i64, i64* %arrayptr29,…54 - 87 %30 = bitcast i64* %29 to <4 x i64>* ⟪╋⟫ %25 = bitcast i64* %24 to <4 x i64>* 55 - 88 %wide.load75 = load <4 x i64>, <4 x i64>* %30, alig…⟪╋⟫ %wide.load22 = load <4 x i64>, <4 x i64>* %25, alig…56 - 89 %31 = getelementptr inbounds i64, i64* %29, i64 4 ⟪╋⟫ %26 = getelementptr inbounds i64, i64* %24, i64 4 57 - 90 %32 = bitcast i64* %31 to <4 x i64>* ⟪╋⟫ %27 = bitcast i64* %26 to <4 x i64>* 58 - 91 %wide.load76 = load <4 x i64>, <4 x i64>* %32, alig…⟪╋⟫ %wide.load23 = load <4 x i64>, <4 x i64>* %27, alig…59 - 92 %33 = getelementptr inbounds i64, i64* %29, i64 8 ⟪╋⟫ %28 = getelementptr inbounds i64, i64* %24, i64 8 60 - 93 %34 = bitcast i64* %33 to <4 x i64>* ⟪╋⟫ %29 = bitcast i64* %28 to <4 x i64>* 61 - 94 %wide.load77 = load <4 x i64>, <4 x i64>* %34, alig…⟪╋⟫ %wide.load24 = load <4 x i64>, <4 x i64>* %29, alig…62 - 95 %35 = getelementptr inbounds i64, i64* %29, i64 12 ⟪╋⟫ %30 = getelementptr inbounds i64, i64* %24, i64 12 63 - 96 %36 = bitcast i64* %35 to <4 x i64>* ⟪╋⟫ %31 = bitcast i64* %30 to <4 x i64>* 64 - 97 %wide.load78 = load <4 x i64>, <4 x i64>* %36, alig…⟪╋⟫ %wide.load25 = load <4 x i64>, <4 x i64>* %31, alig…65 - 98 %37 = add <4 x i64> %wide.load75, %25 ⟪╋⟫ %32 = add <4 x i64> %wide.load22, %20 66 - 99 %38 = add <4 x i64> %wide.load76, %26 ⟪╋⟫ %33 = add <4 x i64> %wide.load23, %21 67 -100 %39 = add <4 x i64> %wide.load77, %27 ⟪╋⟫ %34 = add <4 x i64> %wide.load24, %22 68 -101 %40 = add <4 x i64> %wide.load78, %28 ⟪╋⟫ %35 = add <4 x i64> %wide.load25, %23 69 -102 %41 = getelementptr inbounds i64, i64* %arrayptr233…⟪╋⟫ %36 = getelementptr inbounds i64, i64* %arrayptr510…70 -103 %42 = bitcast i64* %41 to <4 x i64>* ⟪╋⟫ %37 = bitcast i64* %36 to <4 x i64>* 71 -104 store <4 x i64> %37, <4 x 
i64>* %42, align 8 ⟪╋⟫ store <4 x i64> %32, <4 x i64>* %37, align 8 72 -105 %43 = getelementptr inbounds i64, i64* %41, i64 4 ⟪╋⟫ %38 = getelementptr inbounds i64, i64* %36, i64 4 73 -106 %44 = bitcast i64* %43 to <4 x i64>* ⟪╋⟫ %39 = bitcast i64* %38 to <4 x i64>* 74 -107 store <4 x i64> %38, <4 x i64>* %44, align 8 ⟪╋⟫ store <4 x i64> %33, <4 x i64>* %39, align 8 75 -108 %45 = getelementptr inbounds i64, i64* %41, i64 8 ⟪╋⟫ %40 = getelementptr inbounds i64, i64* %36, i64 8 76 -109 %46 = bitcast i64* %45 to <4 x i64>* ⟪╋⟫ %41 = bitcast i64* %40 to <4 x i64>* 77 -110 store <4 x i64> %39, <4 x i64>* %46, align 8 ⟪╋⟫ store <4 x i64> %34, <4 x i64>* %41, align 8 78 -111 %47 = getelementptr inbounds i64, i64* %41, i64 12 ⟪╋⟫ %42 = getelementptr inbounds i64, i64* %36, i64 12 79 -112 %48 = bitcast i64* %47 to <4 x i64>* ⟪╋⟫ %43 = bitcast i64* %42 to <4 x i64>* 80 -113 store <4 x i64> %40, <4 x i64>* %48, align 8 ⟪╋⟫ store <4 x i64> %35, <4 x i64>* %43, align 8 81 -114 %index.next = or i64 %index, 16 ┃ %index.next = or i64 %index, 16 82 -115 %49 = getelementptr inbounds i64, i64* %arrayptr29,…⟪╋⟫ %44 = getelementptr inbounds i64, i64* %arrayptr8, …83 -116 %50 = bitcast i64* %49 to <4 x i64>* ⟪╋⟫ %45 = bitcast i64* %44 to <4 x i64>* 84 -117 %wide.load.1 = load <4 x i64>, <4 x i64>* %50, alig…⟪╋⟫ %wide.load.1 = load <4 x i64>, <4 x i64>* %45, alig…85 -118 %51 = getelementptr inbounds i64, i64* %49, i64 4 ⟪╋⟫ %46 = getelementptr inbounds i64, i64* %44, i64 4 86 -119 %52 = bitcast i64* %51 to <4 x i64>* ⟪╋⟫ %47 = bitcast i64* %46 to <4 x i64>* 87 -120 %wide.load66.1 = load <4 x i64>, <4 x i64>* %52, al…⟪╋⟫ %wide.load13.1 = load <4 x i64>, <4 x i64>* %47, al…88 -121 %53 = getelementptr inbounds i64, i64* %49, i64 8 ⟪╋⟫ %48 = getelementptr inbounds i64, i64* %44, i64 8 89 -122 %54 = bitcast i64* %53 to <4 x i64>* ⟪╋⟫ %49 = bitcast i64* %48 to <4 x i64>* 90 -123 %wide.load67.1 = load <4 x i64>, <4 x i64>* %54, al…⟪╋⟫ %wide.load14.1 = load <4 x i64>, <4 x i64>* %49, al…91 -124 
%55 = getelementptr inbounds i64, i64* %49, i64 12 ⟪╋⟫ %50 = getelementptr inbounds i64, i64* %44, i64 12 92 -125 %56 = bitcast i64* %55 to <4 x i64>* ⟪╋⟫ %51 = bitcast i64* %50 to <4 x i64>* 93 -126 %wide.load68.1 = load <4 x i64>, <4 x i64>* %56, al…⟪╋⟫ %wide.load15.1 = load <4 x i64>, <4 x i64>* %51, al…94 -127 %57 = mul <4 x i64> %wide.load.1, %broadcast.splat ⟪╋⟫ %52 = mul <4 x i64> %wide.load.1, %broadcast.splat 95 -128 %58 = mul <4 x i64> %wide.load66.1, %broadcast.spla…⟪╋⟫ %53 = mul <4 x i64> %wide.load13.1, %broadcast.spla…96 -129 %59 = mul <4 x i64> %wide.load67.1, %broadcast.spla…⟪╋⟫ %54 = mul <4 x i64> %wide.load14.1, %broadcast.spla…97 -130 %60 = mul <4 x i64> %wide.load68.1, %broadcast.spla…⟪╋⟫ %55 = mul <4 x i64> %wide.load15.1, %broadcast.spla…98 -131 %61 = getelementptr inbounds i64, i64* %arrayptr143…⟪╋⟫ %56 = getelementptr inbounds i64, i64* %arrayptr29,…99 -132 %62 = bitcast i64* %61 to <4 x i64>* ⟪╋⟫ %57 = bitcast i64* %56 to <4 x i64>* 100 -133 %wide.load75.1 = load <4 x i64>, <4 x i64>* %62, al…⟪╋⟫ %wide.load22.1 = load <4 x i64>, <4 x i64>* %57, al…101 -134 %63 = getelementptr inbounds i64, i64* %61, i64 4 ⟪╋⟫ %58 = getelementptr inbounds i64, i64* %56, i64 4 102 -135 %64 = bitcast i64* %63 to <4 x i64>* ⟪╋⟫ %59 = bitcast i64* %58 to <4 x i64>* 103 -136 %wide.load76.1 = load <4 x i64>, <4 x i64>* %64, al…⟪╋⟫ %wide.load23.1 = load <4 x i64>, <4 x i64>* %59, al…104 -137 %65 = getelementptr inbounds i64, i64* %61, i64 8 ⟪╋⟫ %60 = getelementptr inbounds i64, i64* %56, i64 8 105 -138 %66 = bitcast i64* %65 to <4 x i64>* ⟪╋⟫ %61 = bitcast i64* %60 to <4 x i64>* 106 -139 %wide.load77.1 = load <4 x i64>, <4 x i64>* %66, al…⟪╋⟫ %wide.load24.1 = load <4 x i64>, <4 x i64>* %61, al…107 -140 %67 = getelementptr inbounds i64, i64* %61, i64 12 ⟪╋⟫ %62 = getelementptr inbounds i64, i64* %56, i64 12 108 -141 %68 = bitcast i64* %67 to <4 x i64>* ⟪╋⟫ %63 = bitcast i64* %62 to <4 x i64>* 109 -142 %wide.load78.1 = load <4 x i64>, <4 x i64>* %68, al…⟪╋⟫ 
%wide.load25.1 = load <4 x i64>, <4 x i64>* %63, al…110 -143 %69 = add <4 x i64> %wide.load75.1, %57 ⟪╋⟫ %64 = add <4 x i64> %wide.load22.1, %52 111 -144 %70 = add <4 x i64> %wide.load76.1, %58 ⟪╋⟫ %65 = add <4 x i64> %wide.load23.1, %53 112 -145 %71 = add <4 x i64> %wide.load77.1, %59 ⟪╋⟫ %66 = add <4 x i64> %wide.load24.1, %54 113 -146 %72 = add <4 x i64> %wide.load78.1, %60 ⟪╋⟫ %67 = add <4 x i64> %wide.load25.1, %55 114 -147 %73 = getelementptr inbounds i64, i64* %arrayptr233…⟪╋⟫ %68 = getelementptr inbounds i64, i64* %arrayptr510…115 -148 %74 = bitcast i64* %73 to <4 x i64>* ⟪╋⟫ %69 = bitcast i64* %68 to <4 x i64>* 116 -149 store <4 x i64> %69, <4 x i64>* %74, align 8 ⟪╋⟫ store <4 x i64> %64, <4 x i64>* %69, align 8 117 -150 %75 = getelementptr inbounds i64, i64* %73, i64 4 ⟪╋⟫ %70 = getelementptr inbounds i64, i64* %68, i64 4 118 -151 %76 = bitcast i64* %75 to <4 x i64>* ⟪╋⟫ %71 = bitcast i64* %70 to <4 x i64>* 119 -152 store <4 x i64> %70, <4 x i64>* %76, align 8 ⟪╋⟫ store <4 x i64> %65, <4 x i64>* %71, align 8 120 -153 %77 = getelementptr inbounds i64, i64* %73, i64 8 ⟪╋⟫ %72 = getelementptr inbounds i64, i64* %68, i64 8 121 -154 %78 = bitcast i64* %77 to <4 x i64>* ⟪╋⟫ %73 = bitcast i64* %72 to <4 x i64>* 122 -155 store <4 x i64> %71, <4 x i64>* %78, align 8 ⟪╋⟫ store <4 x i64> %66, <4 x i64>* %73, align 8 123 -156 %79 = getelementptr inbounds i64, i64* %73, i64 12 ⟪╋⟫ %74 = getelementptr inbounds i64, i64* %68, i64 12 124 -157 %80 = bitcast i64* %79 to <4 x i64>* ⟪╋⟫ %75 = bitcast i64* %74 to <4 x i64>* 125 -158 store <4 x i64> %72, <4 x i64>* %80, align 8 ⟪╋⟫ store <4 x i64> %67, <4 x i64>* %75, align 8 126 -159 %index.next.1 = add nuw i64 %index, 32 ┃ %index.next.1 = add nuw i64 %index, 32 127 -160 %niter.next.1 = add i64 %niter, 2 ┃ %niter.next.1 = add i64 %niter, 2 128 -161 %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_… ┃ %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_…129 -162 br i1 %niter.ncmp.1, label %middle.block.unr-lcssa,… ┃ br i1 
%niter.ncmp.1, label %middle.block.unr-lcssa,…130 -163 ┃ 131 -164 middle.block.unr-lcssa: ; p… ┃ middle.block.unr-lcssa: ; p…132 -165 %index.unr = phi i64 [ 0, %vector.ph ], [ %index.ne… ┃ %index.unr = phi i64 [ 0, %vector.ph ], [ %index.ne…133 -166 %lcmp.mod.not = icmp eq i64 %xtraiter, 0 ┃ %lcmp.mod.not = icmp eq i64 %xtraiter, 0 134 -167 br i1 %lcmp.mod.not, label %middle.block, label %ve… ┃ br i1 %lcmp.mod.not, label %middle.block, label %ve…135 -168 ┃ 136 -169 vector.body.epil.preheader: ; p… ┃ vector.body.epil.preheader: ; p…137 -170 %81 = getelementptr inbounds i64, i64* %arrayptr29,…⟪╋⟫ %76 = getelementptr inbounds i64, i64* %arrayptr8, …138 -171 %82 = bitcast i64* %81 to <4 x i64>* ⟪╋⟫ %77 = bitcast i64* %76 to <4 x i64>* 139 -172 %wide.load.epil = load <4 x i64>, <4 x i64>* %82, a…⟪╋⟫ %wide.load.epil = load <4 x i64>, <4 x i64>* %77, a…140 -173 %83 = getelementptr inbounds i64, i64* %81, i64 4 ⟪╋⟫ %78 = getelementptr inbounds i64, i64* %76, i64 4 141 -174 %84 = bitcast i64* %83 to <4 x i64>* ⟪╋⟫ %79 = bitcast i64* %78 to <4 x i64>* 142 -175 %wide.load66.epil = load <4 x i64>, <4 x i64>* %84,…⟪╋⟫ %wide.load13.epil = load <4 x i64>, <4 x i64>* %79,…143 -176 %85 = getelementptr inbounds i64, i64* %81, i64 8 ⟪╋⟫ %80 = getelementptr inbounds i64, i64* %76, i64 8 144 -177 %86 = bitcast i64* %85 to <4 x i64>* ⟪╋⟫ %81 = bitcast i64* %80 to <4 x i64>* 145 -178 %wide.load67.epil = load <4 x i64>, <4 x i64>* %86,…⟪╋⟫ %wide.load14.epil = load <4 x i64>, <4 x i64>* %81,…146 -179 %87 = getelementptr inbounds i64, i64* %81, i64 12 ⟪╋⟫ %82 = getelementptr inbounds i64, i64* %76, i64 12 147 -180 %88 = bitcast i64* %87 to <4 x i64>* ⟪╋⟫ %83 = bitcast i64* %82 to <4 x i64>* 148 -181 %wide.load68.epil = load <4 x i64>, <4 x i64>* %88,…⟪╋⟫ %wide.load15.epil = load <4 x i64>, <4 x i64>* %83,…149 -182 %89 = mul <4 x i64> %wide.load.epil, %broadcast.spl…⟪╋⟫ %84 = mul <4 x i64> %wide.load.epil, %broadcast.spl…150 -183 %90 = mul <4 x i64> %wide.load66.epil, %broadcast.s…⟪╋⟫ %85 = 
mul <4 x i64> %wide.load13.epil, %broadcast.s…151 -184 %91 = mul <4 x i64> %wide.load67.epil, %broadcast.s…⟪╋⟫ %86 = mul <4 x i64> %wide.load14.epil, %broadcast.s…152 -185 %92 = mul <4 x i64> %wide.load68.epil, %broadcast.s…⟪╋⟫ %87 = mul <4 x i64> %wide.load15.epil, %broadcast.s…153 -186 %93 = getelementptr inbounds i64, i64* %arrayptr143…⟪╋⟫ %88 = getelementptr inbounds i64, i64* %arrayptr29,…154 -187 %94 = bitcast i64* %93 to <4 x i64>* ⟪╋⟫ %89 = bitcast i64* %88 to <4 x i64>* 155 -188 %wide.load75.epil = load <4 x i64>, <4 x i64>* %94,…⟪╋⟫ %wide.load22.epil = load <4 x i64>, <4 x i64>* %89,…156 -189 %95 = getelementptr inbounds i64, i64* %93, i64 4 ⟪╋⟫ %90 = getelementptr inbounds i64, i64* %88, i64 4 157 -190 %96 = bitcast i64* %95 to <4 x i64>* ⟪╋⟫ %91 = bitcast i64* %90 to <4 x i64>* 158 -191 %wide.load76.epil = load <4 x i64>, <4 x i64>* %96,…⟪╋⟫ %wide.load23.epil = load <4 x i64>, <4 x i64>* %91,…159 -192 %97 = getelementptr inbounds i64, i64* %93, i64 8 ⟪╋⟫ %92 = getelementptr inbounds i64, i64* %88, i64 8 160 -193 %98 = bitcast i64* %97 to <4 x i64>* ⟪╋⟫ %93 = bitcast i64* %92 to <4 x i64>* 161 -194 %wide.load77.epil = load <4 x i64>, <4 x i64>* %98,…⟪╋⟫ %wide.load24.epil = load <4 x i64>, <4 x i64>* %93,…162 -195 %99 = getelementptr inbounds i64, i64* %93, i64 12 ⟪╋⟫ %94 = getelementptr inbounds i64, i64* %88, i64 12 163 -196 %100 = bitcast i64* %99 to <4 x i64>* ⟪╋⟫ %95 = bitcast i64* %94 to <4 x i64>* 164 -197 %wide.load78.epil = load <4 x i64>, <4 x i64>* %100…⟪╋⟫ %wide.load25.epil = load <4 x i64>, <4 x i64>* %95,…165 -198 %101 = add <4 x i64> %wide.load75.epil, %89 ⟪╋⟫ %96 = add <4 x i64> %wide.load22.epil, %84 166 -199 %102 = add <4 x i64> %wide.load76.epil, %90 ⟪╋⟫ %97 = add <4 x i64> %wide.load23.epil, %85 167 -200 %103 = add <4 x i64> %wide.load77.epil, %91 ⟪╋⟫ %98 = add <4 x i64> %wide.load24.epil, %86 168 -201 %104 = add <4 x i64> %wide.load78.epil, %92 ⟪╋⟫ %99 = add <4 x i64> %wide.load25.epil, %87 169 -202 %105 = getelementptr inbounds i64, 
i64* %arrayptr23…⟪╋⟫ %100 = getelementptr inbounds i64, i64* %arrayptr51…170 -203 %106 = bitcast i64* %105 to <4 x i64>* ⟪╋⟫ %101 = bitcast i64* %100 to <4 x i64>* 171 -204 store <4 x i64> %101, <4 x i64>* %106, align 8 ⟪╋⟫ store <4 x i64> %96, <4 x i64>* %101, align 8 172 -205 %107 = getelementptr inbounds i64, i64* %105, i64 4 ⟪╋⟫ %102 = getelementptr inbounds i64, i64* %100, i64 4 173 -206 %108 = bitcast i64* %107 to <4 x i64>* ⟪╋⟫ %103 = bitcast i64* %102 to <4 x i64>* 174 -207 store <4 x i64> %102, <4 x i64>* %108, align 8 ⟪╋⟫ store <4 x i64> %97, <4 x i64>* %103, align 8 175 -208 %109 = getelementptr inbounds i64, i64* %105, i64 8 ⟪╋⟫ %104 = getelementptr inbounds i64, i64* %100, i64 8 176 -209 %110 = bitcast i64* %109 to <4 x i64>* ⟪╋⟫ %105 = bitcast i64* %104 to <4 x i64>* 177 -210 store <4 x i64> %103, <4 x i64>* %110, align 8 ⟪╋⟫ store <4 x i64> %98, <4 x i64>* %105, align 8 178 -211 %111 = getelementptr inbounds i64, i64* %105, i64 1…⟪╋⟫ %106 = getelementptr inbounds i64, i64* %100, i64 1…179 -212 %112 = bitcast i64* %111 to <4 x i64>* ⟪╋⟫ %107 = bitcast i64* %106 to <4 x i64>* 180 -213 store <4 x i64> %104, <4 x i64>* %112, align 8 ⟪╋⟫ store <4 x i64> %99, <4 x i64>* %107, align 8 181 -214 br label %middle.block ┃ br label %middle.block 182 -215 ┃ 183 -216 middle.block: ; p… ┃ middle.block: ; p…184 -217 %cmp.n = icmp eq i64 %exit.mainloop.at, %n.vec ⟪╋⟫ %cmp.n = icmp eq i64 %arraylen, %n.vec 185 -218 br i1 %cmp.n, label %main.exit.selector, label %sca…⟪┫ -219 ⟪┫ -220 scalar.ph: ; p…⟪┫ -221 %bc.resume.val = phi i64 [ %ind.end, %middle.block …⟪┫ -222 br label %idxend21 ⟪┫ -223 ⟪┫ -224 L31: ; p…⟪┫ -225 ret void ⟪┫ -226 ⟪┫ -227 oob: ; p…⟪┫ -228 %errorbox = alloca i64, align 8 ⟪┫ -229 store i64 %value_phi3.postloop, i64* %errorbox, ali…⟪┫ -230 call void @ijl_bounds_error_ints({}* %2, i64* nonnu…⟪┫ -231 unreachable ⟪┫ -232 ⟪┫ -233 oob10: ; p…⟪┫ -234 %errorbox11 = alloca i64, align 8 ⟪┫ -235 store i64 %value_phi3.postloop, i64* %errorbox11, a…⟪┫ -236 call void 
@ijl_bounds_error_ints({}* %3, i64* nonnu…⟪┫ -237 unreachable ⟪┫ -238 ⟪┫ -239 oob19: ; p…⟪┫ -240 %errorbox20 = alloca i64, align 8 ⟪┫ -241 store i64 %value_phi3.postloop, i64* %errorbox20, a…⟪┫ -242 call void @ijl_bounds_error_ints({}* %0, i64* nonnu…⟪┫ -243 unreachable ⟪┫ -244 ⟪┫ -245 idxend21: ; p…⟪┫ -246 %value_phi3 = phi i64 [ %119, %idxend21 ], [ %bc.re…⟪┫ -247 %113 = add nsw i64 %value_phi3, -1 ⟪┫ -248 %114 = getelementptr inbounds i64, i64* %arrayptr29…⟪┫ -249 %arrayref = load i64, i64* %114, align 8 ⟪┫ -250 %115 = mul i64 %arrayref, %1 ⟪┫ -251 %116 = getelementptr inbounds i64, i64* %arrayptr14…⟪┫ -252 %arrayref15 = load i64, i64* %116, align 8 ⟪┫ -253 %117 = add i64 %arrayref15, %115 ⟪┫ -254 %118 = getelementptr inbounds i64, i64* %arrayptr23…⟪┫ -255 store i64 %117, i64* %118, align 8 ⟪┫ -256 %119 = add nuw nsw i64 %value_phi3, 1 ⟪┫ -257 %.not51 = icmp ult i64 %value_phi3, %exit.mainloop.…⟪┫ -258 br i1 %.not51, label %idxend21, label %main.exit.se…⟪┫ -259 ⟪┫ -260 main.exit.selector: ; p…⟪┫ -261 %value_phi3.lcssa = phi i64 [ %exit.mainloop.at, %m…⟪┫ -262 %.lcssa = phi i64 [ %ind.end, %middle.block ], [ %1…⟪┫ -263 %120 = icmp ult i64 %value_phi3.lcssa, %arraylen ⟪┫ -264 br i1 %120, label %main.pseudo.exit, label %L31 ⟪┫ -265 ⟪┫ -266 main.pseudo.exit: ; p…⟪┫ -267 %value_phi3.copy = phi i64 [ 1, %L13.preheader ], […⟪┫ -268 br label %L13.postloop ⟪┫ -269 ⟪┫ -270 L13.postloop: ; p…⟪┫ -271 %value_phi3.postloop = phi i64 [ %127, %idxend21.po…⟪┫ -272 %121 = add i64 %value_phi3.postloop, -1 ⟪┫ -273 %inbounds.postloop = icmp ult i64 %121, %arraylen6 ⟪┫ -274 br i1 %inbounds.postloop, label %idxend.postloop, l…⟪┫ - ┣⟫ br i1 %cmp.n, label %L32, label %scalar.ph 186 -275 ┃ 187 -276 idxend.postloop: ; p…⟪┫ -277 %inbounds9.postloop = icmp ult i64 %121, %arraylen8 ⟪┫ -278 br i1 %inbounds9.postloop, label %idxend12.postloop…⟪┫ - ┣⟫scalar.ph: ; p…188 - ┣⟫ %bc.resume.val = phi i64 [ %n.vec, %middle.block ],…189 - ┣⟫ br label %L12 190 -279 ┃ 191 -280 idxend12.postloop: ; p…⟪┫ 
-281 %inbounds18.postloop = icmp ult i64 %121, %arraylen ⟪┫ -282 br i1 %inbounds18.postloop, label %idxend21.postloo…⟪┫ - ┣⟫L12: ; p…192 - ┣⟫ %value_phi12 = phi i64 [ %bc.resume.val, %scalar.ph…193 - ┣⟫ %108 = getelementptr inbounds i64, i64* %arrayptr8,…194 - ┣⟫ %arrayref = load i64, i64* %108, align 8 195 - ┣⟫ %109 = mul i64 %arrayref, %1 196 - ┣⟫ %110 = getelementptr inbounds i64, i64* %arrayptr29…197 - ┣⟫ %arrayref3 = load i64, i64* %110, align 8 198 - ┣⟫ %111 = add i64 %arrayref3, %109 199 - ┣⟫ %112 = getelementptr inbounds i64, i64* %arrayptr51…200 - ┣⟫ store i64 %111, i64* %112, align 8 201 - ┣⟫ %113 = add nuw nsw i64 %value_phi12, 1 202 - ┣⟫ %exitcond.not = icmp eq i64 %113, %arraylen 203 - ┣⟫ br i1 %exitcond.not, label %L32, label %L12 204 -283 ┃ 205 -284 idxend21.postloop: ; p…⟪┫ -285 %122 = getelementptr inbounds i64, i64* %arrayptr29…⟪┫ -286 %arrayref.postloop = load i64, i64* %122, align 8 ⟪┫ -287 %123 = mul i64 %arrayref.postloop, %1 ⟪┫ -288 %124 = getelementptr inbounds i64, i64* %arrayptr14…⟪┫ -289 %arrayref15.postloop = load i64, i64* %124, align 8 ⟪┫ -290 %125 = add i64 %arrayref15.postloop, %123 ⟪┫ -291 %126 = getelementptr inbounds i64, i64* %arrayptr23…⟪┫ -292 store i64 %125, i64* %126, align 8 ⟪┫ -293 %.not.not32.postloop = icmp eq i64 %value_phi3.post…⟪┫ -294 %127 = add nuw nsw i64 %value_phi3.postloop, 1 ⟪┫ -295 br i1 %.not.not32.postloop, label %L31, label %L13.…⟪┫ - ┣⟫L32: ; p…206 - ┣⟫ ret void 207 -296 } ┃ } 208 -297 ┃ 209 \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 1f56189..4d1444a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,12 +6,9 @@ using ReferenceTests using Test -const NATIVE_ARCH = InteractiveUtils.sys_arch_category() - - function display_str(v; mime=MIME"text/plain"(), compact=false, color=true, columns=nothing) # Fancy print `v` to a string - columns = @something columns displaysize(stdout)[2] + columns = !isnothing(columns) ? 
columns : displaysize(stdout)[2] io = IOBuffer() io_ctx = IOContext(io, :compact => compact, :color => color) withenv("COLUMNS" => columns) do @@ -25,6 +22,29 @@ function display_str(v; mime=MIME"text/plain"(), compact=false, color=true, colu end +function check_diff_display_order(diff::CodeDiffs.CodeDiff, order::Vector{<:Pair}) + xlines = split(diff.before, '\n') + ylines = split(diff.after, '\n') + order_idx = 1 + DeepDiffs.visitall(diff) do idx, state, _ + if state === :removed + @test last(order[order_idx]) === nothing + @test occursin(first(order[order_idx]), xlines[idx]) + elseif state === :added + @test first(order[order_idx]) === nothing + @test occursin(last(order[order_idx]), ylines[idx]) + elseif state === :changed + line_diff = diff.changed[idx][2] + @test occursin(first(order[order_idx]), line_diff.before) + @test occursin(last(order[order_idx]), line_diff.after) + else + @test occursin(first(order[order_idx]), xlines[idx]) + end + order_idx += 1 + end +end + + @testset "CodeDiffs.jl" begin @testset "Code quality (Aqua.jl)" begin Aqua.test_all(CodeDiffs) @@ -49,6 +69,8 @@ end diff = CodeDiffs.compare_ast(e, :(1+2); color=false, prettify=true, lines=false, alias=false) @test CodeDiffs.issame(diff) @test diff == (@code_diff color=false e :(1+2)) + + # TODO: OhMyREPL highlighting end @testset "Basic function" begin @@ -72,7 +94,6 @@ end diff = CodeDiffs.compare_code_llvm(f1, Tuple{}, f2, Tuple{}; color=false) @test !CodeDiffs.issame(diff) - @test length(DeepDiffs.added(diff)) == length(DeepDiffs.removed(diff)) == 1 + 2 # ret + name*2 @test diff == (@code_diff type=:llvm color=false f1() f2()) end @@ -96,61 +117,79 @@ end end B = quote + println("B") 1 + 3 f(a, d) g(c, b) + a = c + b + c = b - d + h(x, y) "test2" end diff = CodeDiffs.compare_ast(A, B; color=false) @test !CodeDiffs.issame(diff) - # All statements were marked as changed - @test length(DeepDiffs.added(diff)) == length(DeepDiffs.changed(diff)) == 4 + @test length(DeepDiffs.added(diff)) == 8 + 
@test length(DeepDiffs.changed(diff)) == 4 + + check_diff_display_order(diff, [ + "quote" => "quote", + nothing => "println(\"B\")", + "1 + 2" => "1 + 3", + "f(a, b)" => "f(a, d)", + "g(c, d)" => "g(c, b)", + nothing => "a = c + b", + nothing => "c = b - d", + nothing => "h(x, y)", + "\"test\"" => "\"test2\"", + "end" => "end" + ]) end @testset "Display" begin - function test_cmp_display(cmp_name, f₁, args₁, f₂, args₂) + function test_cmp_display(f₁, args₁, f₂, args₂) @testset "Typed" begin - diff = CodeDiffs.compare_code_typed(f₁, args₁, f₂, args₂; color=false) - @test_reference "references/$(cmp_name).jl_typed" display_str(diff; color=false, columns=120) - diff = CodeDiffs.compare_code_typed(f₁, args₁, f₂, args₂; color=true) @test findfirst(CodeDiffs.ANSI_REGEX, diff.before) === nothing - @test findfirst(CodeDiffs.ANSI_REGEX, diff.highlighted_before) !== nothing - @test_reference "references/$(cmp_name)_COLOR.jl_typed" display_str(diff; columns=120) + @test !endswith(diff.before, '\n') && !endswith(diff.after, '\n') + println("\nTyped: $(nameof(f₁)) vs. $(nameof(f₂))") + printstyled(display_str(diff; columns=120)) + println() end @testset "LLVM" begin - diff = CodeDiffs.compare_code_llvm(f₁, args₁, f₂, args₂; color=false, debuginfo=:none) - @test_reference "references/$(cmp_name)_$(NATIVE_ARCH).ll" display_str(diff; color=false, columns=120) - diff = CodeDiffs.compare_code_llvm(f₁, args₁, f₂, args₂; color=true, debuginfo=:none) @test findfirst(CodeDiffs.ANSI_REGEX, diff.before) === nothing - @test findfirst(CodeDiffs.ANSI_REGEX, diff.highlighted_before) !== nothing - @test_reference "references/$(cmp_name)_$(NATIVE_ARCH)_COLOR.ll" display_str(diff; columns=120) + @test !endswith(diff.before, '\n') && !endswith(diff.after, '\n') + @test rstrip(@io2str InteractiveUtils.print_llvm(IOContext(::IO, :color => true), diff.before)) == diff.highlighted_before + println("\nLLVM: $(nameof(f₁)) vs. 
$(nameof(f₂))") + printstyled(display_str(diff; columns=120)) + println() end @testset "Native" begin - diff = CodeDiffs.compare_code_native(f₁, args₁, f₂, args₂; color=false, debuginfo=:none) - @test_reference "references/$(cmp_name)_$(NATIVE_ARCH).S" display_str(diff; color=false, columns=120) - diff = CodeDiffs.compare_code_native(f₁, args₁, f₂, args₂; color=true, debuginfo=:none) @test findfirst(CodeDiffs.ANSI_REGEX, diff.before) === nothing - @test findfirst(CodeDiffs.ANSI_REGEX, diff.highlighted_before) !== nothing - @test_reference "references/$(cmp_name)_$(NATIVE_ARCH)_COLOR.S" display_str(diff; columns=120) + @test !endswith(diff.before, '\n') && !endswith(diff.after, '\n') + @test rstrip(@io2str InteractiveUtils.print_native(IOContext(::IO, :color => true), diff.before)) == diff.highlighted_before + println("\nNative: $(nameof(f₁)) vs. $(nameof(f₂))") + printstyled(display_str(diff; columns=120)) + println() end @testset "Line numbers" begin - diff = CodeDiffs.compare_code_llvm(f₁, args₁, f₂, args₂; color=false, debuginfo=:none) + diff = CodeDiffs.compare_code_typed(f₁, args₁, f₂, args₂; color=false) withenv("CODE_DIFFS_LINE_NUMBERS" => true) do - @test_reference "references/$(cmp_name)_$(NATIVE_ARCH)_LINES.ll" display_str(diff; color=false, columns=120) + println("\nTyped + line numbers: $(nameof(f₁)) vs. 
$(nameof(f₂))") + printstyled(display_str(diff; color=false, columns=120)) + println() end end end @testset "f1" begin f() = 1 - test_cmp_display("f1", f, Tuple{}, f, Tuple{}) + test_cmp_display(f, Tuple{}, f, Tuple{}) end @testset "saxpy" begin @@ -167,7 +206,48 @@ end end saxpy_args = Tuple{Vector{Int}, Int, Vector{Int}, Vector{Int}} - test_cmp_display("saxpy", saxpy, saxpy_args, saxpy_simd, saxpy_args) + test_cmp_display(saxpy, saxpy_args, saxpy_simd, saxpy_args) + end + + @testset "AST" begin + A = quote + 1 + 2 + f(a, b) + g(c, d) + "test" + end + + B = quote + println("B") + 1 + 3 + f(a, d) + g(c, b) + h(x, y) + "test2" + end + + diff = CodeDiffs.compare_ast(A, B; color=false) + + check_diff_display_order(diff, [ + "quote" => "quote", + nothing => "println(\"B\")", + "1 + 2" => "1 + 3", + "f(a, b)" => "f(a, d)", + "g(c, d)" => "g(c, b)", + nothing => "h(x, y)", + "\"test\"" => "\"test2\"", + "end" => "end" + ]) + + @test_reference "references/a_vs_b_PRINT.jl_ast" display_str(diff; mime=nothing, color=false) + @test_reference "references/a_vs_b.jl_ast" display_str(diff; color=false, columns=120) + + withenv("CODE_DIFFS_LINE_NUMBERS" => true) do + @test_reference "references/a_vs_b_LINES.jl_ast" display_str(diff; color=false, columns=120) + end + + diff = CodeDiffs.compare_ast(A, B; color=true) + @test_reference "references/a_vs_b_COLOR.jl_ast" display_str(diff; color=true, columns=120) end end