Skip to content

Conversation

@terasakisatoshi
Copy link
Member

This PR updates src/matrixlu.jl to optimize arrlu.

Here is a benchmark script:

using Random

using BenchmarkTools
import TensorCrossInterpolation as TCI

"""
    CallableMatrix{T}

Wrapper around a `Matrix{T}` that subtypes `Function`, so the matrix can be
passed wherever a callable of two indices is expected.
"""
struct CallableMatrix{T} <: Function
    A::Matrix{T}
end

# Calling the wrapper with indices `(i, j)` returns the stored entry `A[i, j]`.
(m::CallableMatrix)(i, j) = m.A[i, j]

# Benchmark `TCI.arrlu` with the matrix supplied through the `CallableMatrix`
# wrapper. The `setup` expression creates a random 10×10 matrix `A` for the
# benchmarked call, and `samples=100` requests 100 samples.
@benchmark TCI.arrlu(Float64, CallableMatrix(A), size(A), [1], [1]) setup=(A = rand(10, 10)) samples=100

On the main branch:

               _
   _       _ _(_)_     |  Documentation: https://docs.julialang.org
  (_)     | (_) (_)    |
   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 1.11.1 (2024-10-16)
 _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
|__/                   |

julia> versioninfo()
Julia Version 1.11.1
Commit 8f5b7ca12ad (2024-10-16 10:53 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (x86_64-apple-darwin22.4.0)
  CPU: 16 × Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, skylake)
Threads: 1 default, 0 interactive, 1 GC (on 16 virtual cores)
Environment:
  JULIA_PROJECT = @.
  JULIA_PKG_USE_CLI_GIT = true
  JULIA_EDITOR = subl

julia> using Random

julia> using BenchmarkTools

julia> import TensorCrossInterpolation as TCI

julia> begin
               struct CallableMatrix{T} <: Function
                       A::Matrix{T}
               end

               function (c::CallableMatrix)(i, j)
                       c.A[i, j]
               end
       end

julia> begin
               @benchmark TCI.arrlu(Float64, CallableMatrix(A), size(A), [1], [1]) setup=begin
                       A = rand(10, 10)
               end samples=100
       end

BenchmarkTools.Trial: 100 samples with 1 evaluation.
 Range (min … max):  45.684 μs … 90.908 μs  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     56.826 μs              ┊ GC (median):    0.00%
 Time  (mean ± σ):   57.139 μs ±  6.828 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%

        ▂        ▄ █   ▆ ▂  ▂ ▂▄  ▄                            
  ▄▆▄▆▄▄██▄▆▆▁▄▄▆█▆█▆▆▄█▄█▄▆████▆▆█▄▁█▄▄▄▁▆▄▄▆▄▁▄▁▁▁▁▁▄▁▁▁▁▁▄ ▄
  45.7 μs         Histogram: frequency by time        74.3 μs <

 Memory estimate: 118.83 KiB, allocs estimate: 1934.

julia> 

On this PR's branch:

               _
   _       _ _(_)_     |  Documentation: https://docs.julialang.org
  (_)     | (_) (_)    |
   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 1.11.1 (2024-10-16)
 _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
|__/                   |

julia> versioninfo()
Julia Version 1.11.1
Commit 8f5b7ca12ad (2024-10-16 10:53 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (x86_64-apple-darwin22.4.0)
  CPU: 16 × Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, skylake)
Threads: 1 default, 0 interactive, 1 GC (on 16 virtual cores)
Environment:
  JULIA_PROJECT = @.
  JULIA_PKG_USE_CLI_GIT = true
  JULIA_EDITOR = subl

julia> using Random

julia> using BenchmarkTools

julia> import TensorCrossInterpolation as TCI

julia> begin
               struct CallableMatrix{T} <: Function
                       A::Matrix{T}
               end

               function (c::CallableMatrix)(i, j)
                       c.A[i, j]
               end
       end

julia> begin
               @benchmark TCI.arrlu(Float64, CallableMatrix(A), size(A), [1], [1]) setup=begin
                       A = rand(10, 10)
               end samples=100
       end

BenchmarkTools.Trial: 100 samples with 1 evaluation.
 Range (min … max):  18.551 μs … 44.954 μs  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     22.490 μs              ┊ GC (median):    0.00%
 Time  (mean ± σ):   22.753 μs ±  3.648 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%

  ▂▁  ▁█▁▄▅ █▄▅▂                                               
  ██▃▅█████▆█████▅▃█▆▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃ ▃
  18.6 μs         Histogram: frequency by time        42.9 μs <

 Memory estimate: 29.77 KiB, allocs estimate: 294.

julia> 

@shinaoka shinaoka merged commit a9d9076 into main Nov 11, 2024
@terasakisatoshi terasakisatoshi deleted the terasaki/optimize-arrlu branch November 11, 2024 13:27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants