From 485584ada215d0c138a056d73c1b90b15a40d681 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Sat, 11 Nov 2023 17:20:38 +0000 Subject: [PATCH] build based on 004fe70 --- dev/.documenter-siteinfo.json | 2 +- dev/assets/Manifest.toml | 301 ++++++++++++------ dev/examples/ad/index.html | 2 +- dev/examples/bruss/index.html | 2 +- dev/examples/reaction_diffusion/index.html | 2 +- dev/examples/reductions/index.html | 2 +- dev/examples/sde/index.html | 2 +- dev/getting_started/index.html | 154 ++++----- dev/index.html | 109 ++++--- dev/manual/backends/index.html | 2 +- dev/manual/choosing_ensembler/index.html | 2 +- dev/manual/ensemblegpuarray/index.html | 2 +- dev/manual/ensemblegpukernel/index.html | 8 +- dev/manual/optimal_trajectories/index.html | 2 +- dev/tutorials/gpu_ensemble_basic/index.html | 2 +- dev/tutorials/lower_level_api/index.html | 4 +- dev/tutorials/multigpu/index.html | 2 +- dev/tutorials/parallel_callbacks/index.html | 2 +- .../{d2b8e041.svg => 3aa17211.svg} | 64 ++-- dev/tutorials/weak_order_conv_sde/index.html | 2 +- dev/tutorials/within_method_gpu/index.html | 2 +- 21 files changed, 403 insertions(+), 267 deletions(-) rename dev/tutorials/weak_order_conv_sde/{d2b8e041.svg => 3aa17211.svg} (87%) diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index 24c76d78..3cd3505f 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.9.3","generation_timestamp":"2023-11-02T19:26:35","documenter_version":"1.1.2"}} \ No newline at end of file +{"documenter":{"julia_version":"1.9.3","generation_timestamp":"2023-11-11T17:20:33","documenter_version":"1.1.2"}} \ No newline at end of file diff --git a/dev/assets/Manifest.toml b/dev/assets/Manifest.toml index 7be292ea..19b5a093 100644 --- a/dev/assets/Manifest.toml +++ b/dev/assets/Manifest.toml @@ -30,6 +30,25 @@ git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" 
version = "0.4.4" +[[deps.Accessors]] +deps = ["CompositionsBase", "ConstructionBase", "Dates", "InverseFunctions", "LinearAlgebra", "MacroTools", "Test"] +git-tree-sha1 = "a7055b939deae2455aa8a67491e034f735dd08d3" +uuid = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +version = "0.1.33" + + [deps.Accessors.extensions] + AccessorsAxisKeysExt = "AxisKeys" + AccessorsIntervalSetsExt = "IntervalSets" + AccessorsStaticArraysExt = "StaticArrays" + AccessorsStructArraysExt = "StructArrays" + + [deps.Accessors.weakdeps] + AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + Requires = "ae029012-a4dd-5104-9daa-d747884805df" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + [[deps.Adapt]] deps = ["LinearAlgebra", "Requires"] git-tree-sha1 = "02f731463748db57cc2ebfbd9fbc9ce8280d3433" @@ -155,16 +174,21 @@ uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" version = "0.2.4" [[deps.CUDA]] -deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Preferences", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "UnsafeAtomicsLLVM"] -git-tree-sha1 = "968c1365e2992824c3e7a794e30907483f8469a9" +deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "Statistics", "UnsafeAtomicsLLVM"] +git-tree-sha1 = "64461b0e9df3069248979113ce8ab6d11bd371cf" uuid = 
"052768ef-5323-5732-b1bb-66c8b64840ba" -version = "4.4.1" +version = "5.1.0" +weakdeps = ["ChainRulesCore", "SpecialFunctions"] + + [deps.CUDA.extensions] + ChainRulesCoreExt = "ChainRulesCore" + SpecialFunctionsExt = "SpecialFunctions" [[deps.CUDA_Driver_jll]] deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] -git-tree-sha1 = "498f45593f6ddc0adff64a9310bb6710e851781b" +git-tree-sha1 = "1e42ef1bdb45487ff28de16182c0df4920181dc3" uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc" -version = "0.5.0+1" +version = "0.7.0+0" [[deps.CUDA_Runtime_Discovery]] deps = ["Libdl"] @@ -174,9 +198,9 @@ version = "0.2.2" [[deps.CUDA_Runtime_jll]] deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] -git-tree-sha1 = "5248d9c45712e51e27ba9b30eebec65658c6ce29" +git-tree-sha1 = "92394521ec4582c11d089a3b15b76ef2cb850994" uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" -version = "0.6.0+0" +version = "0.10.0+1" [[deps.Cairo_jll]] deps = ["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] @@ -197,9 +221,9 @@ version = "0.3.12" [[deps.ChainRules]] deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"] -git-tree-sha1 = "7e4f5593e7e1ab923cebc5414f6d5433872cdd19" +git-tree-sha1 = "006cc7170be3e0fa02ccac6d4164a1eee1fc8c27" uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" -version = "1.56.0" +version = "1.58.0" [[deps.ChainRulesCore]] deps = ["Compat", "LinearAlgebra"] @@ -281,13 +305,11 @@ version = "1.0.5+0" git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" uuid = "a33af91c-f02d-484b-be07-31d278c5ca2b" version = "0.1.2" +weakdeps = ["InverseFunctions"] [deps.CompositionsBase.extensions] 
CompositionsBaseInverseFunctionsExt = "InverseFunctions" - [deps.CompositionsBase.weakdeps] - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - [[deps.ConcreteStructs]] git-tree-sha1 = "f749037478283d372048690eb3b5f92a79432b34" uuid = "2569d6c7-a4a2-43d3-a901-331e8e4be471" @@ -330,11 +352,22 @@ git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" uuid = "adafc99b-e345-5852-983c-f28acb93d879" version = "0.3.1" +[[deps.Crayons]] +git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" +uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.1.1" + [[deps.DataAPI]] git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.15.0" +[[deps.DataFrames]] +deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "1.6.1" + [[deps.DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d" @@ -362,14 +395,15 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" version = "1.9.1" [[deps.DiffEqBase]] -deps = ["ArrayInterface", "ChainRulesCore", "DataStructures", "DocStringExtensions", "EnumX", "EnzymeCore", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "Markdown", "MuladdMacro", "Parameters", "PreallocationTools", "PrecompileTools", "Printf", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "SciMLOperators", "Setfield", "SparseArrays", "Static", "StaticArraysCore", "Statistics", "Tricks", "TruncatedStacktraces", "ZygoteRules"] -git-tree-sha1 = 
"c8bc8487a7987c13576f25959ac11b25d5da84e2" +deps = ["ArrayInterface", "DataStructures", "DocStringExtensions", "EnumX", "EnzymeCore", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "Markdown", "MuladdMacro", "Parameters", "PreallocationTools", "PrecompileTools", "Printf", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "Setfield", "SparseArrays", "Static", "StaticArraysCore", "Statistics", "Tricks", "TruncatedStacktraces"] +git-tree-sha1 = "de4709e30bd5490435122c4b415b90a812c23fbf" uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" -version = "6.136.0" +version = "6.138.1" [deps.DiffEqBase.extensions] + DiffEqBaseChainRulesCoreExt = "ChainRulesCore" DiffEqBaseDistributionsExt = "Distributions" - DiffEqBaseEnzymeExt = "Enzyme" + DiffEqBaseEnzymeExt = ["ChainRulesCore", "Enzyme"] DiffEqBaseGeneralizedGeneratedExt = "GeneralizedGenerated" DiffEqBaseMPIExt = "MPI" DiffEqBaseMeasurementsExt = "Measurements" @@ -377,9 +411,9 @@ version = "6.136.0" DiffEqBaseReverseDiffExt = "ReverseDiff" DiffEqBaseTrackerExt = "Tracker" DiffEqBaseUnitfulExt = "Unitful" - DiffEqBaseZygoteExt = "Zygote" [deps.DiffEqBase.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" GeneralizedGenerated = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb" @@ -389,7 +423,6 @@ version = "6.136.0" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" - Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [[deps.DiffEqCallbacks]] deps = ["DataStructures", "DiffEqBase", "ForwardDiff", "Functors", "LinearAlgebra", "Markdown", "NLsolve", "Parameters", "RecipesBase", "RecursiveArrayTools", "SciMLBase", "StaticArraysCore"] @@ -403,7 +436,7 @@ version = "2.33.1" [[deps.DiffEqGPU]] deps = ["Adapt", "ChainRulesCore", "DiffEqBase", "Distributed", 
"DocStringExtensions", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LinearSolve", "MuladdMacro", "Parameters", "Random", "RecursiveArrayTools", "Requires", "SciMLBase", "Setfield", "SimpleDiffEq", "StaticArrays", "TOML", "ZygoteRules"] -path = "/var/lib/buildkite-agent/builds/gpuci-6/julialang/diffeqgpu-dot-jl" +path = "/var/lib/buildkite-agent/builds/gpuci-11/julialang/diffeqgpu-dot-jl" uuid = "071ae1c0-96b5-11e9-1965-c90190d839ea" version = "3.2.0" @@ -457,18 +490,20 @@ deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[deps.Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "Test"] -git-tree-sha1 = "3d5873f811f582873bb9871fc9c451784d5dc8c7" +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] +git-tree-sha1 = "a6c00f894f24460379cb7136633cef54ac9f6f4a" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.25.102" +version = "0.25.103" [deps.Distributions.extensions] DistributionsChainRulesCoreExt = "ChainRulesCore" DistributionsDensityInterfaceExt = "DensityInterface" + DistributionsTestExt = "Test" [deps.Distributions.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.DocStringExtensions]] deps = ["LibGit2"] @@ -495,9 +530,9 @@ version = "0.6.8" [[deps.EllipsisNotation]] deps = ["StaticArrayInterface"] -git-tree-sha1 = "d89f0d98f6296a08b73fdfed559f8e86f871cc06" +git-tree-sha1 = "3507300d4343e8e4ad080ad24e335274c2e297a9" uuid = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" -version = "1.7.0" +version = "1.8.0" [[deps.EnumX]] git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" @@ -558,10 +593,10 @@ uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" version = "0.4.1" 
[[deps.FFMPEG_jll]] -deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Pkg", "Zlib_jll", "libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] -git-tree-sha1 = "74faea50c1d007c85837327f6775bea60b5492dd" +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Zlib_jll", "libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "466d45dc38e15794ec7d5d63ec03d776a9aff36e" uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" -version = "4.4.2+2" +version = "4.4.4+1" [[deps.FLoops]] deps = ["BangBang", "Compat", "FLoopsBase", "InitialValues", "JuliaVariables", "MLStyle", "Serialization", "Setfield", "Transducers"] @@ -715,9 +750,9 @@ version = "3.3.8+0" [[deps.GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] -git-tree-sha1 = "2e57b4a4f9cc15e85a24d603256fe08e527f48d1" +git-tree-sha1 = "85d7fb51afb3def5dcb85ad31c3707795c8bccc1" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "8.8.1" +version = "9.1.0" [[deps.GPUArraysCore]] deps = ["Adapt"] @@ -727,9 +762,9 @@ version = "0.1.5" [[deps.GPUCompiler]] deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "72b2e3c2ba583d1a7aa35129e56cf92e07c083e3" +git-tree-sha1 = "a846f297ce9d09ccba02ead0cae70690e072a119" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.21.4" +version = "0.25.0" [[deps.GR]] deps = ["Artifacts", "Base64", "DelimitedFiles", "Downloads", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Preferences", "Printf", "Random", "Serialization", "Sockets", "TOML", "Tar", "Test", "UUIDs", "p7zip_jll"] @@ -829,6 +864,17 @@ git-tree-sha1 = 
"4da0f88e9a39111c2fa3add390ab15f3a44f3ca3" uuid = "22cec73e-a1b8-11e9-2c92-598750a2cf9c" version = "0.3.1" +[[deps.InlineStrings]] +deps = ["Parsers"] +git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" +uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" +version = "1.4.0" + +[[deps.IntegerMathUtils]] +git-tree-sha1 = "b8ffb903da9f7b8cf695a8bead8e01814aa24b30" +uuid = "18e54dd8-cb9d-406c-a71d-865a43cbb235" +version = "0.1.2" + [[deps.IntelOpenMP_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "ad37c091f7d7daf900963171600d7c1c5c3ede32" @@ -839,6 +885,17 @@ version = "2023.2.0+0" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +[[deps.InverseFunctions]] +deps = ["Test"] +git-tree-sha1 = "68772f49f54b479fa88ace904f6127f0a3bb2e46" +uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.12" + +[[deps.InvertedIndices]] +git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.3.0" + [[deps.IrrationalConstants]] git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" @@ -873,6 +930,12 @@ git-tree-sha1 = "6f2675ef130a300a112286de91973805fcc5ffbc" uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "2.1.91+0" +[[deps.JuliaNVTXCallbacks_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "af433a10f3942e882d3c671aacb203e006a5808f" +uuid = "9c1d0b0a-7046-5b2e-a33f-ea22f176ac7e" +version = "0.2.1+0" + [[deps.JuliaVariables]] deps = ["MLStyle", "NameResolution"] git-tree-sha1 = "49fb3cb53362ddadb4415e9b73926d6b40709e70" @@ -897,9 +960,9 @@ version = "0.4.1" [[deps.KernelAbstractions]] deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] -git-tree-sha1 = "5f1ecfddb6abde48563d08b2cc7e5116ebcd6c27" +git-tree-sha1 = "95063c5bc98ba0c47e75e05ae71f1fed4deac6f6" 
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -version = "0.9.10" +version = "0.9.12" weakdeps = ["EnzymeCore"] [deps.KernelAbstractions.extensions] @@ -939,6 +1002,11 @@ git-tree-sha1 = "a84f8f1e8caaaa4e3b4c101306b9e801d3883ace" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" version = "0.0.27+0" +[[deps.LLVMLoopInfo]] +git-tree-sha1 = "2e5c102cfc41f48ae4740c7eca7743cc7e7b75ea" +uuid = "8b046642-f1f6-4319-8d3c-209ddc03c586" +version = "1.0.0" + [[deps.LLVMOpenMP_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b" @@ -970,6 +1038,12 @@ version = "0.16.1" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" SymEngine = "123dc426-2d89-5057-bbad-38513e3affd8" +[[deps.LatticeRules]] +deps = ["Random"] +git-tree-sha1 = "7f5b02258a3ca0221a6a9710b0a0a2e8fb4957fe" +uuid = "73f95e8e-ec14-4e6a-8b18-0d2e271c4e55" +version = "0.0.1" + [[deps.LayoutPointers]] deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] git-tree-sha1 = "62edfee3211981241b57ff1cedf4d74d79519277" @@ -1078,10 +1152,10 @@ deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.LinearSolve]] -deps = ["ArrayInterface", "ConcreteStructs", "DocStringExtensions", "EnumX", "EnzymeCore", "FastLapackInterface", "GPUArraysCore", "InteractiveUtils", "KLU", "Krylov", "Libdl", "LinearAlgebra", "MKL", "MKL_jll", "PrecompileTools", "Preferences", "RecursiveFactorization", "Reexport", "Requires", "SciMLBase", "SciMLOperators", "Setfield", "SparseArrays", "Sparspak", "SuiteSparse", "UnPack"] -git-tree-sha1 = "371e4ece4fc1341f54531dde75ae7d0b19257c2c" +deps = ["ArrayInterface", "ConcreteStructs", "DocStringExtensions", "EnumX", "EnzymeCore", "FastLapackInterface", "GPUArraysCore", "InteractiveUtils", "KLU", "Krylov", "Libdl", "LinearAlgebra", "MKL_jll", "PrecompileTools", "Preferences", "RecursiveFactorization", "Reexport", "Requires", "SciMLBase", 
"SciMLOperators", "Setfield", "SparseArrays", "Sparspak", "UnPack"] +git-tree-sha1 = "525c1af3fce8a8eba3a91920b9c3be7d948ec004" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -version = "2.15.0" +version = "2.19.0" [deps.LinearSolve.extensions] LinearSolveBandedMatricesExt = "BandedMatrices" @@ -1145,12 +1219,6 @@ weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] SpecialFunctionsExt = "SpecialFunctions" -[[deps.MKL]] -deps = ["Artifacts", "Libdl", "LinearAlgebra", "MKL_jll"] -git-tree-sha1 = "100521a1d2181cb39036ee1a6955d6b9686bb363" -uuid = "33e6dc65-8f57-5167-99aa-e5a354878fb2" -version = "0.6.1" - [[deps.MKL_jll]] deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] git-tree-sha1 = "eb006abbd7041c28e0d16260e50a24f8f9104913" @@ -1190,10 +1258,10 @@ uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" version = "0.1.2" [[deps.MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "Random", "Sockets"] -git-tree-sha1 = "03a9b9718f5682ecb107ac9f7308991db4ce395b" +deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] +git-tree-sha1 = "f512dc13e64e96f703fd92ce617755ee6b5adf0f" uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.7" +version = "1.1.8" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] @@ -1259,6 +1327,18 @@ version = "0.9.7" EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" +[[deps.NVTX]] +deps = ["Colors", "JuliaNVTXCallbacks_jll", "Libdl", "NVTX_jll"] +git-tree-sha1 = "8bc9ce4233be3c63f8dcd78ccaf1b63a9c0baa34" +uuid = "5da4648a-3479-48b8-97b9-01cb529c0a1f" +version = "0.3.3" + +[[deps.NVTX_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ce3269ed42816bf18d500c9f63418d4b0d9f5a3b" +uuid = "e98f9f5b-d649-5603-91fd-7774390e6439" +version = "3.1.0+2" + [[deps.NaNMath]] deps = ["OpenLibm_jll"] git-tree-sha1 = 
"0877504529a3e5c3343c6f8b4c0381e57e4387e4" @@ -1277,9 +1357,9 @@ version = "1.2.0" [[deps.NonlinearSolve]] deps = ["ADTypes", "ArrayInterface", "ConcreteStructs", "DiffEqBase", "EnumX", "FastBroadcast", "FiniteDiff", "ForwardDiff", "LineSearches", "LinearAlgebra", "LinearSolve", "PrecompileTools", "RecursiveArrayTools", "Reexport", "SciMLBase", "SimpleNonlinearSolve", "SparseArrays", "SparseDiffTools", "StaticArraysCore", "UnPack"] -git-tree-sha1 = "613f5d5b911ab761cc7f41191c197ba3d9ca9d78" +git-tree-sha1 = "f400009287afedef175058e64aadf7d41f593fef" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" -version = "2.6.0" +version = "2.8.0" [deps.NonlinearSolve.extensions] NonlinearSolveBandedMatricesExt = "BandedMatrices" @@ -1333,9 +1413,9 @@ version = "1.4.1" [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "a12e56c72edee3ce6b96667745e6cbbe5498f200" +git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "1.1.23+0" +version = "3.0.12+0" [[deps.OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -1368,9 +1448,9 @@ version = "1.6.2" [[deps.OrdinaryDiffEq]] deps = ["ADTypes", "Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "IfElse", "InteractiveUtils", "LineSearches", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLNLSolve", "SciMLOperators", "SimpleNonlinearSolve", "SimpleUnPack", "SparseArrays", "SparseDiffTools", "StaticArrayInterface", "StaticArrays", "TruncatedStacktraces"] -git-tree-sha1 = "f4bb6db58944af8d57da3cbb85e8bd96f9d8ea51" +git-tree-sha1 = 
"760558496b50017d10e1907244dc2c4fa4385963" uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "6.58.1" +version = "6.59.0" [[deps.PCRE2_jll]] deps = ["Artifacts", "Libdl"] @@ -1379,9 +1459,9 @@ version = "10.42.0+0" [[deps.PDMats]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "66b2fcd977db5329aa35cac121e5b94dd6472198" +git-tree-sha1 = "f6f85a2edb9c356b829934ad3caed2ad0ebbfc99" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.28" +version = "0.11.29" [[deps.PackageExtensionCompat]] git-tree-sha1 = "fb28e33b8a95c4cee25ce296c817d89cc2e53518" @@ -1397,9 +1477,9 @@ version = "0.12.3" [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] -git-tree-sha1 = "716e24b21538abc91f6205fd1d8363f39b442851" +git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.7.2" +version = "2.8.0" [[deps.Pipe]] git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" @@ -1457,9 +1537,9 @@ version = "0.4.4" [[deps.Polyester]] deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StaticArrayInterface", "StrideArraysCore", "ThreadingUtilities"] -git-tree-sha1 = "398f91235beaac50445557c937ecb0145d171842" +git-tree-sha1 = "fca25670784a1ae44546bcb17288218310af2778" uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" -version = "0.7.8" +version = "0.7.9" [[deps.PolyesterWeave]] deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] @@ -1467,6 +1547,12 @@ git-tree-sha1 = "240d7170f5ffdb285f9427b92333c3463bf65bf6" uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" version = "0.2.1" +[[deps.PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.4.3" + [[deps.PositiveFactorizations]] deps = ["LinearAlgebra"] git-tree-sha1 = 
"17275485f373e6673f7e7f97051f703ed5b15b20" @@ -1500,6 +1586,18 @@ git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4" uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98" version = "0.2.0" +[[deps.PrettyTables]] +deps = ["Crayons", "LaTeXStrings", "Markdown", "Printf", "Reexport", "StringManipulation", "Tables"] +git-tree-sha1 = "6842ce83a836fbbc0cfeca0b5a4de1a4dcbdb8d1" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "2.2.8" + +[[deps.Primes]] +deps = ["IntegerMathUtils"] +git-tree-sha1 = "4c9f306e5d6603ae203c2000dd460d81a5251489" +uuid = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" +version = "0.5.4" + [[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" @@ -1516,9 +1614,9 @@ version = "0.1.4" [[deps.Qt6Base_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Vulkan_Loader_jll", "Xorg_libSM_jll", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_cursor_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "libinput_jll", "xkbcommon_jll"] -git-tree-sha1 = "7c29f0e8c575428bd84dc3c72ece5178caa67336" +git-tree-sha1 = "37b7bb7aabf9a085e0044307e1717436117f2b3b" uuid = "c0090381-4147-56d7-9ebc-da0b1113ec56" -version = "6.5.2+2" +version = "6.5.3+1" [[deps.QuadGK]] deps = ["DataStructures", "LinearAlgebra"] @@ -1526,6 +1624,12 @@ git-tree-sha1 = "9ebcd48c498668c7fa0e97a9cae873fbee7bfee1" uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" version = "2.9.1" +[[deps.QuasiMonteCarlo]] +deps = ["Accessors", "ConcreteStructs", "Distributions", "LatticeRules", "LinearAlgebra", "Primes", "Random", "Sobol", "StatsBase"] +git-tree-sha1 = "5654ee2631b0b291c639b2e6c81bbf06128bce85" +uuid = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" +version = "0.3.2" + [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = 
"3fa0cd96-eef1-5676-8a61-b3b8758bbffb" @@ -1584,9 +1688,9 @@ version = "2.38.10" [[deps.RecursiveFactorization]] deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "PrecompileTools", "StrideArraysCore", "TriangularSolve"] -git-tree-sha1 = "2b6d4a40339aa02655b1743f4cd7c03109f520c1" +git-tree-sha1 = "8bc86c78c7d8e2a5fe559e3721c0f9c9e303b2ed" uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" -version = "0.2.20" +version = "0.2.21" [[deps.Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" @@ -1662,12 +1766,13 @@ uuid = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" version = "0.1.0" [[deps.SciMLBase]] -deps = ["ADTypes", "ArrayInterface", "ChainRulesCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FillArrays", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables", "TruncatedStacktraces", "ZygoteRules"] -git-tree-sha1 = "9e4c2902c2e8675c3b67eb24fd7f89defcbde9af" +deps = ["ADTypes", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FillArrays", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "PrecompileTools", "Preferences", "Printf", "QuasiMonteCarlo", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables", "TruncatedStacktraces"] +git-tree-sha1 = "dd2d18b981d09a2376ba49c5fab480f497992c88" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "2.6.0" +version = "2.8.0" [deps.SciMLBase.extensions] + SciMLBaseChainRulesCoreExt = "ChainRulesCore" SciMLBasePartialFunctionsExt = "PartialFunctions" SciMLBasePyCallExt = "PyCall" SciMLBasePythonCallExt = 
"PythonCall" @@ -1675,6 +1780,8 @@ version = "2.6.0" SciMLBaseZygoteExt = "Zygote" [deps.SciMLBase.weakdeps] + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" @@ -1689,9 +1796,9 @@ version = "0.1.9" [[deps.SciMLOperators]] deps = ["ArrayInterface", "DocStringExtensions", "Lazy", "LinearAlgebra", "Setfield", "SparseArrays", "StaticArraysCore", "Tricks"] -git-tree-sha1 = "65c2e6ced6f62ea796af251eb292a0e131a3613b" +git-tree-sha1 = "51ae235ff058a64815e0a2c34b1db7578a06813d" uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" -version = "0.3.6" +version = "0.3.7" [[deps.SciMLSensitivity]] deps = ["ADTypes", "Adapt", "ArrayInterface", "ChainRulesCore", "DiffEqBase", "DiffEqCallbacks", "DiffEqNoiseProcess", "Distributions", "EllipsisNotation", "Enzyme", "FiniteDiff", "ForwardDiff", "FunctionProperties", "FunctionWrappersWrappers", "Functors", "GPUArraysCore", "LinearAlgebra", "LinearSolve", "Markdown", "OrdinaryDiffEq", "Parameters", "PreallocationTools", "QuadGK", "Random", "RandomNumbers", "RecursiveArrayTools", "Reexport", "ReverseDiff", "SciMLBase", "SciMLOperators", "SimpleNonlinearSolve", "SparseDiffTools", "StaticArraysCore", "Statistics", "StochasticDiffEq", "Tracker", "TruncatedStacktraces", "Zygote", "ZygoteRules"] @@ -1705,6 +1812,12 @@ git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" uuid = "6c6a2e73-6563-6170-7368-637461726353" version = "1.2.1" +[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.1" + [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" @@ -1741,10 +1854,10 @@ uuid = "05bca326-078c-5bf0-a5bf-ce7c7982d7fd" version = "1.11.0" [[deps.SimpleNonlinearSolve]] -deps = ["ArrayInterface", 
"DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "PackageExtensionCompat", "PrecompileTools", "Reexport", "SciMLBase", "StaticArraysCore"] -git-tree-sha1 = "15ff97fa4881133caa324dacafe28b5ac47ad8a2" +deps = ["ArrayInterface", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "PrecompileTools", "Reexport", "SciMLBase", "StaticArraysCore"] +git-tree-sha1 = "69b1a53374dd14d7c165d98cb646aeb5f36f8d07" uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7" -version = "0.1.23" +version = "0.1.25" weakdeps = ["NNlib"] [deps.SimpleNonlinearSolve.extensions] @@ -1761,6 +1874,12 @@ git-tree-sha1 = "58e6353e72cde29b90a69527e56df1b5c3d8c437" uuid = "ce78b400-467f-4804-87d8-8f486da07d0a" version = "1.1.0" +[[deps.Sobol]] +deps = ["DelimitedFiles", "Random"] +git-tree-sha1 = "5a74ac22a9daef23705f010f72c81d6925b19df8" +uuid = "ed01d8cd-4d21-5b2a-85b4-cc3bdc58bad4" +version = "1.5.0" + [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" @@ -1776,9 +1895,9 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.SparseDiffTools]] deps = ["ADTypes", "Adapt", "ArrayInterface", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "PackageExtensionCompat", "Random", "Reexport", "SciMLOperators", "Setfield", "SparseArrays", "StaticArrayInterface", "StaticArrays", "Tricks", "UnPack", "VertexSafeGraphs"] -git-tree-sha1 = "bb0ff88a054f2dbf3d54d7630a42b743fcdfa21b" +git-tree-sha1 = "e162b74fd1ce6d371ff5c584b53e34538edb9212" uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -version = "2.9.2" +version = "2.11.0" [deps.SparseDiffTools.extensions] SparseDiffToolsEnzymeExt = "Enzyme" @@ -1872,26 +1991,29 @@ deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Re git-tree-sha1 = "f625d686d5a88bcd2b15cd81f18f98186fdc0c9a" uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" version = "1.3.0" +weakdeps = ["ChainRulesCore", "InverseFunctions"] [deps.StatsFuns.extensions] StatsFunsChainRulesCoreExt = "ChainRulesCore" 
StatsFunsInverseFunctionsExt = "InverseFunctions" - [deps.StatsFuns.weakdeps] - ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - [[deps.StochasticDiffEq]] deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DiffEqNoiseProcess", "DocStringExtensions", "FillArrays", "FiniteDiff", "ForwardDiff", "JumpProcesses", "LevyArea", "LinearAlgebra", "Logging", "MuladdMacro", "NLsolve", "OrdinaryDiffEq", "Random", "RandomNumbers", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "2a1818923476d496190791b4cf7183e9ccf64a99" +git-tree-sha1 = "7a71f1e67cbcfcd5387707e6621431d1afff62a9" uuid = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0" -version = "6.63.0" +version = "6.63.2" [[deps.StrideArraysCore]] deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"] -git-tree-sha1 = "f02eb61eb5c97b48c153861c72fbbfdddc607e06" +git-tree-sha1 = "e7dd250422df290cee14960c1ee144b44ac3dd77" uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" -version = "0.4.17" +version = "0.5.1" + +[[deps.StringManipulation]] +deps = ["PrecompileTools"] +git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" +uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" +version = "0.3.4" [[deps.StructArrays]] deps = ["Adapt", "ConstructionBase", "DataAPI", "GPUArraysCore", "StaticArraysCore", "Tables"] @@ -1966,18 +2088,18 @@ version = "0.5.23" [[deps.Tracker]] deps = ["Adapt", "DiffRules", "ForwardDiff", "Functors", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NNlib", "NaNMath", "Optimisers", "Printf", "Random", "Requires", "SpecialFunctions", "Statistics"] -git-tree-sha1 = "994c4977c13e7e2525566e193f82374021fab5dd" +git-tree-sha1 = "4e35f86bcdcdc2f104f763750d6038dc35e598bc" uuid = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" -version = "0.2.27" +version = 
"0.2.28" weakdeps = ["PDMats"] [deps.Tracker.extensions] TrackerPDMatsExt = "PDMats" [[deps.TranscodingStreams]] -git-tree-sha1 = "49cbf7c74fafaed4c529d47d48c8f7da6a19eb75" +git-tree-sha1 = "1fbeaaca45801b4ba17c251dd8603ef24801dd84" uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.10.1" +version = "0.10.2" weakdeps = ["Random", "Test"] [deps.TranscodingStreams.extensions] @@ -1985,9 +2107,9 @@ weakdeps = ["Random", "Test"] [[deps.Transducers]] deps = ["Adapt", "ArgCheck", "BangBang", "Baselet", "CompositionsBase", "ConstructionBase", "DefineSingletons", "Distributed", "InitialValues", "Logging", "Markdown", "MicroCollections", "Requires", "Setfield", "SplittablesBase", "Tables"] -git-tree-sha1 = "53bd5978b182fa7c57577bdb452c35e5b4fb73a5" +git-tree-sha1 = "e579d3c991938fecbb225699e8f611fa3fbf2141" uuid = "28d57a85-8fef-5791-bfe6-a80928e7c999" -version = "0.4.78" +version = "0.4.79" [deps.Transducers.extensions] TransducersBlockArraysExt = "BlockArrays" @@ -2054,15 +2176,12 @@ deps = ["Dates", "LinearAlgebra", "Random"] git-tree-sha1 = "a72d22c7e13fe2de562feda8645aa134712a87ee" uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" version = "1.17.0" +weakdeps = ["ConstructionBase", "InverseFunctions"] [deps.Unitful.extensions] ConstructionBaseUnitfulExt = "ConstructionBase" InverseFunctionsUnitfulExt = "InverseFunctions" - [deps.Unitful.weakdeps] - ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" - InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" - [[deps.UnitfulLatexify]] deps = ["LaTeXStrings", "Latexify", "Unitful"] git-tree-sha1 = "e2d817cc500e960fdbafcf988ac8436ba3208bfd" @@ -2129,9 +2248,9 @@ version = "1.1.34+0" [[deps.XZ_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "cf2c7de82431ca6f39250d2fc4aacd0daa1675c0" +git-tree-sha1 = "522b8414d40c4cbbab8dee346ac3a09f9768f25d" uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" -version = "5.4.4+0" +version = "5.4.5+0" [[deps.Xorg_libICE_jll]] deps = ["Libdl", "Pkg"] diff --git 
a/dev/examples/ad/index.html b/dev/examples/ad/index.html index ff177e83..23bed82b 100644 --- a/dev/examples/ad/index.html +++ b/dev/examples/ad/index.html @@ -66,4 +66,4 @@ @time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000, saveat = 1.0f0)
EnsembleSolution Solution of length 10000 with uType:
-SciMLBase.ODESolution{ForwardDiff.Dual{Nothing, Float64, 3}, 2, uType, Nothing, Nothing, Vector{Float32}, rateType, SciMLBase.ODEProblem{Vector{ForwardDiff.Dual{Nothing, Float64, 3}}, Tuple{Float32, Float32}, true, Vector{Float32}, SciMLBase.ODEFunction{true, SciMLBase.FullSpecialize, typeof(Main.lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, IType, SciMLBase.DEStats, Nothing} where {uType, rateType, IType}
+SciMLBase.ODESolution{ForwardDiff.Dual{Nothing, Float64, 3}, 2, uType, Nothing, Nothing, Vector{Float32}, rateType, SciMLBase.ODEProblem{Vector{ForwardDiff.Dual{Nothing, Float64, 3}}, Tuple{Float32, Float32}, true, Vector{Float32}, SciMLBase.ODEFunction{true, SciMLBase.FullSpecialize, typeof(Main.lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, OrdinaryDiffEq.Tsit5{typeof(OrdinaryDiffEq.trivial_limiter!), typeof(OrdinaryDiffEq.trivial_limiter!), Static.False}, IType, SciMLBase.DEStats, Nothing} where {uType, rateType, IType} diff --git a/dev/examples/bruss/index.html b/dev/examples/bruss/index.html index d7e1cc52..98a383a6 100644 --- a/dev/examples/bruss/index.html +++ b/dev/examples/bruss/index.html @@ -89,4 +89,4 @@ 11.5 u: 2-element Vector{CUDA.CuArray{Float64, 3, CUDA.Mem.DeviceBuffer}}: [0.0 0.12134432813715873 … 0.1213443281371586 0.0; 0.0 0.12134432813715873 … 0.1213443281371586 0.0; … ; 0.0 0.12134432813715873 … 0.1213443281371586 0.0; 0.0 0.12134432813715873 … 0.1213443281371586 0.0;;; 0.0 0.0 … 0.0 0.0; 0.14892258453196755 0.14892258453196755 … 0.14892258453196755 0.14892258453196755; … ; 0.14892258453196738 0.14892258453196738 … 0.14892258453196738 0.14892258453196738; 0.0 0.0 … 0.0 0.0] - [3.0478431718176133 3.0478158588958384 … 3.047934407132488 3.0478829076399707; 3.0478921742796787 3.0478615839692695 … 3.0479950350866707 3.0479368431642384; … ; 3.0477574497062734 3.04773545592171 … 3.0478300382484087 3.047789226504652; 3.047797767524695 3.0477733366890103 … 3.047878867861117 3.047833184325156;;; 2.567054744858359 2.5670562676074793 … 2.567049778296592 2.567052560054736; 2.567051711744317 2.56705334622074 … 2.5670463664110166 2.5670493630392817; … ; 2.567060148333675 2.567061482264857 
… 2.567055819529447 2.567058240137795; 2.567057591432184 2.567059013089459 … 2.567052966605906 2.567055554756332] + [3.0478431718176133 3.0478158588958384 … 3.047934407132488 3.0478829076399707; 3.0478921742796787 3.0478615839692695 … 3.0479950350866707 3.0479368431642384; … ; 3.0477574497062734 3.04773545592171 … 3.0478300382484087 3.047789226504652; 3.047797767524695 3.0477733366890103 … 3.047878867861117 3.047833184325156;;; 2.567054744858359 2.5670562676074793 … 2.567049778296592 2.567052560054736; 2.567051711744317 2.56705334622074 … 2.5670463664110166 2.5670493630392817; … ; 2.567060148333675 2.567061482264857 … 2.567055819529447 2.567058240137795; 2.567057591432184 2.567059013089459 … 2.567052966605906 2.567055554756332] diff --git a/dev/examples/reaction_diffusion/index.html b/dev/examples/reaction_diffusion/index.html index 115f7b36..e6afaa11 100644 --- a/dev/examples/reaction_diffusion/index.html +++ b/dev/examples/reaction_diffusion/index.html @@ -1,2 +1,2 @@ -GPU-Accelerated Stochastic Partial Differential Equations · DiffEqGPU.jl
+GPU-Accelerated Stochastic Partial Differential Equations · DiffEqGPU.jl
diff --git a/dev/examples/reductions/index.html b/dev/examples/reductions/index.html index f7df4f51..d8f8f5e0 100644 --- a/dev/examples/reductions/index.html +++ b/dev/examples/reductions/index.html @@ -28,4 +28,4 @@ reduction = reduction, u_init = Vector{eltype(prob.u0)}([0.0])) sim4 = solve(prob2, Tsit5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 100, batch_size = 20)
EnsembleSolution Solution of length 1 with uType:
-Float64
+Float64 diff --git a/dev/examples/sde/index.html b/dev/examples/sde/index.html index 1edd31ed..a9f3ab29 100644 --- a/dev/examples/sde/index.html +++ b/dev/examples/sde/index.html @@ -23,4 +23,4 @@ monteprob = EnsembleProblem(prob, prob_func = prob_func) sol = solve(monteprob, SOSRI(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000, saveat = 1.0f0)
EnsembleSolution Solution of length 10000 with uType:
-SciMLBase.RODESolution{Float32, 2, uType, Nothing, Nothing, Vector{Float32}, randType, SciMLBase.SDEProblem{Vector{Float32}, Tuple{Float32, Float32}, true, Vector{Float32}, Nothing, SciMLBase.SDEFunction{true, SciMLBase.FullSpecialize, typeof(Main.lorenz), typeof(Main.multiplicative_noise), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, typeof(Main.multiplicative_noise), Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, Nothing}, StochasticDiffEq.SOSRI, IType, SciMLBase.DEStats, Nothing} where {uType, randType, IType}
+SciMLBase.RODESolution{Float32, 2, uType, Nothing, Nothing, Vector{Float32}, randType, SciMLBase.SDEProblem{Vector{Float32}, Tuple{Float32, Float32}, true, Vector{Float32}, Nothing, SciMLBase.SDEFunction{true, SciMLBase.FullSpecialize, typeof(Main.lorenz), typeof(Main.multiplicative_noise), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, typeof(Main.multiplicative_noise), Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, Nothing}, StochasticDiffEq.SOSRI, IType, SciMLBase.DEStats, Nothing} where {uType, randType, IType} diff --git a/dev/getting_started/index.html b/dev/getting_started/index.html index e996057c..da0a3d8d 100644 --- a/dev/getting_started/index.html +++ b/dev/getting_started/index.html @@ -6,96 +6,96 @@ prob = ODEProblem(f, u0, (0.0, 1.0)) sol = solve(prob, Tsit5())
retcode: Success
 Interpolation: specialized 4th order "free" interpolation
-t: 49-element Vector{Float64}:
+t: 48-element Vector{Float64}:
  0.0
- 0.0011527378461214704
- 0.005204080858925229
- 0.011871419397780506
- 0.019750329295883273
- 0.028881162765730932
- 0.03971964851761266
- 0.051753802045826455
- 0.06529068043344283
- 0.0783981778960967
+ 0.0017460573639805404
+ 0.006704015767905096
+ 0.014319370930348364
+ 0.023221826973707938
+ 0.03452006705349458
+ 0.047072496799016324
+ 0.06120788819832835
+ 0.07600328352138182
+ 0.089887496749854
  ⋮
- 0.7904512678244963
- 0.8159972290539383
- 0.8430570979947353
- 0.869718291757767
- 0.8970109270221073
- 0.9242483575618928
- 0.9518732806883997
- 0.9788357397272859
+ 0.7933992236269642
+ 0.8197223678987577
+ 0.8468102324990433
+ 0.8741066512525104
+ 0.9015907020769398
+ 0.9293423676815272
+ 0.9568614932626144
+ 0.9849342135770022
  1.0
-u: 49-element Vector{Vector{Float64}}:
- [0.9939247976870481, 0.9540436541340336, 0.27152785527728585, 0.15674204277936787, 0.6418697912296284, 0.41537296600582474, 0.5246066692672485, 0.5397517980970151, 0.17835905402507635, 0.5304524200329674  …  0.3085895280603833, 0.7768304029039654, 0.5641162921875568, 0.8447068022369464, 0.006906451785012924, 0.21588285955783848, 0.05037060866914711, 0.4784240508332661, 0.5366280223564113, 0.9008121732689689]
- [0.9928584816051756, 0.9367175479750247, 0.2651571006651849, 0.14651919001557118, 0.6619690979551597, 0.41503257453981157, 0.508467937569531, 0.5237187981193008, 0.15942535529236146, 0.5433086835043012  …  0.2717871273273137, 0.7646146324259679, 0.5312279315171634, 0.8750804971046412, -0.03466455329777871, 0.20808269100314264, 0.07686508230814496, 0.4498502917254031, 0.5001654848529354, 0.8791317872595498]
- [0.988531197236948, 0.8888231333443147, 0.24310857392901852, 0.11633470672461622, 0.7269851775160854, 0.41066150357362285, 0.4452670589560903, 0.4625038476879545, 0.09198410576494656, 0.5871091956221961  …  0.1513555894972209, 0.736726653433933, 0.41134903670641504, 0.9786941052126231, -0.18457557303906102, 0.17678073999871466, 0.16847795459139256, 0.34864324379811035, 0.3676458816129993, 0.7998470354089824]
- [0.9792766159979713, 0.8543317678489944, 0.20826119215245395, 0.07902427185042309, 0.8116408965932985, 0.3936400158297083, 0.3120038809783565, 0.33951318258381846, -0.020011454387416788, 0.6506940873192386  …  -0.017969641759342524, 0.7407396132186418, 0.2028121858666597, 1.1346700288922542, -0.4453072423780087, 0.1104585974896159, 0.3162734152087961, 0.1786091142609957, 0.1309595607161901, 0.6607676744268182]
- [0.9650814226214751, 0.8849161015742372, 0.17039807082283184, 0.03666356084528918, 0.868412608432469, 0.3598337596550622, 0.09035780616984139, 0.14350706104307756, -0.1473986401130777, 0.7045079455600491  …  -0.17566849051175548, 0.8236126128267479, -0.05380291160372907, 1.2844556843614432, -0.7794871105654575, 0.0038424898825845442, 0.4911218493350419, -0.03082673200957795, -0.18710383142640982, 0.48717383554023025]
- [0.9468569283997329, 1.0161837754470031, 0.13329073721355536, -0.041790067200426675, 0.8646591228089205, 0.30498596508584047, -0.281043521351374, -0.179287853806774, -0.27343859879232263, 0.7290289038418997  …  -0.30909374906057446, 1.024215916609383, -0.3527388026619159, 1.3883707804188798, -1.210453770015346, -0.16659357386610144, 0.7014689656399099, -0.29287808716911096, -0.6197014391934963, 0.2812781060564696]
- [0.9350570778557666, 1.3009310524831421, 0.10310568768536431, -0.2280741451511164, 0.7494763877608984, 0.2209207173920376, -0.9234984866111254, -0.7416362070317873, -0.36175911846839887, 0.698752615313529  …  -0.41539399797650683, 1.4091583958739464, -0.693054461331818, 1.368818606225972, -1.80197555237789, -0.4495064957001721, 0.9711084079445877, -0.650993362595809, -1.2351580054096853, 0.04353027350002056]
- [0.9741241072718829, 1.7687459056275365, 0.09484842854048149, -0.6280541930726493, 0.4686112361489025, 0.10210969311464879, -1.9526826923708391, -1.6619415726027398, -0.32638440022908716, 0.591095527588084  …  -0.4993124158441251, 2.026888204639013, -1.035096645266987, 1.0798105793413477, -2.599535246241541, -0.888532321539515, 1.302302278357942, -1.1532851820224672, -2.052252469746723, -0.19171480669213736]
- [1.1961991822023865, 2.462129227074895, 0.13294253617309704, -1.4203864086576585, -0.039274507326557836, -0.08413034609303846, -3.599651439826568, -3.16330681134287, -0.029044768684619388, 0.3919319785586559  …  -0.6162471578085481, 2.9577170963517405, -1.3630168203810875, 0.25039669044134066, -3.7520402051019586, -1.566060881552029, 1.7076791148826482, -1.944760414234377, -3.116594402106619, -0.3917508479631903]
- [1.8116526733732614, 3.281495992811935, 0.249304182408188, -2.6264021819991785, -0.6928790963490769, -0.3835141342274986, -5.813936583820177, -5.175372872845645, 0.6140462030591305, 0.1445725444332641  …  -0.8588656039004277, 4.059796851834279, -1.6253617428559244, -1.3276844691380827, -5.239852862430459, -2.4184948593789244, 2.097370968208895, -3.0839708582127234, -4.232322177872473, -0.497502316818431]
+u: 48-element Vector{Vector{Float64}}:
+ [0.5025392443576809, 0.7591172646546439, 0.22581999791596086, 0.23102511372775658, 0.28051354669381123, 0.49886392696305615, 0.7902067268986426, 0.5064156793039125, 0.7769744394158119, 0.6616782645815115  …  0.8212806189354621, 0.34218927845101166, 0.6878901347084793, 0.9579856348748483, 0.6211972180515419, 0.7639676982658415, 0.31628844071954754, 0.008899371025192448, 0.46213995402621977, 0.9946262761664695]
+ [0.564509883357797, 0.7323700526112732, 0.17317718548104769, 0.21316156484125093, 0.30359203082566916, 0.5083819860821849, 0.8197235509826144, 0.57026720445949, 0.7277738544472674, 0.6686384886639892  …  0.8493064172033837, 0.37495162218621025, 0.7242981415458869, 1.011685288698082, 0.6383594322374649, 0.7744666210738418, 0.33935613500492967, 0.024315829040908843, 0.4932119825582928, 0.9955902356477314]
+ [0.7541481022654971, 0.6512991048643804, 0.02196229509332479, 0.15887703224068625, 0.37467768944864016, 0.5410176874844858, 0.8969276969719753, 0.7574584782219329, 0.5882924222918493, 0.6856686837072739  …  0.9194937429080167, 0.4767556986985899, 0.8092167816226892, 1.1599492514781282, 0.694783908676237, 0.8036408434761888, 0.40836039385463724, 0.06717706588927172, 0.5776322627058214, 1.0002753945135938]
+ [1.0922379684607104, 0.5137629605176164, -0.21381615632681272, 0.0663987914762576, 0.5011444648567268, 0.6033005580667361, 0.9973567397449704, 1.0538359199783025, 0.37449640931501377, 0.707425514557546  …  0.997498611077629, 0.65503096235981, 0.8841451033712505, 1.3750451804886987, 0.8010845873486654, 0.8483206690281013, 0.5288941814218894, 0.13705487464098698, 0.6951799851918488, 1.0156855790093269]
+ [1.576251954723386, 0.33600059667383125, -0.49191454577956945, -0.05381712488523661, 0.6788115002443611, 0.6871061985447604, 1.0899982866955071, 1.397223228117366, 0.12562787038798076, 0.7344316919146228  …  1.034314371464989, 0.8888219939952531, 0.8785289602512726, 1.6071207297944032, 0.9510145047413581, 0.9028321835852599, 0.7023248969086583, 0.24284674075000792, 0.8128518087275947, 1.0482503153365612]
+ [2.3607917349559013, 0.08793434432688176, -0.8449900324260948, -0.2237474569462749, 0.956820729109876, 0.8027559566786635, 1.177978281451744, 1.8023816719217711, -0.1857485550645876, 0.786255817704967  …  0.9728960455123183, 1.207867641519857, 0.7043991685543584, 1.875029125516842, 1.1777828895145017, 0.9739983438404504, 0.9924538060488919, 0.45722066298508784, 0.934137143259855, 1.1034294834944995]
+ [3.501673539048462, -0.2130065599251324, -1.2385730596401328, -0.4365220556429444, 1.3401119881012817, 0.9535648783964389, 1.2546565628829718, 2.192541244873288, -0.5182069235755918, 0.8797695821408044  …  0.7135814834354494, 1.5675537575304153, 0.24846588262688793, 2.1498134051066455, 1.4892675234061596, 1.0345767239889274, 1.4334977036880379, 0.8777146209987329, 1.046426333515253, 1.1425580119251453]
+ [5.185097440554946, -0.5757562130063147, -1.7043320423672532, -0.7093810824467112, 1.8591886279494372, 1.2170651086290019, 1.347020814193673, 2.574324832100407, -0.8594979722687179, 1.0147287862096628  …  0.08536212518328452, 1.9569708720943038, -0.666176367431668, 2.460041000005208, 1.9774583448721346, 1.0047647314783612, 2.111403625379449, 1.7084523523706714, 1.1840279831714489, 1.0477938135665312]
+ [7.466707443085445, -0.9611615397756959, -2.2869622900177387, -1.0277236566876342, 2.461008052191686, 1.7650093175255916, 1.5020357726315234, 3.0199710395361463, -1.1511751488096054, 1.0924115340274838  …  -1.1137897039068945, 2.3577975802811175, -2.1575503553063085, 2.839963202968425, 2.822838885666709, 0.6928251245881981, 3.0559191893375277, 3.1511287537947084, 1.412931181992034, 0.5694218429546043]
+ [10.161228426052084, -1.2884034434014677, -3.058360838138829, -1.3302301274010464, 2.9925600686177787, 2.7800465898945808, 1.7680309688424882, 3.708635338673924, -1.3240279801459596, 0.8978622918646272  …  -2.9597625119608946, 2.8073337537248024, -4.0901582900901605, 3.3191974774594937, 4.260520443614682, -0.10977419147467492, 4.1869292135517355, 5.192303900602119, 1.7892821220496211, -0.5460829197651772]
  ⋮
- [8.682801598133852e9, -1.4746608915196884e9, 6.21655579144842e9, -2.684667048812335e9, -1.2322174256198488e8, -1.2200450097024862e10, -8.319417966721466e9, 1.2637258479359882e9, 7.82380091871658e9, -7.813543076709813e9  …  -4.310942767568423e9, 1.4314903428380554e10, -1.0290264374660053e10, 8.143526710663184e8, -1.1418697012679338e10, -1.7936160360959827e10, -1.822772317207374e10, -2.0685240588137802e10, -9.023667357699337e9, -1.9626530259241673e10]
- [2.2020896703250244e10, -2.4475852283419676e9, 1.285182185040672e10, -6.212817277887419e9, -4.117638667555443e9, -2.6223270584844135e10, -2.0062879712141747e10, 2.7731365379063087e9, 1.4487952560802797e10, -1.9268774382785145e10  …  -6.077289678952021e9, 3.017263342194233e10, -1.9367216720087097e10, 1.3268347494657924e9, -2.1653981051783314e10, -3.798932619311891e10, -3.970022343998541e10, -4.4333254434413055e10, -1.9210701885168533e10, -4.012347410531021e10]
- [5.747653978847355e10, -2.4431256127799416e9, 2.794351952546235e10, -1.4349404594928778e10, -1.866140993040277e10, -5.902004415662274e10, -4.875289318326798e10, 7.566937578895867e9, 2.442863566358015e10, -4.774819269578953e10  …  -5.289188377328218e9, 6.471256480612351e10, -3.571454079710646e10, 2.775713733633406e9, -4.174517274436011e10, -8.256411980719737e10, -8.89402003951055e10, -9.854998251655717e10, -4.189686627534261e10, -8.24929160174308e10]
- [1.4408148186583655e11, 3.772111950779946e9, 6.070176053229744e10, -3.061122144502365e10, -6.197376611113616e10, -1.3205279082856139e11, -1.1193849573896321e11, 2.2338501617888813e10, 3.1105823887415928e10, -1.1202751064300874e11  …  7.326295073078831e9, 1.3296653929310085e11, -6.058560251677729e10, 7.613107379489561e9, -7.78508684274926e10, -1.7453672988916437e11, -1.9434878732622974e11, -2.1418270627342767e11, -8.818213398053195e10, -1.6099744552619955e11]
- [3.5940961127936816e11, 3.3947772734112114e10, 1.3574336373084283e11, -6.0117061869490746e10, -1.865448921495766e11, -3.041577559418816e11, -2.5026141539807104e11, 6.906299148309894e10, 6.322219005637833e9, -2.586029337472415e11  …  6.037446045443994e10, 2.667166845245023e11, -9.329788433528749e10, 2.5246172790761124e10, -1.4332027636635464e11, -3.7075425594113586e11, -4.2931610844660364e11, -4.6753528480905853e11, -1.8355319580152835e11, -3.0321302950456305e11]
- [8.709167311544537e11, 1.3845548385022964e11, 3.052683873097301e11, -9.8103433084552e10, -5.2073969241697754e11, -7.074654056920726e11, -5.302966607394929e11, 2.0960568832545728e11, -1.480637006945285e11, -5.776112477144434e11  …  2.301326195307292e11, 5.047228859678586e11, -1.188815323240054e11, 8.634124244286067e10, -2.5458857089414447e11, -7.80501723995588e11, -9.453163638231323e11, -1.0010534482489354e12, -3.686409453916363e11, -5.323258133976238e11]
- [2.0764644027098005e12, 4.57387911449334e11, 6.954503129685316e11, -9.148443430785612e10, -1.403966113823087e12, -1.6836271655738225e12, -1.0638581006620504e12, 6.284008885428921e11, -7.405054616998577e11, -1.2721596456946047e12  …  7.239909343510253e11, 8.815849730739597e11, -9.208469177215607e10, 2.932490498513618e11, -4.3437499795115753e11, -1.6607724366673872e12, -2.114582327982454e12, -2.1182929179642922e12, -7.159619631256249e11, -8.456877701324573e11]
- [4.704991235461599e12, 1.301253832409049e12, 1.5446032458926536e12, 1.9223849948270166e11, -3.5691465694701987e12, -3.9558285494828936e12, -1.9185534630740007e12, 1.785307278538939e12, -2.5590917221928057e12, -2.704104585059433e12  …  2.0018028035370708e12, 1.29562455111746e12, 8.118758102061253e10, 9.326635310693347e11, -6.789363960198962e11, -3.5039853288991846e12, -4.684132457038829e12, -4.283416265333123e12, -1.2933303877506455e12, -1.0837067632259346e12]
- [8.73769433292214e12, 2.7892519962853926e12, 2.8644999070348403e12, 1.093906037942095e12, -7.282395845298795e12, -7.760816279723397e12, -2.7528026919141133e12, 3.9838646415681763e12, -6.137222473221858e12, -4.86621161932002e12  …  4.254004628117728e12, 1.3489709840619336e12, 4.0914163224700476e11, 2.257322892148912e12, -8.77407695083006e11, -6.382132127370598e12, -8.82310552395867e12, -7.270321292501653e12, -1.943570274226415e12, -8.824872362047236e11]

Translating this to a GPU-based solve of the ODE simply requires moving the arrays for the initial condition, parameters, and caches to the GPU. This looks like:

using OrdinaryDiffEq, CUDA, LinearAlgebra
+ [1.0256829602104752e9, -1.4704778171187458e10, 2.1981078680357273e10, 2.728046486510647e9, -3.8965554995474374e8, -1.447749335318877e10, -1.7131897914888692e9, -1.2108769833331736e10, 2.5804771595372605e9, -5.97699062996193e9  …  -6.3871379351066e9, -5.445078947590716e8, -2.921522973439939e8, 4.079739723522224e9, -5.498859550387484e9, 1.6154816763341454e10, 5.317333099678879e9, 1.6617662511216357e8, -1.2345645368450068e10, 4.76169973413564e9]
+ [4.102800228867718e9, -3.672326208212151e10, 5.475020216527941e10, 7.658825770985814e9, -3.568405174597912e8, -3.3507812993290367e10, -4.403967181116129e9, -2.7932031998569572e10, 1.1255318337792488e10, -1.368932010490691e10  …  -1.469019166101528e10, 5.438825812180547e8, 5.2277687368099794e7, 9.478729372673061e9, -1.2290473866663568e10, 3.553787290635597e10, 1.1708644285273487e10, 3.8614484768554554e9, -3.0705730148001564e10, 1.0522060960447891e10]
+ [1.3817947561808008e10, -9.268165375418817e10, 1.374318502112814e11, 2.129197251163915e10, 2.893515010948268e8, -7.871844055221082e10, -1.125008627634984e10, -6.585869450689052e10, 3.887314551631099e10, -3.1643815858914032e10  …  -3.508144965645493e10, 5.1161846745173235e9, 2.1167713383980234e9, 2.292853905024586e10, -2.764431819782675e10, 7.926522116001724e10, 2.61008683050658e10, 1.7015963406328442e10, -7.828451554089456e10, 2.364893884392461e10]
+ [4.1814531729242485e10, -2.3159075568868976e11, 3.4158257755342914e11, 5.823831034337868e10, 2.151007527094165e9, -1.8482018433352823e11, -2.803942897910188e10, -1.562539099308355e11, 1.2000232578767435e11, -7.241206442294312e10  …  -8.541000574853362e10, 1.9145931214671856e10, 1.021046054210689e10, 5.616160875321385e10, -6.183617402799579e10, 1.764915676785289e11, 5.766487277802272e10, 5.753177788272522e10, -2.0003801830525104e11, 5.329923786386776e10]
+ [1.1807030295874408e11, -5.724466010987032e11, 8.411136826290669e11, 1.5783594222310684e11, 4.223484204556518e9, -4.343527763900816e11, -6.803826896007854e10, -3.730942814426864e11, 3.484278731822776e11, -1.6400028303083655e11  …  -2.111953234433739e11, 5.600318810721533e10, 3.769308232131422e10, 1.3815807722160416e11, -1.3808755593311215e11, 3.9267825542961346e11, 1.2572563562711305e11, 1.7350685215213593e11, -5.1049532989664526e11, 1.2082969449739177e11]
+ [3.1821078552824615e11, -1.4038487030811409e12, 2.0589406535883638e12, 4.265389648797428e11, -4.818921822176788e9, -1.0262488287464955e12, -1.6048572618773578e11, -8.985296712409902e11, 9.773385663041272e11, -3.693573712561595e11  …  -5.295723031233646e11, 1.427346040408889e11, 1.2527870322084915e11, 3.407410802767034e11, -3.0951098295397327e11, 8.767358428484409e11, 2.7023134117407373e11, 4.9236475094975525e11, -1.3018060279495654e12, 2.7742125423667755e11]
+ [8.143907851213302e11, -3.3645728488046655e12, 4.934179767290842e12, 1.1304118562051355e12, -7.79314410428256e10, -2.403655411925356e12, -3.601891999221419e11, -2.1458635515041914e12, 2.634991361729227e12, -8.189403146120977e11  …  -1.3187378194069856e12, 3.20881842399273e11, 3.87319585247303e11, 8.277006669560339e11, -6.870660798827722e11, 1.9405241811125571e12, 5.631066653002656e11, 1.3221918087382383e12, -3.2578287557504756e12, 6.38332137001463e11]
+ [2.0499222004017097e12, -8.088942837119732e12, 1.1874285664147645e13, 3.0203181172361357e12, -4.241629822214363e11, -5.722892285693561e12, -7.787094205238442e11, -5.201839991722113e12, 7.078973889163777e12, -1.8403714893347397e12  …  -3.3347631946879727e12, 6.449794259977246e11, 1.1785977219893228e12, 2.0318654445400923e12, -1.5436217195965964e12, 4.3659962538970474e12, 1.156780461599673e12, 3.5028536585826396e12, -8.207741292307389e12, 1.5136499834303633e12]
+ [3.32305236134565e12, -1.288129659056256e13, 1.8922050548245875e13, 5.093051917195797e12, -9.256290516445881e11, -9.114350883702793e12, -1.1451130313002883e12, -8.353123134099205e12, 1.1936653628706715e13, -2.846825420118581e12  …  -5.473985135947022e12, 8.741497142395105e11, 2.1177029620570251e12, 3.2820481762231885e12, -2.3769978768513574e12, 6.753291397840154e12, 1.6785947135948518e12, 5.846536920707511e12, -1.3411581750173848e13, 2.4217991785766455e12]

Translating this to a GPU-based solve of the ODE simply requires moving the arrays for the initial condition, parameters, and caches to the GPU. This looks like:

using OrdinaryDiffEq, CUDA, LinearAlgebra
 u0 = cu(rand(1000))
 A = cu(randn(1000, 1000))
 f(du, u, p, t) = mul!(du, A, u)
 prob = ODEProblem(f, u0, (0.0f0, 1.0f0)) # Float32 is better on GPUs!
 sol = solve(prob, Tsit5())
retcode: Success
 Interpolation: specialized 4th order "free" interpolation
-t: 50-element Vector{Float32}:
+t: 47-element Vector{Float32}:
  0.0
- 0.0028032633
- 0.00770806
- 0.014659371
- 0.023422156
- 0.034057636
- 0.046285566
- 0.05950984
- 0.074400246
- 0.090368554
+ 0.0026315092
+ 0.0076305955
+ 0.014856268
+ 0.023957422
+ 0.034955107
+ 0.047603913
+ 0.061684184
+ 0.07742522
+ 0.09273072
  ⋮
- 0.8050874
- 0.83047694
- 0.85561347
- 0.88050914
- 0.90504134
- 0.9308001
- 0.95659125
- 0.98157626
+ 0.7925204
+ 0.81982714
+ 0.8483782
+ 0.8775659
+ 0.90655315
+ 0.93570405
+ 0.96486896
+ 0.99425656
  1.0
-u: 50-element Vector{CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}:
- Float32[0.88071316, 0.091738656, 0.5644176, 0.89315474, 0.11550513, 0.9899834, 0.76465535, 0.5730443, 0.5333632, 0.46715936  …  0.51651764, 0.69562453, 0.13039295, 0.017110005, 0.3178709, 0.7581309, 0.32872194, 0.65469325, 0.5585072, 0.12346421]
- Float32[0.88458586, 0.10383208, 0.56542593, 0.9105531, 0.14688143, 0.98178184, 0.725259, 0.59418094, 0.573652, 0.43604097  …  0.46635318, 0.7562544, 0.07489447, 0.09894379, 0.2747469, 0.7218533, 0.30385882, 0.749069, 0.6420995, 0.16706459]
- Float32[0.9013025, 0.11726365, 0.5532294, 0.92281747, 0.20539533, 0.96700597, 0.6402653, 0.6178738, 0.6443927, 0.3957117  …  0.3768051, 0.8760311, -0.033707015, 0.26627496, 0.20992415, 0.63702106, 0.253282, 0.90664876, 0.77402776, 0.24059023]
- Float32[0.9506427, 0.11814861, 0.5062973, 0.8983726, 0.28816503, 0.94372785, 0.48070958, 0.6184501, 0.74748284, 0.37123397  …  0.24583998, 1.076143, -0.20188585, 0.55090535, 0.14055808, 0.4674204, 0.17131874, 1.1111343, 0.93024594, 0.3372425]
- Float32[1.0623785, 0.08638905, 0.4001696, 0.79027975, 0.37505215, 0.9076652, 0.20401168, 0.5563591, 0.8853406, 0.40204  …  0.073421545, 1.3801469, -0.41001603, 0.9741743, 0.090954676, 0.1634308, 0.066957146, 1.3308672, 1.0788912, 0.44552672]
- Float32[1.278569, -0.0058233947, 0.20831701, 0.52600056, 0.428239, 0.8488757, -0.2666873, 0.37851772, 1.0660826, 0.5520373  …  -0.14858639, 1.8276249, -0.5984681, 1.5513371, 0.09369785, -0.359674, -0.020600917, 1.5228127, 1.1977124, 0.56266296]
- Float32[1.6344533, -0.19264252, -0.07817726, 0.015917871, 0.39763498, 0.7514975, -1.030123, 0.039752055, 1.2891968, 0.9227872  …  -0.42609212, 2.4470403, -0.6241573, 2.2441795, 0.20730077, -1.2141467, 0.014517415, 1.5952181, 1.2774434, 0.7059188]
- Float32[2.1275413, -0.50668025, -0.41936472, -0.79362315, 0.27951172, 0.5912294, -2.1819494, -0.4400623, 1.5444939, 1.6427702  …  -0.76444525, 3.2350993, -0.26647314, 2.9609494, 0.52785707, -2.5357955, 0.34722802, 1.3856934, 1.3453207, 0.9431921]
- Float32[2.7601502, -1.0255971, -0.71567386, -1.9411954, 0.20904303, 0.30135608, -3.95969, -0.9587043, 1.8606849, 2.981347  …  -1.2199625, 4.248185, 0.85137504, 3.6808665, 1.2795985, -4.724534, 1.273345, 0.5378595, 1.4889644, 1.4750801]
- Float32[3.416101, -1.8075991, -0.61632633, -3.0732634, 0.7101897, -0.21086018, -6.4749594, -1.0869303, 2.3067675, 5.218714  …  -1.8599517, 5.4480147, 3.2286246, 4.417976, 2.8007317, -8.321874, 3.0981548, -1.6155888, 1.866251, 2.6711357]
+u: 47-element Vector{CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}:
+ Float32[0.7225247, 0.6827509, 0.46744737, 0.26545057, 0.04414476, 0.1619979, 0.49150833, 0.16592944, 0.76445055, 0.7400701  …  0.53425467, 0.59935826, 0.60482174, 0.53545636, 0.63642085, 0.045652226, 0.80447304, 0.58318394, 0.26308885, 0.07304496]
+ Float32[0.70303774, 0.6500045, 0.46938607, 0.2938062, 0.052414216, 0.18511477, 0.5198097, 0.2251908, 0.7827285, 0.72593164  …  0.53357834, 0.5855796, 0.5951858, 0.5525056, 0.5637058, 0.008294081, 0.82217926, 0.57241875, 0.25514564, 0.120787494]
+ Float32[0.65449476, 0.5843905, 0.47518268, 0.34049872, 0.059031248, 0.2386228, 0.5781209, 0.3613446, 0.8345887, 0.696833  …  0.5467951, 0.56020874, 0.5753321, 0.61105466, 0.42069042, -0.036201555, 0.8537953, 0.55060935, 0.24477677, 0.21258187]
+ Float32[0.55849123, 0.48954397, 0.49194542, 0.39145717, 0.04425673, 0.33861044, 0.6668797, 0.608908, 0.9501803, 0.6455572  …  0.59690183, 0.52526, 0.540162, 0.75404626, 0.20365262, -0.043062713, 0.89473045, 0.5171586, 0.23878954, 0.34487927]
+ Float32[0.39368913, 0.38913816, 0.53427094, 0.4265301, -0.02374488, 0.5021369, 0.7717445, 0.996237, 1.1684444, 0.55456257  …  0.7072019, 0.48530567, 0.47538504, 1.025663, -0.08439971, 0.035023384, 0.93824303, 0.47399336, 0.24483716, 0.5046424]
+ Float32[0.1212203, 0.33196568, 0.627032, 0.4188235, -0.19679715, 0.7508257, 0.8610578, 1.5557679, 1.5549736, 0.38067228  …  0.9051922, 0.44912893, 0.34119383, 1.4727477, -0.44742352, 0.23928799, 0.97913486, 0.42191914, 0.27925125, 0.6779353]
+ Float32[-0.32172176, 0.40827894, 0.79903436, 0.32075453, -0.5489147, 1.0917214, 0.87011945, 2.2826025, 2.205685, 0.04414882  …  1.2213372, 0.4403709, 0.053891897, 2.1066384, -0.86733884, 0.59514743, 1.0115663, 0.35704634, 0.39089298, 0.8409999]
+ Float32[-1.0576835, 0.7440239, 1.0715864, 0.044710785, -1.1769041, 1.513891, 0.7003077, 3.1153433, 3.280873, -0.59731704  …  1.7056223, 0.50319934, -0.54874647, 2.8703952, -1.2914454, 1.116311, 1.0329729, 0.250507, 0.70598805, 0.97881895]
+ Float32[-2.3686838, 1.515115, 1.4451475, -0.6041113, -2.222917, 2.0097659, 0.19418256, 3.8930962, 5.1095223, -1.8312967  …  2.478216, 0.7100382, -1.8054675, 3.5927691, -1.5875037, 1.8545676, 1.0488384, -0.0053459853, 1.5245212, 1.1263922]
+ Float32[-4.451754, 2.729189, 1.7861743, -1.8237592, -3.6087675, 2.5278747, -0.73530376, 4.1606746, 7.7941113, -3.8415778  …  3.581097, 1.090233, -3.9780135, 3.8316417, -1.44118, 2.8008797, 1.0765508, -0.5999393, 3.1404438, 1.4147009]
  ⋮
- Float32[-1.2448122f9, 6.120147f9, -2.2411454f10, 2.5883497f9, -1.6325854f10, -1.7176054f10, 1.845867f10, -5.097449f9, -1.0280615f9, 3.1593787f9  …  -1.9029856f10, 1.6097072f10, 1.2417926f10, 2.0032188f10, -1.0834926f10, 2.8556372f9, 3.1497367f9, 7.697759f9, -6.9380485f9, -5.535416f8]
- Float32[-5.054011f9, 1.3453031f10, -4.616155f10, 8.2861164f9, -3.969783f10, -4.1313247f10, 4.0932168f10, -9.638242f9, 4.9811946f8, 8.812047f9  …  -4.5696623f10, 3.8924104f10, 2.6688123f10, 4.5313098f10, -1.5538131f10, 5.968125f9, 6.2155515f9, 2.0368216f10, -1.6654228f10, -6.140038f8]
- Float32[-1.4944735f10, 2.8362789f10, -9.165974f10, 2.815186f10, -9.5766045f10, -9.474463f10, 8.862145f10, -1.800876f10, 7.6212874f9, 2.4890206f10  …  -1.0672122f11, 9.316511f10, 5.550552f10, 9.85828f10, -1.7973344f10, 1.2330889f10, 7.6901f9, 4.9893302f10, -3.677066f10, 1.6863081f9]
- Float32[-3.893791f10, 5.6953594f10, -1.7770832f11, 9.044478f10, -2.2902504f11, -2.0856529f11, 1.8912282f11, -3.359453f10, 3.0586415f10, 6.920851f10  …  -2.425359f11, 2.2062703f11, 1.117672f11, 2.0673906f11, -1.1531223f10, 2.5009125f10, -2.3971753f9, 1.1517141f11, -7.575034f10, 1.3046186f10]
- Float32[-9.4686446f10, 1.0731566f11, -3.412096f11, 2.6793178f11, -5.3960088f11, -4.4109395f11, 3.9954403f11, -6.260374f10, 9.412401f10, 1.8581643f11  …  -5.3513342f11, 5.1393908f11, 2.1738078f11, 4.1722443f11, 1.5812787f10, 4.882664f10, -5.2608455f10, 2.5214506f11, -1.465566f11, 5.201068f10]
- Float32[-2.3425732f11, 1.9188338f11, -6.936081f11, 7.865207f11, -1.3207119f12, -9.422044f11, 8.834734f11, -1.2063414f11, 2.7343546f11, 5.0998555f11  …  -1.2084028f12, 1.2411704f12, 4.2599105f11, 8.479224f11, 8.660919f10, 9.264514f10, -2.3117296f11, 5.5311847f11, -2.7944665f11, 1.7754315f11]
- Float32[-5.7578055f11, 2.9436674f11, -1.4863718f12, 2.1885256f12, -3.2131108f12, -1.963872f12, 1.982272f12, -2.2853788f11, 7.4989955f11, 1.3616497f12  …  -2.6955859f12, 2.9757908f12, 8.1371673f11, 1.6795491f12, 2.162986f11, 1.5407199f11, -7.498414f11, 1.175319f12, -5.1397958f11, 5.3616073f11]
- Float32[-1.3832491f12, 3.0316875f11, -3.320267f12, 5.6475113f12, -7.5383245f12, -3.914882f12, 4.4024636f12, -4.0234097f11, 1.9380348f12, 3.4359225f12  …  -5.81553f12, 6.876512f12, 1.4829776f12, 3.1870962f12, 3.6673844f11, 1.8028655f11, -2.0433033f12, 2.376735f12, -9.128034f11, 1.4484123f12]
- Float32[-2.6601705f12, 3.8654902f10, -6.250732f12, 1.1101971f13, -1.4052883f13, -6.4339533f12, 8.0015576f12, -5.68527f11, 3.8711103f12, 6.6996934f12  …  -1.022768f13, 1.2668507f13, 2.263842f12, 5.060772f12, 3.9652835f11, 6.0590236f10, -4.0504087f12, 3.945264f12, -1.403861f12, 2.9080258f12]

Notice that the solution values sol[i] are CUDA-based arrays, which can be moved back to the CPU using Array(sol[i]).

More details on effective use of within-method GPU parallelism can be found in the within-method GPU parallelism tutorial.

Example of Parameter-Parallelism with GPU Ensemble Methods

On the other side of the spectrum, what if we want to solve tons of small ODEs? For this use case, we would use the ensemble methods to solve the same ODE many times with different parameters. This looks like:

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA
+ Float32[8.824565f9, -2.2353566f10, -1.2284248f9, 4.5821153f9, -1.3628768f10, -1.3454379f10, -1.5564204f10, -8.894392f9, 4.7344553f9, 1.7011054f10  …  2.564758f10, 8.0708495f9, -8.114939f9, 1.1835382f10, 2.0595294f10, 1.4037524f10, 6.3316064f8, -2.574791f10, 2.6456371f10, -6.8841536f8]
+ Float32[1.9375479f10, -5.397361f10, -3.351768f9, 6.6897085f9, -3.193777f10, -3.1437576f10, -3.817595f10, -1.9607966f10, 8.351324f9, 3.7169885f10  …  6.1636473f10, 1.9515163f10, -2.0887106f10, 3.0795528f10, 4.8416838f10, 3.565286f10, 2.3581568f9, -6.16722f10, 6.6675495f10, -9.733023f8]
+ Float32[4.3843035f10, -1.3919184f11, -8.715765f9, 7.8203694f9, -7.565009f10, -7.531131f10, -9.722679f10, -4.5324268f10, 1.2755874f10, 8.3999056f10  …  1.5391844f11, 4.858561f10, -5.523795f10, 8.2276745f10, 1.1691341f11, 9.271944f10, 6.8480077f9, -1.5473071f11, 1.7324961f11, -1.07187494f9]
+ Float32[1.0177321f11, -3.7435595f11, -2.1628383f10, 3.3732529f9, -1.7744788f11, -1.8204808f11, -2.5172009f11, -1.0774445f11, 1.1898215f10, 1.9447846f11  …  3.912012f11, 1.2198168f11, -1.4674493f11, 2.2150658f11, 2.84229f11, 2.4204686f11, 1.6423894f10, -3.983256f11, 4.546249f11, -8.179858f8]
+ Float32[2.3996703f11, -1.01305496f12, -5.0540126f10, -1.7075609f10, -4.0176478f11, -4.3459877f11, -6.438509f11, -2.5632563f11, -1.8193912f10, 4.5436312f11  …  9.845419f11, 3.008135f11, -3.8116187f11, 5.858209f11, 6.782362f11, 6.181776f11, 3.0043122f10, -1.0218094f12, 1.1722572f12, -2.6702508f9]
+ Float32[5.8802314f11, -2.771777f12, -1.1320558f11, -7.253592f10, -8.852779f11, -1.0400577f12, -1.6441394f12, -6.15724f11, -1.5709076f11, 1.0914169f12  …  2.4816176f12, 7.3768206f11, -9.820322f11, 1.5448948f12, 1.607414f12, 1.5662078f12, 2.5168226f10, -2.6365442f12, 3.006812f12, -2.4933814f10]
+ Float32[1.5009452f12, -7.581524f12, -2.4300241f11, -1.817655f11, -1.881664f12, -2.4934715f12, -4.1653518f12, -1.4849096f12, -6.439183f11, 2.698297f12  …  6.235828f12, 1.7931837f12, -2.5046698f12, 4.0506247f12, 3.7717543f12, 3.9250997f12, -1.1668537f11, -6.793232f12, 7.6353965f12, -1.5674465f11]
+ Float32[4.015159f12, -2.078769f13, -5.0495462f11, -3.0769506f11, -3.8434054f12, -6.048011f12, -1.0523076f13, -3.6151765f12, -2.178566f12, 6.9205214f12  …  1.5730337f13, 4.3566521f12, -6.387422f12, 1.0648936f13, 8.833077f12, 9.81156f12, -8.76899f11, -1.755765f13, 1.932217f13, -7.69174f11]
+ Float32[4.8864706f12, -2.5295143f13, -5.8036486f11, -3.1657512f11, -4.3901667f12, -7.1977203f12, -1.259647f13, -4.302969f12, -2.728486f12, 8.346593f12  …  1.8842418f13, 5.178649f12, -7.665734f12, 1.285762f13, 1.0423372f13, 1.1724157f13, -1.2031495f12, -2.1124363f13, 2.3137939f13, -1.0284885f12]

Notice that the solution values sol[i] are CUDA-based arrays, which can be moved back to the CPU using Array(sol[i]).

More details on effective use of within-method GPU parallelism can be found in the within-method GPU parallelism tutorial.

Example of Parameter-Parallelism with GPU Ensemble Methods

On the other side of the spectrum, what if we want to solve tons of small ODEs? For this use case, we would use the ensemble methods to solve the same ODE many times with different parameters. This looks like:

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA
 
 function lorenz(u, p, t)
     σ = p[1]
@@ -116,4 +116,4 @@
 
 sol = solve(monteprob, GPUTsit5(), EnsembleGPUKernel(CUDA.CUDABackend()),
     trajectories = 10_000)
EnsembleSolution Solution of length 10000 with uType:
-SciMLBase.ODESolution{Float32, 2, SubArray{StaticArraysCore.SVector{3, Float32}, 1, Matrix{StaticArraysCore.SVector{3, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, Nothing, SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, DiffEqGPU.ImmutableODEProblem{StaticArraysCore.SVector{3, Float32}, Tuple{Float32, Float32}, false, StaticArraysCore.SVector{3, Float32}, SciMLBase.ODEFunction{false, SciMLBase.AutoSpecialize, typeof(Main.lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, GPUTsit5, SciMLBase.LinearInterpolation{SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, SubArray{StaticArraysCore.SVector{3, Float32}, 1, Matrix{StaticArraysCore.SVector{3, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}}, Nothing, Nothing}

To dig more into this example, see the ensemble GPU solving tutorial.

+SciMLBase.ODESolution{Float32, 2, SubArray{StaticArraysCore.SVector{3, Float32}, 1, Matrix{StaticArraysCore.SVector{3, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, Nothing, SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, DiffEqGPU.ImmutableODEProblem{StaticArraysCore.SVector{3, Float32}, Tuple{Float32, Float32}, false, StaticArraysCore.SVector{3, Float32}, SciMLBase.ODEFunction{false, SciMLBase.AutoSpecialize, typeof(Main.lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, GPUTsit5, SciMLBase.LinearInterpolation{SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, SubArray{StaticArraysCore.SVector{3, Float32}, 1, Matrix{StaticArraysCore.SVector{3, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}}, Nothing, Nothing}

To dig more into this example, see the ensemble GPU solving tutorial.

diff --git a/dev/index.html b/dev/index.html index 8e60c620..e8375606 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,25 +1,24 @@ DiffEqGPU: Massively Data-Parallel GPU Solving of ODEs · DiffEqGPU.jl

DiffEqGPU: Massively Data-Parallel GPU Solving of ODEs

This library is a component package of the DifferentialEquations.jl ecosystem. It includes functionality for making use of GPUs in the differential equation solvers.

Installation

To install DiffEqGPU.jl, use the Julia package manager:

using Pkg
-Pkg.add("DiffEqGPU")

This will also install all the dependencies, including CUDA.jl, which will in turn install the versions of CUDA and CuDNN required by these libraries. Note that the same requirements of CUDA.jl apply to DiffEqGPU, such as requiring a GPU with CUDA v11 compatibility. For more information on these requirements, see the requirements of CUDA.jl.

Contributing

Reproducibility

The documentation of this SciML package was built using these direct dependencies,
Status `/var/lib/buildkite-agent/builds/gpuci-6/julialang/diffeqgpu-dot-jl/docs/Project.toml`
+Pkg.add("DiffEqGPU")

This will also install all the dependencies, including CUDA.jl, which will in turn install the versions of CUDA and CuDNN required by these libraries. Note that the same requirements of CUDA.jl apply to DiffEqGPU, such as requiring a GPU with CUDA v11 compatibility. For more information on these requirements, see the requirements of CUDA.jl.

Contributing

Reproducibility

The documentation of this SciML package was built using these direct dependencies,
Status `/var/lib/buildkite-agent/builds/gpuci-11/julialang/diffeqgpu-dot-jl/docs/Project.toml`
   [79e6a3ab] Adapt v3.7.1
   [6e4b80f9] BenchmarkTools v1.3.2
- [052768ef] CUDA v4.4.1
-  [2b5f629d] DiffEqBase v6.136.0
-  [071ae1c0] DiffEqGPU v3.2.0 `/var/lib/buildkite-agent/builds/gpuci-6/julialang/diffeqgpu-dot-jl`
+  [052768ef] CUDA v5.1.0
+  [2b5f629d] DiffEqBase v6.138.1
+  [071ae1c0] DiffEqGPU v3.2.0 `/var/lib/buildkite-agent/builds/gpuci-11/julialang/diffeqgpu-dot-jl`
   [e30172f5] Documenter v1.1.2
   [587475ba] Flux v0.14.6
   [f6369f11] ForwardDiff v0.10.36
-  [1dea7af3] OrdinaryDiffEq v6.58.1
+  [1dea7af3] OrdinaryDiffEq v6.59.0
   [91a5bcdd] Plots v1.39.0
   [1bc83da4] SafeTestsets v0.1.0
   [1ed8b502] SciMLSensitivity v7.46.0
   [90137ffa] StaticArrays v1.6.5
-  [789caeaf] StochasticDiffEq v6.63.0
+  [789caeaf] StochasticDiffEq v6.63.2
   [8ba89e20] Distributed
   [37e2e46d] LinearAlgebra
   [9a3f8284] Random
-  [10745b16] Statistics v1.9.0
-Info Packages marked with ⌃ have new versions available and may be upgradable.
and using this machine and Julia version.
Julia Version 1.9.3
+  [10745b16] Statistics v1.9.0
and using this machine and Julia version.
Julia Version 1.9.3
 Commit bed2cd540a1 (2023-08-24 14:43 UTC)
 Build Info:
   Official https://julialang.org/ release
@@ -29,17 +28,18 @@
   WORD_SIZE: 64
   LIBM: libopenlibm
   LLVM: libLLVM-14.0.6 (ORCJIT, znver2)
-  Threads: 1 on 2 virtual cores
+  Threads: 5 on 2 virtual cores
 Environment:
   JULIA_CPU_THREADS = 2
   JULIA_DEPOT_PATH = /root/.cache/julia-buildkite-plugin/depots/26e4f8df-bbdd-40a2-82e4-24a159795e4b
   LD_LIBRARY_PATH = /usr/local/nvidia/lib:/usr/local/nvidia/lib64
   JULIA_PKG_SERVER =
-  JULIA_IMAGE_THREADS = 1
A more complete overview of all dependencies and their versions is also provided.
Status `/var/lib/buildkite-agent/builds/gpuci-6/julialang/diffeqgpu-dot-jl/docs/Manifest.toml`
+  JULIA_IMAGE_THREADS = 1
A more complete overview of all dependencies and their versions is also provided.
Status `/var/lib/buildkite-agent/builds/gpuci-11/julialang/diffeqgpu-dot-jl/docs/Manifest.toml`
   [47edcb42] ADTypes v0.2.4
   [a4c015fc] ANSIColoredPrinters v0.0.1
   [621f4979] AbstractFFTs v1.5.0
   [1520ce14] AbstractTrees v0.4.4
+  [7d9f7c33] Accessors v0.1.33
   [79e6a3ab] Adapt v3.7.1
   [dce04be8] ArgCheck v2.3.0
   [ec485272] ArnoldiMethod v0.2.0
@@ -53,11 +53,11 @@
   [62783981] BitTwiddlingConvenienceFunctions v0.1.5
  [fa961155] CEnum v0.4.2
   [2a0fbf3d] CPUSummary v0.2.4
- [052768ef] CUDA v4.4.1
+  [052768ef] CUDA v5.1.0
   [1af6417a] CUDA_Runtime_Discovery v0.2.2
   [49dc2e85] Calculus v0.5.1
   [7057c7e9] Cassette v0.3.12
-  [082447d4] ChainRules v1.56.0
+  [082447d4] ChainRules v1.58.0
   [d360d2e6] ChainRulesCore v1.18.0
   [fb6a15b2] CloseOpenIntervals v0.1.12
   [944b1d66] CodecZlib v0.7.3
@@ -75,23 +75,25 @@
   [6add18c4] ContextVariablesX v0.1.3
   [d38c429a] Contour v0.6.2
   [adafc99b] CpuId v0.3.1
+  [a8cc5b0e] Crayons v4.1.1
   [9a962f9c] DataAPI v1.15.0
+  [a93c6f00] DataFrames v1.6.1
   [864edb3b] DataStructures v0.18.15
   [e2d170a0] DataValueInterfaces v1.0.0
   [244e2a9f] DefineSingletons v0.1.2
   [8bb1440f] DelimitedFiles v1.9.1
-  [2b5f629d] DiffEqBase v6.136.0
+  [2b5f629d] DiffEqBase v6.138.1
   [459566f4] DiffEqCallbacks v2.33.1
-  [071ae1c0] DiffEqGPU v3.2.0 `/var/lib/buildkite-agent/builds/gpuci-6/julialang/diffeqgpu-dot-jl`
+  [071ae1c0] DiffEqGPU v3.2.0 `/var/lib/buildkite-agent/builds/gpuci-11/julialang/diffeqgpu-dot-jl`
   [77a26b50] DiffEqNoiseProcess v5.19.0
   [163ba53b] DiffResults v1.1.0
   [b552c78f] DiffRules v1.15.1
   [b4f34e82] Distances v0.10.10
-  [31c24e10] Distributions v0.25.102
+  [31c24e10] Distributions v0.25.103
   [ffbed154] DocStringExtensions v0.9.3
   [e30172f5] Documenter v1.1.2
   [fa6b7ba4] DualNumbers v0.6.8
-  [da5c29d0] EllipsisNotation v1.7.0
+  [da5c29d0] EllipsisNotation v1.8.0
   [4e289a0a] EnumX v1.0.4
   [7da242da] Enzyme v0.11.10
   [f151be2c] EnzymeCore v0.6.3
@@ -114,9 +116,9 @@
   [069b7b12] FunctionWrappers v1.1.3
   [77dc65aa] FunctionWrappersWrappers v0.1.3
   [d9f16b24] Functors v0.4.5
- [0c68f7d7] GPUArrays v8.8.1
+  [0c68f7d7] GPUArrays v9.1.0
   [46192b85] GPUArraysCore v0.1.5
- [61eb1bfa] GPUCompiler v0.21.4
+  [61eb1bfa] GPUCompiler v0.25.0
   [28b8d3ca] GR v0.72.10
   [c145ed77] GenericSchur v0.5.3
   [86223c79] Graphs v1.9.0
@@ -129,6 +131,10 @@
   [615f187c] IfElse v0.1.1
   [d25df0c9] Inflate v0.1.4
   [22cec73e] InitialValues v0.3.1
+  [842dd82b] InlineStrings v1.4.0
+  [18e54dd8] IntegerMathUtils v0.1.2
+  [3587e190] InverseFunctions v0.1.12
+  [41ab1584] InvertedIndices v1.3.0
   [92d709cd] IrrationalConstants v0.2.2
   [82899510] IteratorInterfaceExtensions v1.0.0
   [1019f520] JLFzf v0.1.6
@@ -137,27 +143,28 @@
   [b14d175d] JuliaVariables v0.2.4
   [ccbc3e58] JumpProcesses v9.8.0
   [ef3ab10e] KLU v0.4.1
-  [63c18a36] KernelAbstractions v0.9.10
+  [63c18a36] KernelAbstractions v0.9.12
   [ba0b0d4f] Krylov v0.9.4
   [929cbde3] LLVM v6.4.0
+  [8b046642] LLVMLoopInfo v1.0.0
   [b964fa9f] LaTeXStrings v1.3.1
   [23fbe1c1] Latexify v0.16.1
+  [73f95e8e] LatticeRules v0.0.1
   [10f19ff3] LayoutPointers v0.1.15
   [0e77f7df] LazilyInitializedFields v1.2.1
   [50d2b5c4] Lazy v0.15.1
   [2d8b4e74] LevyArea v1.0.0
   [d3d80556] LineSearches v7.2.0
-  [7ed4a6bd] LinearSolve v2.15.0
+  [7ed4a6bd] LinearSolve v2.19.0
   [2ab3a3ac] LogExpFunctions v0.3.26
   [e6f89c97] LoggingExtras v1.0.3
   [bdcacae8] LoopVectorization v0.12.166
-  [33e6dc65] MKL v0.6.1
   [d8e11817] MLStyle v0.4.17
   [f1d291b0] MLUtils v0.4.3
   [1914dd2f] MacroTools v0.5.11
   [d125e4d3] ManualMemory v0.1.8
   [d0879d2d] MarkdownAST v0.1.2
-  [739be429] MbedTLS v1.1.7
+  [739be429] MbedTLS v1.1.8
   [442fdcdd] Measures v0.3.2
   [128add7d] MicroCollections v0.1.4
   [e1d29d7a] Missings v1.1.0
@@ -165,9 +172,10 @@
   [d41bc354] NLSolversBase v7.8.3
   [2774e3e8] NLsolve v4.5.1
   [872c559c] NNlib v0.9.7
+  [5da4648a] NVTX v0.3.3
   [77ba4419] NaNMath v1.0.2
   [71a1bf82] NameResolution v0.1.5
-  [8913a72c] NonlinearSolve v2.6.0
+  [8913a72c] NonlinearSolve v2.8.0
   [d8793406] ObjectFile v0.4.1
   [6fe1bfb0] OffsetArrays v1.12.10
   [0b1bfda6] OneHotArrays v0.2.4
@@ -175,32 +183,36 @@
   [429524aa] Optim v1.7.8
   [3bd65402] Optimisers v0.3.1
   [bac558e1] OrderedCollections v1.6.2
-  [1dea7af3] OrdinaryDiffEq v6.58.1
-  [90014a1f] PDMats v0.11.28
+  [1dea7af3] OrdinaryDiffEq v6.59.0
+  [90014a1f] PDMats v0.11.29
   [65ce6f38] PackageExtensionCompat v1.0.2
   [d96e819e] Parameters v0.12.3
-  [69de0a69] Parsers v2.7.2
+  [69de0a69] Parsers v2.8.0
   [b98c9c47] Pipe v1.3.0
   [ccf2f8ad] PlotThemes v3.1.0
   [995b91a9] PlotUtils v1.3.5
   [91a5bcdd] Plots v1.39.0
   [e409e4f3] PoissonRandom v0.4.4
-  [f517fe37] Polyester v0.7.8
+  [f517fe37] Polyester v0.7.9
   [1d0040c9] PolyesterWeave v0.2.1
+  [2dfb63ee] PooledArrays v1.4.3
   [85a6dd25] PositiveFactorizations v0.2.4
   [d236fae5] PreallocationTools v0.4.12
   [aea7be01] PrecompileTools v1.2.0
   [21216c6a] Preferences v1.4.1
   [8162dcfd] PrettyPrint v0.2.0
+  [08abe8d2] PrettyTables v2.2.8
+  [27ebfcd6] Primes v0.5.4
   [33c8b6b6] ProgressLogging v0.1.4
   [1fd47b50] QuadGK v2.9.1
+  [8a4e6c94] QuasiMonteCarlo v0.3.2
   [74087812] Random123 v1.6.1
   [e6cf234a] RandomNumbers v1.5.3
   [c1ae055f] RealDot v0.1.0
   [3cdcf5f2] RecipesBase v1.3.4
   [01d81517] RecipesPipeline v0.6.12
   [731186ca] RecursiveArrayTools v2.38.10
-  [f2c3362d] RecursiveFactorization v0.2.20
+  [f2c3362d] RecursiveFactorization v0.2.21
   [189a3867] Reexport v1.2.2
   [2792f1a3] RegistryInstances v0.1.0
   [05181044] RelocatableFolders v1.0.1
@@ -212,21 +224,23 @@
   [94e857df] SIMDTypes v0.1.0
   [476501e8] SLEEFPirates v0.6.42
   [1bc83da4] SafeTestsets v0.1.0
-  [0bca4576] SciMLBase v2.6.0
+  [0bca4576] SciMLBase v2.8.0
   [e9a6253c] SciMLNLSolve v0.1.9
-  [c0aeaf25] SciMLOperators v0.3.6
+  [c0aeaf25] SciMLOperators v0.3.7
   [1ed8b502] SciMLSensitivity v7.46.0
   [6c6a2e73] Scratch v1.2.1
+  [91c51154] SentinelArrays v1.4.1
   [efcf1570] Setfield v1.1.1
   [605ecd9f] ShowCases v0.1.0
   [992d4aef] Showoff v1.0.3
   [777ac1f9] SimpleBufferStream v1.1.0
   [05bca326] SimpleDiffEq v1.11.0
-  [727e6d20] SimpleNonlinearSolve v0.1.23
+  [727e6d20] SimpleNonlinearSolve v0.1.25
   [699a6c99] SimpleTraits v0.9.4
   [ce78b400] SimpleUnPack v1.1.0
+  [ed01d8cd] Sobol v1.5.0
   [a2af1166] SortingAlgorithms v1.2.0
-  [47a9eef4] SparseDiffTools v2.9.2
+  [47a9eef4] SparseDiffTools v2.11.0
   [dc90abb0] SparseInverseSubset v0.1.1
   [e56a9233] Sparspak v0.3.9
   [276daf66] SpecialFunctions v2.3.1
@@ -238,8 +252,9 @@
   [82ae8749] StatsAPI v1.7.0
   [2913bbd2] StatsBase v0.34.2
   [4c63d2b9] StatsFuns v1.3.0
-  [789caeaf] StochasticDiffEq v6.63.0
- [7792a7ef] StrideArraysCore v0.4.17
+  [789caeaf] StochasticDiffEq v6.63.2
+  [7792a7ef] StrideArraysCore v0.5.1
+  [892a3eda] StringManipulation v0.3.4
   [09ab397b] StructArrays v0.6.16
   [53d494c1] StructIO v0.3.0
   [2efcf032] SymbolicIndexingInterface v0.2.2
@@ -248,9 +263,9 @@
   [62fd8b95] TensorCore v0.1.1
   [8290d209] ThreadingUtilities v0.5.2
   [a759f4b9] TimerOutputs v0.5.23
-  [9f7883ad] Tracker v0.2.27
-  [3bb67fe8] TranscodingStreams v0.10.1
-  [28d57a85] Transducers v0.4.78
+  [9f7883ad] Tracker v0.2.28
+  [3bb67fe8] TranscodingStreams v0.10.2
+  [28d57a85] Transducers v0.4.79
   [a2a6695c] TreeViews v0.3.0
   [d5829a12] TriangularSolve v0.1.20
   [410a4b4d] Tricks v0.1.8
@@ -268,13 +283,13 @@
   [e88e6eb3] Zygote v0.6.67
   [700de1a5] ZygoteRules v0.2.4
   [6e34b625] Bzip2_jll v1.0.8+0
- [4ee394cb] CUDA_Driver_jll v0.5.0+1
- [76a88914] CUDA_Runtime_jll v0.6.0+0
+  [4ee394cb] CUDA_Driver_jll v0.7.0+0
+  [76a88914] CUDA_Runtime_jll v0.10.0+1
   [83423d85] Cairo_jll v1.16.1+1
-  [7cc45869] Enzyme_jll v0.0.89+0
+ [7cc45869] Enzyme_jll v0.0.89+0
   [2702e6a9] EpollShim_jll v0.0.20230411+0
   [2e619515] Expat_jll v2.5.0+0
- [b22a6f82] FFMPEG_jll v4.4.2+2
+  [b22a6f82] FFMPEG_jll v4.4.4+1
   [a3f928ae] Fontconfig_jll v2.13.93+0
   [d7e528f0] FreeType2_jll v2.13.1+0
   [559328eb] FriBidi_jll v1.0.10+0
@@ -286,6 +301,7 @@
   [2e76f6c2] HarfBuzz_jll v2.8.1+1
   [1d5cc7b8] IntelOpenMP_jll v2023.2.0+0
   [aacddb02] JpegTurbo_jll v2.1.91+0
+  [9c1d0b0a] JuliaNVTXCallbacks_jll v0.2.1+0
   [c1c5ebd0] LAME_jll v3.100.1+0
   [88015f11] LERC_jll v3.0.0+1
   [dad2f222] LLVMExtra_jll v0.0.27+0
@@ -300,19 +316,20 @@
   [89763e89] Libtiff_jll v4.5.1+1
   [38a345b3] Libuuid_jll v2.36.0+0
   [856f044c] MKL_jll v2023.2.0+0
+  [e98f9f5b] NVTX_jll v3.1.0+2
   [e7412a2a] Ogg_jll v1.3.5+1
- [458c3c95] OpenSSL_jll v1.1.23+0
+  [458c3c95] OpenSSL_jll v3.0.12+0
   [efe28fd5] OpenSpecFun_jll v0.5.5+0
   [91d4177d] Opus_jll v1.3.2+0
   [30392449] Pixman_jll v0.42.2+0
-  [c0090381] Qt6Base_jll v6.5.2+2
+  [c0090381] Qt6Base_jll v6.5.3+1
   [f50d1b31] Rmath_jll v0.4.0+0
   [a44049a8] Vulkan_Loader_jll v1.3.243+0
   [a2964d1f] Wayland_jll v1.21.0+1
   [2381bf8a] Wayland_protocols_jll v1.25.0+0
   [02c8fc9c] XML2_jll v2.11.5+0
   [aed1982a] XSLT_jll v1.1.34+0
-  [ffd25f8a] XZ_jll v5.4.4+0
+  [ffd25f8a] XZ_jll v5.4.5+0
   [f67eecfb] Xorg_libICE_jll v1.0.10+1
   [c834827a] Xorg_libSM_jll v1.2.3+0
   [4f6342f7] Xorg_libX11_jll v1.8.6+0
@@ -339,7 +356,7 @@
   [c5fb5394] Xorg_xtrans_jll v1.5.0+0
   [3161d3a3] Zstd_jll v1.5.5+0
   [35ca27e7] eudev_jll v3.2.9+0
-  [214eeab7] fzf_jll v0.35.1+0
+ [214eeab7] fzf_jll v0.35.1+0
   [1a1c6b14] gperf_jll v3.1.1+0
   [a4ae2306] libaom_jll v3.4.0+0
   [0ac62f75] libass_jll v0.15.1+0
@@ -400,4 +417,4 @@
   [8e850b90] libblastrampoline_jll v5.8.0+0
   [8e850ede] nghttp2_jll v1.48.0+0
   [3f19e933] p7zip_jll v17.4.0+0
-Info Packages marked with ⌃ and ⌅ have new versions available, but those with ⌅ are restricted by compatibility constraints from upgrading. To see why use `status --outdated -m`

You can also download the manifest file and the project file.

+Info Packages marked with ⌅ have new versions available but compatibility constraints restrict them from upgrading. To see why use `status --outdated -m`

You can also download the manifest file and the project file.

diff --git a/dev/manual/backends/index.html b/dev/manual/backends/index.html index 8c8e0bef..afacf4c3 100644 --- a/dev/manual/backends/index.html +++ b/dev/manual/backends/index.html @@ -1,2 +1,2 @@ -Compute Backends (GPU Choices) · DiffEqGPU.jl

Compute Backends (GPU Choices)

DiffEqGPU.jl supports a multitude of different GPU devices. These must be chosen during the construction of the EnsembleGPUArray and EnsembleGPUKernel algorithms and correspond to the compute backends of KernelAbstractions.jl. The choices for backends are:

  • CUDA.CUDABackend(): For NVIDIA GPUs via code generation for CUDA kernels.
  • AMDGPU.ROCBackend(): For AMD GPUs via code generation for ROCm kernels.
  • oneAPI.oneAPIBackend(): For Intel GPUs via code generation for OneAPI kernels.
  • Metal.MetalBackend(): For Apple Silicon (M-Series such as M1 or M2) via code generation for Metal kernels.

This is used for example like EnsembleGPUKernel(oneAPI.oneAPIBackend()) to enable the computations for Intel GPUs. The choice of backend is mandatory and requires the installation of the respective package. Thus for example, using the OneAPI backend requires that the user has successfully installed oneAPI.jl and has an Intel GPU.

+Compute Backends (GPU Choices) · DiffEqGPU.jl

Compute Backends (GPU Choices)

DiffEqGPU.jl supports a multitude of different GPU devices. These must be chosen during the construction of the EnsembleGPUArray and EnsembleGPUKernel algorithms and correspond to the compute backends of KernelAbstractions.jl. The choices for backends are:

  • CUDA.CUDABackend(): For NVIDIA GPUs via code generation for CUDA kernels.
  • AMDGPU.ROCBackend(): For AMD GPUs via code generation for ROCm kernels.
  • oneAPI.oneAPIBackend(): For Intel GPUs via code generation for OneAPI kernels.
  • Metal.MetalBackend(): For Apple Silicon (M-Series such as M1 or M2) via code generation for Metal kernels.

This is used for example like EnsembleGPUKernel(oneAPI.oneAPIBackend()) to enable the computations for Intel GPUs. The choice of backend is mandatory and requires the installation of the respective package. Thus for example, using the OneAPI backend requires that the user has successfully installed oneAPI.jl and has an Intel GPU.

diff --git a/dev/manual/choosing_ensembler/index.html b/dev/manual/choosing_ensembler/index.html index dff19ca6..34c624ee 100644 --- a/dev/manual/choosing_ensembler/index.html +++ b/dev/manual/choosing_ensembler/index.html @@ -1,2 +1,2 @@ -Choosing the Ensemble: EnsembleGPUArray vs EnsembleGPUKernel · DiffEqGPU.jl

Choosing the Ensemble: EnsembleGPUArray vs EnsembleGPUKernel

The short answer for how to choose an ensemble method is that, if EnsembleGPUKernel works on your problem, you should use it. A more complex discussion is the following:

  • EnsembleGPUKernel is more asynchronous and has lower kernel call counts than EnsembleGPUArray. This should amount to lower overhead in any case where the algorithms are the same.
  • EnsembleGPUKernel is restrictive on the types of ODE solvers that have been implemented for it. If the most efficient method is not in the list of GPU kernel methods, it may be more efficient to use EnsembleGPUArray with the better method.
  • EnsembleGPUKernel requires equations to be written in out-of-place form, along with a few other restrictions, and thus in some cases can be less automatic than EnsembleGPUArray depending on how the code was originally written.
+Choosing the Ensemble: EnsembleGPUArray vs EnsembleGPUKernel · DiffEqGPU.jl

Choosing the Ensemble: EnsembleGPUArray vs EnsembleGPUKernel

The short answer for how to choose an ensemble method is that, if EnsembleGPUKernel works on your problem, you should use it. A more complex discussion is the following:

  • EnsembleGPUKernel is more asynchronous and has lower kernel call counts than EnsembleGPUArray. This should amount to lower overhead in any case where the algorithms are the same.
  • EnsembleGPUKernel is restrictive on the types of ODE solvers that have been implemented for it. If the most efficient method is not in the list of GPU kernel methods, it may be more efficient to use EnsembleGPUArray with the better method.
  • EnsembleGPUKernel requires equations to be written in out-of-place form, along with a few other restrictions, and thus in some cases can be less automatic than EnsembleGPUArray depending on how the code was originally written.
diff --git a/dev/manual/ensemblegpuarray/index.html b/dev/manual/ensemblegpuarray/index.html index c741b175..d00e9a02 100644 --- a/dev/manual/ensemblegpuarray/index.html +++ b/dev/manual/ensemblegpuarray/index.html @@ -12,4 +12,4 @@ prob = ODEProblem(lorenz,u0,tspan,p) prob_func = (prob,i,repeat) -> remake(prob,p=rand(Float32,3).*p) monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy=false) -@time sol = solve(monteprob,Tsit5(),EnsembleGPUArray(CUDADevice()),trajectories=10_000,saveat=1.0f0)source
DiffEqGPU.EnsembleCPUArrayType
EnsembleCPUArray(cpu_offload = 0.2)

An EnsembleArrayAlgorithm which utilizes the CPU kernels to parallelize each ODE solve with their separate ODE integrator on each kernel. This method is meant to be a debugging counterpart to EnsembleGPUArray, having the same behavior and using the same KernelAbstractions.jl process to build the combined ODE, but without the restrictions of f being a GPU-compatible kernel function.

It is unlikely that this method is useful beyond library development and debugging, as almost any case should be faster with EnsembleThreads or EnsembleDistributed.

source
+@time sol = solve(monteprob,Tsit5(),EnsembleGPUArray(CUDADevice()),trajectories=10_000,saveat=1.0f0)source
DiffEqGPU.EnsembleCPUArrayType
EnsembleCPUArray(cpu_offload = 0.2)

An EnsembleArrayAlgorithm which utilizes the CPU kernels to parallelize each ODE solve with their separate ODE integrator on each kernel. This method is meant to be a debugging counterpart to EnsembleGPUArray, having the same behavior and using the same KernelAbstractions.jl process to build the combined ODE, but without the restrictions of f being a GPU-compatible kernel function.

It is unlikely that this method is useful beyond library development and debugging, as almost any case should be faster with EnsembleThreads or EnsembleDistributed.

source
diff --git a/dev/manual/ensemblegpukernel/index.html b/dev/manual/ensemblegpukernel/index.html index e2687feb..46aec3af 100644 --- a/dev/manual/ensemblegpukernel/index.html +++ b/dev/manual/ensemblegpukernel/index.html @@ -19,13 +19,13 @@ monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false) @time sol = solve(monteprob, GPUTsit5(), EnsembleGPUKernel(), trajectories = 10_000, - adaptive = false, dt = 0.1f0)source

Specialized Solvers

DiffEqGPU.GPUTsit5Type

GPUTsit5()

A specialized implementation of the 5th order Tsit5 method specifically for kernel generation with EnsembleGPUKernel. For a similar CPU implementation, see SimpleATsit5 from SimpleDiffEq.jl.

source
DiffEqGPU.GPUVern7Type

GPUVern7()

A specialized implementation of the 7th order Vern7 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUVern9Type

GPUVern9()

A specialized implementation of the 9th order Vern9 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUEMType

GPUEM()

A specialized implementation of the Euler-Maruyama GPUEM method with weak order 1.0. Made specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUSIEAType

GPUSIEA()

A specialized implementation of the GPUSIEA method, which has weak order 2.0 for Ito SDEs, specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPURosenbrock23Type

GPURosenbrock23()

A specialized implementation of the W-method Rosenbrock23 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPURodas4Type

GPURodas4()

A specialized implementation of the Rodas4 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPURodas5PType

GPURodas5P()

A specialized implementation of the Rodas5P method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUKvaerno3Type

GPUKvaerno3()

A specialized implementation of the Kvaerno3 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUKvaerno5Type

GPUKvaerno5()

A specialized implementation of the Kvaerno5 method specifically for kernel generation with EnsembleGPUKernel.

source

Lower Level API

DiffEqGPU.vectorized_solveFunction
vectorized_solve(probs, prob::Union{ODEProblem, SDEProblem}, alg;
+                  adaptive = false, dt = 0.1f0)
source

Specialized Solvers

DiffEqGPU.GPUTsit5Type

GPUTsit5()

A specialized implementation of the 5th order Tsit5 method specifically for kernel generation with EnsembleGPUKernel. For a similar CPU implementation, see SimpleATsit5 from SimpleDiffEq.jl.

source
DiffEqGPU.GPUVern7Type

GPUVern7()

A specialized implementation of the 7th order Vern7 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUVern9Type

GPUVern9()

A specialized implementation of the 9th order Vern9 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUEMType

GPUEM()

A specialized implementation of the Euler-Maruyama GPUEM method with weak order 1.0. Made specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUSIEAType

GPUSIEA()

A specialized implementation of the GPUSIEA method, which has weak order 2.0 for Ito SDEs, specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPURosenbrock23Type

GPURosenbrock23()

A specialized implementation of the W-method Rosenbrock23 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPURodas4Type

GPURodas4()

A specialized implementation of the Rodas4 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPURodas5PType

GPURodas5P()

A specialized implementation of the Rodas5P method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUKvaerno3Type

GPUKvaerno3()

A specialized implementation of the Kvaerno3 method specifically for kernel generation with EnsembleGPUKernel.

source
DiffEqGPU.GPUKvaerno5Type

GPUKvaerno5()

A specialized implementation of the Kvaerno5 method specifically for kernel generation with EnsembleGPUKernel.

source

Lower Level API

DiffEqGPU.vectorized_solveFunction
vectorized_solve(probs, prob::Union{ODEProblem, SDEProblem}, alg;
                  dt, saveat = nothing,
                  save_everystep = true,
-                 debug = false, callback = CallbackSet(nothing), tstops = nothing)

A lower level interface to the kernel generation solvers of EnsembleGPUKernel with fixed time-stepping.

Arguments

  • probs: the GPU-setup problems generated by the ensemble.
  • prob: the quintessential problem form. Can be just probs[1]
  • alg: the kernel-based differential equation solver. Must be one of the EnsembleGPUKernel specialized methods.

Keyword Arguments

Only a subset of the common solver arguments are supported.

source
DiffEqGPU.vectorized_asolveFunction
vectorized_asolve(probs, prob::ODEProblem, alg;
+                 debug = false, callback = CallbackSet(nothing), tstops = nothing)

A lower level interface to the kernel generation solvers of EnsembleGPUKernel with fixed time-stepping.

Arguments

  • probs: the GPU-setup problems generated by the ensemble.
  • prob: the quintessential problem form. Can be just probs[1]
  • alg: the kernel-based differential equation solver. Must be one of the EnsembleGPUKernel specialized methods.

Keyword Arguments

Only a subset of the common solver arguments are supported.

source
DiffEqGPU.vectorized_asolveFunction
vectorized_asolve(probs, prob::ODEProblem, alg;
                   dt = 0.1f0, saveat = nothing,
                   save_everystep = false,
                   abstol = 1.0f-6, reltol = 1.0f-3,
-                  callback = CallbackSet(nothing), tstops = nothing)

A lower level interface to the kernel generation solvers of EnsembleGPUKernel with adaptive time-stepping.

Arguments

  • probs: the GPU-setup problems generated by the ensemble.
  • prob: the quintessential problem form. Can be just probs[1]
  • alg: the kernel-based differential equation solver. Must be one of the EnsembleGPUKernel specialized methods.

Keyword Arguments

Only a subset of the common solver arguments are supported.

source
DiffEqGPU.vectorized_map_solveFunction

Lower level API for EnsembleArrayAlgorithm. Avoids conversion of solution to CPU arrays.

vectorized_map_solve(probs, alg,
+                  callback = CallbackSet(nothing), tstops = nothing)

A lower level interface to the kernel generation solvers of EnsembleGPUKernel with adaptive time-stepping.

Arguments

  • probs: the GPU-setup problems generated by the ensemble.
  • prob: the quintessential problem form. Can be just probs[1]
  • alg: the kernel-based differential equation solver. Must be one of the EnsembleGPUKernel specialized methods.

Keyword Arguments

Only a subset of the common solver arguments are supported.

source
DiffEqGPU.vectorized_map_solveFunction

Lower level API for EnsembleArrayAlgorithm. Avoids conversion of solution to CPU arrays.

vectorized_map_solve(probs, alg,
                      ensemblealg::Union{EnsembleArrayAlgorithm}, I,
-                     adaptive)

Arguments

  • probs: the GPU-setup problems generated by the ensemble.
  • alg: the kernel-based differential equation solver. Most of the solvers from OrdinaryDiffEq.jl are supported.
  • ensemblealg: The EnsembleGPUArray() algorithm.
  • I: The iterator argument. Can be set to, e.g., 1:10_000 to simulate 10,000 trajectories.
  • adaptive: The Boolean argument for time-stepping. Use true to enable adaptive time-stepping.

Keyword Arguments

Only a subset of the common solver arguments are supported.

source
+ adaptive)

Arguments

Keyword Arguments

Only a subset of the common solver arguments are supported.

source diff --git a/dev/manual/optimal_trajectories/index.html b/dev/manual/optimal_trajectories/index.html index 59bf4def..2a417c35 100644 --- a/dev/manual/optimal_trajectories/index.html +++ b/dev/manual/optimal_trajectories/index.html @@ -1,2 +1,2 @@ -Choosing Optimal Numbers of Trajectories · DiffEqGPU.jl

Choosing Optimal Numbers of Trajectories

There is a balance between two things for choosing the number of trajectories:

  • The number of trajectories needs to be high enough that the work per kernel is sufficient to overcome the kernel call cost.
  • More trajectories means that every trajectory will need more time steps, since the adaptivity syncs all solves.

From our testing, the balance is found at around 10,000 trajectories being optimal for EnsembleGPUArray, since it has higher kernel call costs because every internal operation of the ODE solver requires a kernel call. Thus, for larger sets of trajectories, use a batch size of 10,000. Of course, benchmark for yourself on your own setup, as all GPUs are different.

On the other hand, EnsembleGPUKernel fuses the entire GPU solve into a single kernel, greatly reducing the kernel call cost. This means that longer or more expensive ODE solves spend an even smaller percentage of their time on kernel launching, making the cutoff much smaller. We see some cases with around 100 ODEs being viable with EnsembleGPUKernel. Again, this is highly dependent on the ODE and the chosen GPU, so one will need to benchmark to get accurate numbers for their system; this is merely a ballpark estimate.

+Choosing Optimal Numbers of Trajectories · DiffEqGPU.jl

Choosing Optimal Numbers of Trajectories

There is a balance between two things for choosing the number of trajectories:

  • The number of trajectories needs to be high enough that the work per kernel is sufficient to overcome the kernel call cost.
  • More trajectories means that every trajectory will need more time steps, since the adaptivity syncs all solves.

From our testing, the balance is found at around 10,000 trajectories being optimal for EnsembleGPUArray, since it has higher kernel call costs because every internal operation of the ODE solver requires a kernel call. Thus, for larger sets of trajectories, use a batch size of 10,000. Of course, benchmark for yourself on your own setup, as all GPUs are different.

On the other hand, EnsembleGPUKernel fuses the entire GPU solve into a single kernel, greatly reducing the kernel call cost. This means that longer or more expensive ODE solves spend an even smaller percentage of their time on kernel launching, making the cutoff much smaller. We see some cases with around 100 ODEs being viable with EnsembleGPUKernel. Again, this is highly dependent on the ODE and the chosen GPU, so one will need to benchmark to get accurate numbers for their system; this is merely a ballpark estimate.

diff --git a/dev/tutorials/gpu_ensemble_basic/index.html b/dev/tutorials/gpu_ensemble_basic/index.html index d7bdd839..afb8f1d5 100644 --- a/dev/tutorials/gpu_ensemble_basic/index.html +++ b/dev/tutorials/gpu_ensemble_basic/index.html @@ -67,4 +67,4 @@ solve(monteprob_jac, Rodas5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000, saveat = 1.0f0)
EnsembleSolution Solution of length 10000 with uType:
-SciMLBase.ODESolution{Float32, 2, uType, Nothing, Nothing, Vector{Float32}, rateType, SciMLBase.ODEProblem{Vector{Float32}, Tuple{Float32, Float32}, true, StaticArraysCore.SizedVector{3, Float32, Vector{Float32}}, SciMLBase.ODEFunction{true, SciMLBase.FullSpecialize, typeof(Main.lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, typeof(Main.lorenz_tgrad), typeof(Main.lorenz_jac), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, A, IType, SciMLBase.DEStats, Nothing} where {uType, rateType, A, IType}
+SciMLBase.ODESolution{Float32, 2, uType, Nothing, Nothing, Vector{Float32}, rateType, SciMLBase.ODEProblem{Vector{Float32}, Tuple{Float32, Float32}, true, StaticArraysCore.SizedVector{3, Float32, Vector{Float32}}, SciMLBase.ODEFunction{true, SciMLBase.FullSpecialize, typeof(Main.lorenz), LinearAlgebra.UniformScaling{Bool}, Nothing, typeof(Main.lorenz_tgrad), typeof(Main.lorenz_jac), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, A, IType, SciMLBase.DEStats, Nothing} where {uType, rateType, A, IType} diff --git a/dev/tutorials/lower_level_api/index.html b/dev/tutorials/lower_level_api/index.html index d317bf2d..fcf08865 100644 --- a/dev/tutorials/lower_level_api/index.html +++ b/dev/tutorials/lower_level_api/index.html @@ -41,7 +41,7 @@ save_everystep = false, dt = 0.1f0) @time CUDA.@sync ts, us = DiffEqGPU.vectorized_asolve(probs, prob, GPUTsit5(); - save_everystep = false, dt = 0.1f0)
(Float32[0.0 0.0 … 0.0 0.0; 10.0 10.0 … 10.0 10.0], StaticArraysCore.SVector{3, Float32}[[1.0, 0.0, 0.0] [1.0, 0.0, 0.0] … [1.0, 0.0, 0.0] [1.0, 0.0, 0.0]; [-5.147485, -4.1040325, 19.911242] [-3.1049283, 5.6219974, 31.800125] … [3.219528, 3.480632, 6.593501] [7.060069, 7.0421886, 25.37912]])

Note that the core is the function DiffEqGPU.vectorized_solve, which is the solver for the CUDA-based probs and uses the manually converted problems. It returns us, which is a vector of CuArrays for the solution.

Similarly, there exists a lower-level API for EnsembleGPUArray as well, primarily for benchmarking purposes. The solution returned for state (sol.u) is a matrix whose columns are the different parameter-parallel solutions for the ensemble problem. An example is shown below:

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, DiffEqBase
+    save_everystep = false, dt = 0.1f0)
(Float32[0.0 0.0 … 0.0 0.0; 10.0 10.0 … 10.0 10.0], StaticArraysCore.SVector{3, Float32}[[1.0, 0.0, 0.0] [1.0, 0.0, 0.0] … [1.0, 0.0, 0.0] [1.0, 0.0, 0.0]; [4.1197577, 4.0986214, 7.6984396] [-4.7105184, -4.3388095, 11.242742] … [5.5972967, 5.551909, 15.244406] [-12.324175, -21.945198, 25.919806]])

Note that the core is the function DiffEqGPU.vectorized_solve, which is the solver for the CUDA-based probs and uses the manually converted problems. It returns us, which is a vector of CuArrays for the solution.

Similarly, there exists a lower-level API for EnsembleGPUArray as well, primarily for benchmarking purposes. The solution returned for state (sol.u) is a matrix whose columns are the different parameter-parallel solutions for the ensemble problem. An example is shown below:

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, CUDA, DiffEqBase
 
 trajectories = 10_000
 
@@ -82,4 +82,4 @@
  10.0
 u: 2-element Vector{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}:
  [1.0 1.0 … 1.0 1.0; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]
- [3.5219166 -0.15447281 … 0.10218355 -0.00015773717; 3.485247 0.035138115 … 0.041242808 -0.00021488701; 7.129202 20.22961 … 13.62647 24.894169]
+ [-4.9595494 4.181431 … 3.7653115f-7 2.4110434; -5.242519 3.9485211 … 3.404836f-7 2.42549; 13.229973 10.301614 … 5.2345487f-13 5.053595] diff --git a/dev/tutorials/multigpu/index.html b/dev/tutorials/multigpu/index.html index 0cecbc63..6ca5c0b7 100644 --- a/dev/tutorials/multigpu/index.html +++ b/dev/tutorials/multigpu/index.html @@ -71,4 +71,4 @@ @time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 100_000, - batch_size = 50_000, saveat = 1.0f0) + batch_size = 50_000, saveat = 1.0f0) diff --git a/dev/tutorials/parallel_callbacks/index.html b/dev/tutorials/parallel_callbacks/index.html index 1c54ff9a..c63c4852 100644 --- a/dev/tutorials/parallel_callbacks/index.html +++ b/dev/tutorials/parallel_callbacks/index.html @@ -19,4 +19,4 @@ trajectories = 10, adaptive = false, dt = 0.01f0, callback = gpu_cb, merge_callbacks = true, tstops = [4.0f0])
EnsembleSolution Solution of length 10 with uType:
-SciMLBase.ODESolution{Float32, 2, SubArray{StaticArraysCore.SVector{1, Float32}, 1, Matrix{StaticArraysCore.SVector{1, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, Nothing, SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, DiffEqGPU.ImmutableODEProblem{StaticArraysCore.SVector{1, Float32}, Tuple{Float32, Float32}, false, SciMLBase.NullParameters, SciMLBase.ODEFunction{false, SciMLBase.AutoSpecialize, typeof(Main.f), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, GPUTsit5, SciMLBase.LinearInterpolation{SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, SubArray{StaticArraysCore.SVector{1, Float32}, 1, Matrix{StaticArraysCore.SVector{1, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}}, Nothing, Nothing}
+SciMLBase.ODESolution{Float32, 2, SubArray{StaticArraysCore.SVector{1, Float32}, 1, Matrix{StaticArraysCore.SVector{1, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, Nothing, SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, Nothing, DiffEqGPU.ImmutableODEProblem{StaticArraysCore.SVector{1, Float32}, Tuple{Float32, Float32}, false, SciMLBase.NullParameters, SciMLBase.ODEFunction{false, SciMLBase.AutoSpecialize, typeof(Main.f), LinearAlgebra.UniformScaling{Bool}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED), Nothing, Nothing}, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, SciMLBase.StandardODEProblem}, GPUTsit5, SciMLBase.LinearInterpolation{SubArray{Float32, 1, Matrix{Float32}, Tuple{UnitRange{Int64}, Int64}, true}, SubArray{StaticArraysCore.SVector{1, Float32}, 1, Matrix{StaticArraysCore.SVector{1, Float32}}, Tuple{UnitRange{Int64}, Int64}, true}}, Nothing, Nothing} diff --git a/dev/tutorials/weak_order_conv_sde/d2b8e041.svg b/dev/tutorials/weak_order_conv_sde/3aa17211.svg similarity index 87% rename from dev/tutorials/weak_order_conv_sde/d2b8e041.svg rename to dev/tutorials/weak_order_conv_sde/3aa17211.svg index b41b7dc6..d6aa320b 100644 --- a/dev/tutorials/weak_order_conv_sde/d2b8e041.svg +++ b/dev/tutorials/weak_order_conv_sde/3aa17211.svg @@ -1,46 +1,46 @@ - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/weak_order_conv_sde/index.html b/dev/tutorials/weak_order_conv_sde/index.html index 0749ceaa..106630b8 100644 --- a/dev/tutorials/weak_order_conv_sde/index.html +++ b/dev/tutorials/weak_order_conv_sde/index.html @@ -30,4 +30,4 @@ plot(ts, us_expect, lw = 5, xaxis = "Time (t)", yaxis = "y(t)", label = "True Expected value") -plot!(ts, us_calc, lw = 3, ls = :dash, label = "Caculated 
Expected value")Example block output +plot!(ts, us_calc, lw = 3, ls = :dash, label = "Caculated Expected value")Example block output diff --git a/dev/tutorials/within_method_gpu/index.html b/dev/tutorials/within_method_gpu/index.html index 3dd381a2..0ff975cf 100644 --- a/dev/tutorials/within_method_gpu/index.html +++ b/dev/tutorials/within_method_gpu/index.html @@ -53,4 +53,4 @@ Float32[0.001946243, -0.0012600849, -0.0011850193] Float32[0.0017297472, -0.000664239, -0.0016438798] Float32[0.001377614, -5.466292f-5, -0.0019241166] - Float32[0.0013233931, 2.3381774f-5, -0.0019467642]

Notice that both stiff and non-stiff ODE solvers were used here.

Note

Time span was changed to Float32 types, as GPUs generally have very slow Float64 operations, usually around 1/32 of the speed of Float32. cu(x) on an array automatically changes an Array{Float64} to a CuArray{Float32}. If this is not intended, use the CuArray constructor directly. For more information on GPU Float64 performance issues, search around Google for discussions like this.

Warn

Float32 precision is sometimes not enough to accurately solve a stiff ODE. Make sure that the precision is sufficient by investigating the condition number of the Jacobian. If this value is well above 1e8, use Float32 with caution!

Restrictions of CuArrays

Note that all the rules of CUDA.jl apply when CuArrays are being used in the solver. For most of the AbstractArray interface they act similarly to Arrays (for example, valid broadcasting operations such as x .* y are defined), but their operations run on the GPU. For more information on the rules and restrictions of CuArrays, see this page from the CUDA.jl documentation.

+ Float32[0.0013233931, 2.3381774f-5, -0.0019467642]

Notice that both stiff and non-stiff ODE solvers were used here.

Note

Time span was changed to Float32 types, as GPUs generally have very slow Float64 operations, usually around 1/32 of the speed of Float32. cu(x) on an array automatically changes an Array{Float64} to a CuArray{Float32}. If this is not intended, use the CuArray constructor directly. For more information on GPU Float64 performance issues, search around Google for discussions like this.

Warn

Float32 precision is sometimes not enough to accurately solve a stiff ODE. Make sure that the precision is sufficient by investigating the condition number of the Jacobian. If this value is well above 1e8, use Float32 with caution!

Restrictions of CuArrays

Note that all the rules of CUDA.jl apply when CuArrays are being used in the solver. For most of the AbstractArray interface they act similarly to Arrays (for example, valid broadcasting operations such as x .* y are defined), but their operations run on the GPU. For more information on the rules and restrictions of CuArrays, see this page from the CUDA.jl documentation.