From 18610299584c6f27af56fc9cabd8043930b3b6c3 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Sun, 2 Feb 2025 20:20:54 +0000 Subject: [PATCH] build based on e705227 --- dev/.documenter-siteinfo.json | 2 +- dev/api/index.html | 97 ++++++++++++++++++++-------------- dev/index.html | 2 +- dev/objects.inv | Bin 697 -> 707 bytes dev/search_index.js | 2 +- 5 files changed, 59 insertions(+), 44 deletions(-) diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index 6f17d61..806e271 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.11.3","generation_timestamp":"2025-02-02T18:14:07","documenter_version":"1.8.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.11.3","generation_timestamp":"2025-02-02T20:20:49","documenter_version":"1.8.0"}} \ No newline at end of file diff --git a/dev/api/index.html b/dev/api/index.html index 5e42fcf..1284c6b 100644 --- a/dev/api/index.html +++ b/dev/api/index.html @@ -13,7 +13,7 @@ true julia> getobs(x, [1, 3]) == Dict(:a => [1, 3], :b => x[:b][:, [1, 3]]) -truesource
+true
source
MLUtils.getobs!Function
getobs!(buffer, data, idx)

Inplace version of getobs(data, idx). If this method is defined for the type of data, then buffer should be used to store the result, instead of allocating a dedicated object.

Implementing this method is optional. If no such method is provided for the type of data, then buffer is ignored and the result of getobs is returned. This may be the case when the type of data does not lend itself to the concept of copy!.

Custom implementations of getobs! should be consistent with getobs in terms of the output format, that is getobs!(buffer, data, idx) == getobs(data, idx).

See also getobs and numobs.
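For instance, a custom container can opt into buffered loading by defining getobs!. A minimal sketch (ColumnData is a hypothetical wrapper introduced here for illustration, not part of MLUtils):

```julia
using MLUtils

# Hypothetical container: observations are the columns of a matrix.
struct ColumnData
    x::Matrix{Float64}
end

MLUtils.numobs(d::ColumnData) = size(d.x, 2)
MLUtils.getobs(d::ColumnData, idx) = d.x[:, idx]

# In-place variant: copy the requested observation into a preallocated buffer.
function MLUtils.getobs!(buffer, d::ColumnData, idx)
    copyto!(buffer, d.x[:, idx])
    return buffer
end

d = ColumnData(reshape(collect(1.0:6.0), 2, 3))
buf = zeros(2)
MLUtils.getobs!(buf, d, 2)
@assert buf == getobs(d, 2)   # consistent with getobs, as required
```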

source
MLUtils.numobsFunction
numobs(data)

Return the total number of observations contained in data.

If there is no specialized numobs method for the type of data, then the number of rows is returned when Tables.istable(data) == true, and length(data) otherwise.

Authors of custom data containers should implement Base.length for their type instead of numobs. numobs should only be implemented for types where there is a difference between numobs and Base.length (such as multi-dimensional arrays).

numobs supports by default nested combinations of arrays, tuples, named tuples, and dictionaries.

See also getobs.

Examples

julia> x = (a = [1, 2, 3], b = ones(6, 3)); # named tuples
 
 julia> numobs(x)
 3
@@ -33,14 +33,14 @@
  [3] numobs(data::NamedTuple{(:a, :b), Tuple{Vector{Int64}, Matrix{Float64}}})
    @ MLUtils ~/.julia/dev/MLUtils/src/observation.jl:177
  [4] top-level scope
-   @ REPL[35]:1
+   @ REPL[35]:1
source

Lazy Transforms

MLUtils.filterobsFunction
filterobs(f, data)

Return a subset of data container data including all indices i for which f(getobs(data, i)) === true.

data = 1:10
 numobs(data) == 10
 fdata = filterobs(>(5), data)
-numobs(fdata) == 5
+numobs(fdata) == 5
source
MLUtils.groupobsFunction
groupobs(f, data)

Split the data container data into different data containers, grouping observations by f(obs).

data = -10:10
 datas = groupobs(>(0), data)
-length(datas) == 2
+length(datas) == 2
source
MLUtils.joinobsFunction
joinobs(datas...)

Concatenate data containers datas.

data1, data2 = 1:10, 11:20
 jdata = joinobs(data1, data2)
-getobs(jdata, 15) == 15
+getobs(jdata, 15) == 15
source
MLUtils.mapobsFunction
mapobs(f, data; batched=:auto)

Lazily map f over the observations in a data container data. Returns a new data container mdata that can be indexed and has a length. Indexing triggers the transformation f.

The batched keyword argument controls the behavior of mdata[idx] and mdata[idxs] where idx is an integer and idxs is a vector of integers:

  • batched=:auto (default). Let f handle the two cases. Calls f(getobs(data, idx)) and f(getobs(data, idxs)).
  • batched=:never. The function f is always called on a single observation. Calls f(getobs(data, idx)) and [f(getobs(data, idx)) for idx in idxs].
  • batched=:always. The function f is always called on a batch of observations. Calls getobs(f(getobs(data, [idx])), 1) and f(getobs(data, idxs)).

Examples

julia> data = (a=[1,2,3], b=[1,2,3]);
 
 julia> mdata = mapobs(data) do x
          (c = x.a .+ x.b,  d = x.a .- x.b)
@@ -51,10 +51,10 @@
 (c = 2, d = 0)
 
 julia> mdata[1:2]
-(c = [2, 4], d = [0, 0])
+(c = [2, 4], d = [0, 0])
source
mapobs(fs, data)

Lazily map each function in tuple fs over the observations in data container data. Returns a tuple of transformed data containers.
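A minimal sketch of the tuple form (the functions and range used here are illustrative):

```julia
using MLUtils

data = 1:5
# A tuple of functions yields a tuple of lazy containers,
# one per function, each the same length as data.
msqrt, msq = mapobs((sqrt, x -> x^2), data)
@assert getobs(msqrt, 4) == 2.0
@assert getobs(msq, 3) == 9
```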

source
mapobs(namedfs::NamedTuple, data)

Map a NamedTuple of functions over data, turning it into a data container of NamedTuples. Field syntax can be used to select a column of the resulting data container.

data = 1:10
 nameddata = mapobs((x = sqrt, y = log), data)
 getobs(nameddata, 10) == (x = sqrt(10), y = log(10))
-getobs(nameddata.x, 10) == sqrt(10)
+getobs(nameddata.x, 10) == sqrt(10)
source
mapobs(f, d::DataLoader)

Return a new dataloader based on d that applies f at each iteration.

Examples

julia> X = ones(3, 6);
 
 julia> function f(x)
            @show x
@@ -83,13 +83,13 @@
        end
 x = [1.0 1.0; 1.0 1.0; 1.0 1.0]
 x = [1.0 1.0; 1.0 1.0; 1.0 1.0]
-x = [1.0 1.0; 1.0 1.0; 1.0 1.0]
+x = [1.0 1.0; 1.0 1.0; 1.0 1.0]
source
MLUtils.shuffleobsFunction
shuffleobs([rng], data)

Return a version of the dataset data that contains all the original observations in a random reordering.

The values of data itself are not copied. Instead only the indices are shuffled. This function calls obsview to accomplish that, which means that the return value is likely of a different type than data.

Optionally, a random number generator rng can be passed as the first argument.

For this function to work, the type of data must implement numobs and getobs.

See also obsview.

Examples

# For Arrays the subset will be of type SubArray
 @assert typeof(shuffleobs(rand(4,10))) <: SubArray
 
 # Iterate through all observations in random order
 for x in eachobs(shuffleobs(X))
     ...
-end
+end
source

Batching, Iteration, and Views

MLUtils.batchFunction
batch(xs)

Batch the arrays in xs into a single array with an extra dimension.

If the elements of xs are tuples, named tuples, or dicts, the output will be of the same type.

See also unbatch.

Examples

julia> batch([[1,2,3], 
               [4,5,6]])
 3×2 Matrix{Int64}:
  1  4
@@ -98,15 +98,15 @@
 
 julia> batch([(a=[1,2], b=[3,4])
                (a=[5,6], b=[7,8])]) 
-(a = [1 5; 2 6], b = [3 7; 4 8])
+(a = [1 5; 2 6], b = [3 7; 4 8])
source
MLUtils.batchsizeFunction
batchsize(data::BatchView) -> Int

Return the fixed size of each batch in data.

Examples

using MLUtils
 X, Y = MLUtils.load_iris()
 
 A = BatchView(X, batchsize=30)
-@assert batchsize(A) == 30
+@assert batchsize(A) == 30
source
MLUtils.batchseqFunction
batchseq(seqs, val = 0)

Take a list of N sequences, and turn them into a single sequence where each item is a batch of N. Short sequences will be padded by val.

Examples

julia> batchseq([[1, 2, 3], [4, 5]], 0)
 3-element Vector{Vector{Int64}}:
  [1, 4]
  [2, 5]
- [3, 0]
+ [3, 0]
source
MLUtils.BatchViewType
BatchView(data, batchsize; partial=true, collate=nothing)
 BatchView(data; batchsize=1, partial=true, collate=nothing)

Create a view of the given data that represents it as a vector of batches. Each batch contains an equal number of observations, specified by the parameter batchsize. If the size of the dataset is not divisible by batchsize, the remaining observations are ignored when partial=false; when partial=true instead, the last batch may be slightly smaller.

If used as an iterator, the object will iterate over the dataset once, effectively denoting an epoch.

Any data access is delayed until iteration or indexing is performed. The getobs function is called on the data object to retrieve the observations.

For BatchView to work on some data structure, the type of the given variable data must implement the data container interface. See ObsView for more info.

Arguments

  • data : The object describing the dataset. Can be of any type as long as it implements getobs and numobs (see Details for more information).

  • batchsize : The batch-size of each batch. It is the number of observations that each batch must contain (except possibly for the last one).

  • partial : If partial=false and the number of observations is not divisible by the batch-size, then the last mini-batch is dropped.

  • collate: Defines the batching behavior.

    • If nothing (default), a batch is getobs(data, indices).
    • If false, each batch is [getobs(data, i) for i in indices].
    • If true, applies MLUtils.batch to the vector of observations in a batch, recursively collating arrays in the last dimensions. See MLUtils.batch for more information and examples.
    • If a custom function, it will be used in place of MLUtils.batch. It should take a vector of observations as input.

See also DataLoader.

Examples

julia> using MLUtils
 
 julia> X, Y = MLUtils.load_iris();
@@ -145,7 +145,7 @@
 julia> for (x, y) in BatchView((rand(10, 4), ["a", "b", "c", "d"]), batchsize=2, collate=collate_fn)
            @assert size(x) == (10, 2)
            @assert y isa String
-       end
+       end
source
MLUtils.eachobsFunction
eachobs(data; kws...)

Return an iterator over data.

Supports the same arguments as DataLoader. The batchsize default is -1 here while it is 1 for DataLoader.

Examples

X = rand(4,100)
 
 for x in eachobs(X)
     # loop entered 100 times
@@ -163,7 +163,7 @@
 # support for tuples, named tuples, dicts
 for (x, y) in eachobs((X, Y))
     # ...
-end
+end
source
MLUtils.DataLoaderType
DataLoader(data; [batchsize, buffer, collate, parallel, partial, rng, shuffle])

An object that iterates over mini-batches of data, each mini-batch containing batchsize observations (except possibly the last one).

Takes as input a single data array, a tuple (or a named tuple) of arrays, or in general any data object that implements the numobs and getobs methods.

The last dimension in each array is the observation dimension, i.e. the one divided into mini-batches.

The original data is preserved in the data field of the DataLoader.

Arguments

  • data: The data to be iterated over. The data type has to be supported by numobs and getobs.
  • batchsize: If less than 0, iterates over individual observations. Otherwise, each iteration (except possibly the last) yields a mini-batch containing batchsize observations. Default 1.
  • buffer: If buffer=true and supported by the type of data, a buffer will be allocated and reused for memory efficiency. May want to set partial=false to avoid size mismatch. Finally, can pass an external buffer to be used in getobs! (depending on the collate and batchsize options, could be getobs!(buffer, data, idxs) or getobs!(buffer[i], data, idx)). Default false.
  • collate: Defines the batching behavior. Default nothing.
    • If nothing , a batch is getobs(data, indices).
    • If false, each batch is [getobs(data, i) for i in indices].
    • If true, applies MLUtils.batch to the vector of observations in a batch, recursively collating arrays in the last dimensions. See MLUtils.batch for more information and examples.
    • If a custom function, it will be used in place of MLUtils.batch. It should take a vector of observations as input.
  • parallel: Whether to load data in parallel using worker threads. Greatly speeds up data loading, by a factor of the number of available threads. Requires starting Julia with multiple threads. Check Threads.nthreads() to see the number of available threads. Passing parallel = true breaks ordering guarantees. Default false.
  • partial: This argument is used only when batchsize > 0. If partial=false and the number of observations is not divisible by the batchsize, then the last mini-batch is dropped. Default true.
  • rng: A random number generator. Default Random.default_rng().
  • shuffle: Whether to shuffle the observations before iterating. Unlike wrapping the data container with shuffleobs(data), shuffle=true ensures that the observations are shuffled anew every time you start iterating over eachobs. Default false.

Examples

julia> Xtrain = rand(10, 100);
 
 julia> array_loader = DataLoader(Xtrain, batchsize=2);
 
@@ -208,7 +208,7 @@
 julia> collate_fn(batch) = join(batch);
 
 julia> first(DataLoader(["a", "b", "c", "d"], batchsize=2, collate=collate_fn))
-"ab"
+"ab"
source
MLUtils.obsviewFunction
obsview(data, [indices])

Returns a lazy view of the observations in data that correspond to the given indices. No data is copied except for the indices. It is similar to constructing an ObsView, but returns a SubArray if the type of data is Array or SubArray. Furthermore, this function may be extended for custom types of data that want to provide their own subset type.

In case data is a tuple, the constructor is mapped over its elements. That means the constructor returns a tuple of ObsViews instead of an ObsView of tuples.

If instead you want to get the subset of observations corresponding to the given indices in their native type, use getobs.

See ObsView for more information.
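A short sketch of the Array case (the matrix here is illustrative; as elsewhere in MLUtils, columns are observations):

```julia
using MLUtils

X = collect(reshape(1:12, 3, 4))   # 4 observations (columns)
v = obsview(X, 2:3)                # lazy: only indices are stored
@assert v isa SubArray             # Arrays yield a SubArray, not an ObsView
@assert v == X[:, 2:3]
@assert numobs(v) == 2
```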

source
MLUtils.ObsViewType
ObsView(data, [indices])

Used to represent a subset of some data of arbitrary type by storing which observation-indices the subset spans. Furthermore, subsequent subsettings are accumulated without needing to access actual data.

The main purpose for the existence of ObsView is to delay data access and movement until an actual batch of data (or single observation) is needed for some computation. This is particularly useful when the data is not located in memory, but on a hard drive or some remote location. In such a scenario one wants to load the required data only when needed.

Any data access is delayed until getindex is called, and even getindex returns the result of obsview which in general avoids data movement until getobs is called. If used as an iterator, the view will iterate over the dataset once, effectively denoting an epoch. Each iteration will return a lazy subset to the current observation.

Arguments

  • data : The object describing the dataset. Can be of any type as long as it implements getobs and numobs (see Details for more information).

  • indices : Optional. The index or indices of the observation(s) in data that the subset should represent. Can be of type Int or some subtype of AbstractVector.

Methods

  • getindex : Returns the observation(s) of the given index/indices. No data is copied aside from the required indices.

  • numobs : Returns the total number of observations in the subset.

  • getobs : Returns the underlying data that the ObsView represents at the given relative indices. Note that these indices are in "subset space", and in general will not directly correspond to the same indices in the underlying data set.

Details

For ObsView to work on some data structure, the desired type MyType must implement the following interface:

  • getobs(data::MyType, idx) : Should return the observation(s) indexed by idx. In what form is up to the user. Note that idx can be of type Int or AbstractVector.

  • numobs(data::MyType) : Should return the total number of observations in data.

The following methods can also be provided and are optional:

  • getobs(data::MyType) : By default this function is the identity function. If that is not the behaviour that you want for your type, you need to provide this method as well.

  • obsview(data::MyType, idx) : If your custom type has its own kind of subset type, you can return it here. An example for such a case are SubArray for representing a subset of some AbstractArray.

  • getobs!(buffer, data::MyType, [idx]) : Inplace version of getobs(data, idx). If this method is provided for MyType, then eachobs can preallocate a buffer that is then reused every iteration. Note: buffer should be equivalent to the return value of getobs(::MyType, ...), since this is how buffer is preallocated by default.

Examples

X, Y = MLUtils.load_iris()
 
 # The iris set has 150 observations and 4 features
 @assert size(X) == (4,150)
@@ -250,7 +250,22 @@
 end
 
 # Indexing: take first 10 observations
-x, y = ObsView((X, Y))[1:10]

+x, y = ObsView((X, Y))[1:10]

See also

obsview, getobs, numobs, splitobs, shuffleobs, kfolds.

source
MLUtils.randobsFunction
randobs(data, [n])

Pick a random observation or a batch of n random observations from data. For this function to work, the type of data must implement numobs and getobs.
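A quick sketch (the matrix is illustrative; columns are observations, so only the shapes below are deterministic):

```julia
using MLUtils

X = collect(reshape(1:12, 3, 4))   # 4 observations (columns)
x = randobs(X)                     # one random column
@assert size(x) == (3,)
xs = randobs(X, 2)                 # a batch of 2 random columns
@assert size(xs) == (3, 2)
```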

source
MLUtils.slidingwindowFunction
slidingwindow(data; size, stride=1) -> SlidingWindow

Return a vector-like view of the data for which each element is a fixed size "window" of size adjacent observations. Note that only complete windows are included in the output, which implies that it is possible for excess observations to be omitted from the view.

Note that the windows are not materialized at construction time. To actually get a copy of the data at some window use indexing or getobs.

julia> s = slidingwindow(1:20, size=6)
+slidingwindow(1:20, size=6, stride=1)
+
+julia> s[1]
+1:6
+
+julia> s[2]
+2:7

The optional parameter stride can be used to specify the distance between the start elements of each adjacent window. By default the stride is equal to 1.

julia> s = slidingwindow(1:20, size=6, stride=3)
+slidingwindow(1:20, size=6, stride=3)
+
+julia> for w in s; println(w); end
+1:6
+4:9
+7:12
+10:15
+13:18
source

Partitioning

MLUtils.leavepoutFunction
leavepout(n::Integer, [size = 1]) -> Tuple

Compute the train/validation assignments for k ≈ n/size repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector the index-vectors for the validation subsets respectively. Each validation subset will have either size or size+1 observations assigned to it. The following code snippet generates the index-vectors for size = 2.

julia> train_idx, val_idx = leavepout(10, 2);

Each observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.

julia> train_idx
 5-element Array{Array{Int64,1},1}:
  [3,4,5,6,7,8,9,10]
  [1,2,5,6,7,8,9,10]
@@ -264,11 +279,11 @@
  3:4
  5:6
  7:8
- 9:10
+ 9:10
source
leavepout(data, p = 1)

Repartition a data container using a k-fold strategy, where k is chosen in such a way, that each validation subset of the resulting folds contains roughly p observations. Defaults to p = 1, which is also known as "leave-one-out" partitioning.

The resulting sequence of folds is returned as a lazy iterator. Only data subsets are created. That means no actual data is copied until getobs is invoked.

for (train, val) in leavepout(X, p=2)
+    # if numobs(X) is divisible by 2,
     # then numobs(val) will be 2 for each iteraton,
     # otherwise it may be 3 for the first few iterations.
-end

Seekfolds for a related function.

source
MLUtils.kfoldsFunction
kfolds(n::Integer, k = 5) -> Tuple

Compute the train/validation assignments for k repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector the index-vectors for the validation subsets respectively. A general rule of thumb is to use either k = 5 or k = 10. The following code snippet generates the indices assignments for k = 5

julia> train_idx, val_idx = kfolds(10, 5);

Each observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.

julia> train_idx
+end

See kfolds for a related function.

source
MLUtils.kfoldsFunction
kfolds(n::Integer, k = 5) -> Tuple

Compute the train/validation assignments for k repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector contains the index-vectors for the validation subsets. A general rule of thumb is to use either k = 5 or k = 10. The following code snippet generates the index assignments for k = 5.

julia> train_idx, val_idx = kfolds(10, 5);

Each observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.

julia> train_idx
 5-element Array{Array{Int64,1},1}:
  [3,4,5,6,7,8,9,10]
  [1,2,5,6,7,8,9,10]
@@ -282,18 +297,18 @@
  3:4
  5:6
  7:8
- 9:10
source
kfolds(data, [k = 5])

Repartition a data container k times using a k folds strategy and return the sequence of folds as a lazy iterator. Only data subsets are created, which means that no actual data is copied until getobs is invoked.

Conceptually, a k-folds repartitioning strategy divides the given data into k roughly equal-sized parts. Each part will serve as validation set once, while the remaining parts are used for training. This results in k different partitions of data.

In the case that the size of the dataset is not dividable by the specified k, the remaining observations will be evenly distributed among the parts.

for (x_train, x_val) in kfolds(X, k=10)
+ 9:10
source
kfolds(data, [k = 5])

Repartition a data container k times using a k-folds strategy and return the sequence of folds as a lazy iterator. Only data subsets are created, which means that no actual data is copied until getobs is invoked.

Conceptually, a k-folds repartitioning strategy divides the given data into k roughly equal-sized parts. Each part will serve as validation set once, while the remaining parts are used for training. This results in k different partitions of data.

If the size of the dataset is not divisible by the specified k, the remaining observations are distributed evenly among the parts.

for (x_train, x_val) in kfolds(X, k=10)
     # code called 10 times
-    # nobs(x_val) may differ up to ±1 over iterations
+    # numobs(x_val) may differ up to ±1 over iterations
 end

Multiple variables are supported (e.g. for labeled data)

for ((x_train, y_train), val) in kfolds((X, Y), k=10)
     # ...
 end

By default the folds are created using static splits. Use shuffleobs to randomly assign observations to the folds.

for (x_train, x_val) in kfolds(shuffleobs(X), k = 10)
     # ...
-end

See leavepout for a related function.

source
MLUtils.splitobsFunction
splitobs(n::Int; at) -> Tuple

Compute the indices for two or more disjoint subsets of the range 1:n with split sizes determined by at.

Examples

julia> splitobs(100, at=0.7)
+end

See leavepout for a related function.

source
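The assignment rule described above (each validation fold receives either div(n, k) or div(n, k) + 1 observations, with no shuffling) can be sketched in plain Julia; kfold_indices is a hypothetical helper, not MLUtils' internal implementation:

```julia
# A plain-Julia sketch of the k-fold assignment rule: fold sizes differ by
# at most 1, with the remainder spread over the first folds, no shuffling.
function kfold_indices(n::Int, k::Int)
    sizes = fill(div(n, k), k)
    for i in 1:rem(n, k)        # spread the remainder over the first folds
        sizes[i] += 1
    end
    stops = cumsum(sizes)
    starts = [1; stops[1:end-1] .+ 1]
    val = [starts[i]:stops[i] for i in 1:k]
    train = [setdiff(1:n, v) for v in val]
    return train, val
end

train, val = kfold_indices(10, 5)   # val == [1:2, 3:4, 5:6, 7:8, 9:10]
```

As in kfolds itself, a union over the validation index-vectors reproduces the full range 1:n.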
MLUtils.splitobsFunction
splitobs(n::Int; at) -> Tuple

Compute the indices for two or more disjoint subsets of the range 1:n with split sizes determined by at.

Examples

julia> splitobs(100, at=0.7)
 (1:70, 71:100)
 
 julia> splitobs(100, at=(0.1, 0.4))
-(1:10, 11:50, 51:100)
source
splitobs([rng,] data; at, shuffle=false, stratified=nothing) -> Tuple

Partition the data into two or more subsets.

The argument at specifies how to split the data:

  • When at is a number between 0 and 1, this specifies the proportion in the first subset.
  • When at is an integer, it specifies the number of observations in the first subset.
  • When at is a tuple, entries specifies the number or proportion in each subset, except

for the last which will contain the remaning observations. The number of returned subsets is length(at)+1.

If shuffle=true, randomly permute the observations before splitting. A random number generator rng can be optionally passed as the first argument.

If stratified is not nothing, it should be an array of labels with the same length as the data. The observations will be split in such a way that the proportion of each label is preserved in each subset.

Supports any datatype implementing numobs.

It relies on obsview to create views of the data.

Examples

julia> splitobs(reshape(1:100, 1, :); at=0.7)  # simple 70%-30% split, of a matrix
+(1:10, 11:50, 51:100)
source
splitobs([rng,] data; at, shuffle=false, stratified=nothing) -> Tuple

Partition the data into two or more subsets.

The argument at specifies how to split the data:

  • When at is a number between 0 and 1, this specifies the proportion in the first subset.
  • When at is an integer, it specifies the number of observations in the first subset.
  • When at is a tuple, each entry specifies the number or proportion of observations in the corresponding subset, except for the last subset, which will contain the remaining observations. The number of returned subsets is length(at)+1.

If shuffle=true, randomly permute the observations before splitting. A random number generator rng can be optionally passed as the first argument.

If stratified is not nothing, it should be an array of labels with the same length as the data. The observations will be split in such a way that the proportion of each label is preserved in each subset.

Supports any datatype implementing numobs.

It relies on obsview to create views of the data.

Examples

julia> splitobs(reshape(1:100, 1, :); at=0.7)  # simple 70%-30% split, of a matrix
 ([1 2 … 69 70], [71 72 … 99 100])
 
 julia> data = (x=ones(2,10), n=1:10)  # a NamedTuple, consistent last dimension
@@ -308,7 +323,7 @@
 true
 
 julia> splitobs(1:10, at=0.5, stratified=[0,0,0,0,1,1,1,1,1,1]) # 2 zeros and 3 ones in each subset
-([1, 2, 5, 6, 7], [3, 4, 8, 9, 10])
source

Array Constructors

MLUtils.falses_likeFunction
falses_like(x, [dims=size(x)])

Equivalent to fill_like(x, false, Bool, dims).

See also [fill_like] and trues_like.

source
MLUtils.fill_likeFunction
fill_like(x, val, [element_type=eltype(x)], [dims=size(x)]))

Create an array with the given element type and size, based upon the given source array x. All element of the new array will be set to val. The third and fourth arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

See also zeros_like and ones_like.

Examples

julia> x = rand(Float32, 2)
+([1, 2, 5, 6, 7], [3, 4, 8, 9, 10])
source
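The index arithmetic behind splitobs(n; at) can be sketched by turning the proportions (or counts) in at into cumulative split points over 1:n. This is a hedged illustration; split_indices is a hypothetical helper, not the package's implementation:

```julia
# Proportions (or integer counts) in `at` become cumulative split points;
# the last subset takes whatever observations remain.
function split_indices(n::Int, at::Tuple)
    counts = [a isa Integer ? a : round(Int, a * n) for a in at]
    stops = cumsum(counts)
    starts = [1; stops .+ 1]
    stops = [stops; n]
    return Tuple(starts[i]:stops[i] for i in eachindex(starts))
end

split_indices(100, (0.7,))        # → (1:70, 71:100)
split_indices(100, (0.1, 0.4))    # → (1:10, 11:50, 51:100)
```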

Array Constructors

MLUtils.falses_likeFunction
falses_like(x, [dims=size(x)])

Equivalent to fill_like(x, false, Bool, dims).

See also fill_like and trues_like.

source
MLUtils.fill_likeFunction
fill_like(x, val, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to val. The third and fourth arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

See also zeros_like and ones_like.

Examples

julia> x = rand(Float32, 2)
 2-element Vector{Float32}:
  0.16087806
  0.89916044
@@ -329,7 +344,7 @@
 julia> fill_like(x, 1.7, Float64)
 2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  1.7  1.7
- 1.7  1.7
source
MLUtils.ones_likeFunction
ones_like(x, [element_type=eltype(x)], [dims=size(x)]))

Create an array with the given element type and size, based upon the given source array x. All element of the new array will be set to 1. The second and third arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

See also zeros_like and fill_like.

Examples

julia> x = rand(Float32, 2)
+ 1.7  1.7
source
MLUtils.ones_likeFunction
ones_like(x, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to 1. The second and third arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

See also zeros_like and fill_like.

Examples

julia> x = rand(Float32, 2)
 2-element Vector{Float32}:
  0.8621633
  0.5158395
@@ -350,7 +365,7 @@
 julia> ones_like(x, Float64)
 2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  1.0  1.0
- 1.0  1.0
source
MLUtils.rand_likeFunction
rand_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All element of the new array will be set to a random value. The last two arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

The default random number generator is used, unless a custom one is passed in explicitly as the first argument.

See also Base.rand and randn_like.

Examples

julia> x = ones(Float32, 2)
+ 1.0  1.0
source
MLUtils.rand_likeFunction
rand_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to a random value. The last two arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

The default random number generator is used, unless a custom one is passed in explicitly as the first argument.

See also Base.rand and randn_like.

Examples

julia> x = ones(Float32, 2)
 2-element Vector{Float32}:
  1.0
  1.0
@@ -371,7 +386,7 @@
 julia> rand_like(x, Float64)
 2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  0.429274  0.135379
- 0.718895  0.0098756
source
MLUtils.randn_likeFunction
randn_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All element of the new array will be set to a random value drawn from a normal distribution. The last two arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

The default random number generator is used, unless a custom one is passed in explicitly as the first argument.

See also Base.randn and rand_like.

Examples

julia> x = ones(Float32, 2)
+ 0.718895  0.0098756
source
MLUtils.randn_likeFunction
randn_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to a random value drawn from a normal distribution. The last two arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

The default random number generator is used, unless a custom one is passed in explicitly as the first argument.

See also Base.randn and rand_like.

Examples

julia> x = ones(Float32, 2)
 2-element Vector{Float32}:
  1.0
  1.0
@@ -392,7 +407,7 @@
 julia> randn_like(x, Float64)
 2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  -0.578527   0.823445
- -1.01338   -0.612053
source
MLUtils.trues_likeFunction
trues_like(x, [dims=size(x)])

Equivalent to fill_like(x, true, Bool, dims).

See also [fill_like] and falses_like.

source
MLUtils.zeros_likeFunction
zeros_like(x, [element_type=eltype(x)], [dims=size(x)]))

Create an array with the given element type and size, based upon the given source array x. All element of the new array will be set to 0. The second and third arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

See also ones_like and fill_like.

Examples

julia> x = rand(Float32, 2)
+ -1.01338   -0.612053
source
MLUtils.trues_likeFunction
trues_like(x, [dims=size(x)])

Equivalent to fill_like(x, true, Bool, dims).

See also fill_like and falses_like.

source
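For reference, the *_like constructors above can be emulated with Base's similar and fill!. These one-liners are illustrative sketches (the _sketch names are hypothetical), not MLUtils' definitions:

```julia
# fill_like: allocate with similar, then fill; trues_like / falses_like
# are the Bool specializations.
fill_like_sketch(x, val, T=eltype(x), dims=size(x)) = fill!(similar(x, T, dims), val)
trues_like_sketch(x, dims=size(x))  = fill_like_sketch(x, true,  Bool, dims)
falses_like_sketch(x, dims=size(x)) = fill_like_sketch(x, false, Bool, dims)

x = rand(Float32, 2, 3)
trues_like_sketch(x)      # 2×3 Matrix{Bool} of true
```

Because similar dispatches on the source array, the result lives on the same device as x (e.g. a CuArray input yields a CuArray output), which is the point of the *_like family.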
MLUtils.zeros_likeFunction
zeros_like(x, [element_type=eltype(x)], [dims=size(x)])

Create an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to 0. The second and third arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.

See also ones_like and fill_like.

Examples

julia> x = rand(Float32, 2)
 2-element Vector{Float32}:
  0.4005432
  0.36934233
@@ -413,7 +428,7 @@
 julia> zeros_like(x, Float64)
 2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
  0.0  0.0
- 0.0  0.0
source

Resampling

MLUtils.oversampleFunction
oversample([rng], data, classes; fraction=1, shuffle=true)
+ 0.0  0.0
source

Resampling

MLUtils.oversampleFunction
oversample([rng], data, classes; fraction=1, shuffle=true)
 oversample([rng], data::Tuple; fraction=1, shuffle=true)

Generate a re-balanced version of data by repeatedly sampling existing observations in such a way that every class will have at least fraction times the number of observations of the largest class in classes. This way, all classes will have a minimum number of observations in the resulting data set relative to what the largest class has in the given (original) data.

As an example, by default (i.e. with fraction = 1) the resulting dataset will be near perfectly balanced. On the other hand, with fraction = 0.5 every class in the resulting data will have at least 50% as many observations as the largest class.

The classes input is an array with the same length as numobs(data).

The convenience parameter shuffle determines if the resulting data will be shuffled after its creation; if it is not shuffled then all the repeated samples will be together at the end, sorted by class. Defaults to true.

The random number generator rng can be optionally passed as the first argument.

The output will contain both the resampled data and classes.

# 6 observations with 3 features each
 X = rand(3, 6)
 # 2 classes, severely imbalanced
@@ -451,7 +466,7 @@
    5 │ 0.376304  0.100022   a
    6 │ 0.427064  0.0648339  a
    7 │ 0.427064  0.0648339  a
-   8 │ 0.457043  0.490688   b

See ObsView for more information on data subsets. See also undersample.

source
MLUtils.undersampleFunction
undersample([rng], data, classes; shuffle=true)
+   8 │ 0.457043  0.490688   b

See ObsView for more information on data subsets. See also undersample.

source
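The count rule that oversample describes (bring every class up to at least fraction times the size of the largest class) can be sketched as a small counting helper. extra_samples_per_class is hypothetical and only computes how many extra observations each class would need, not how they are drawn:

```julia
# Count each class, take the largest count scaled by `fraction` as the
# target, and report the shortfall per class.
function extra_samples_per_class(classes; fraction=1.0)
    counts = Dict{eltype(classes),Int}()
    for c in classes
        counts[c] = get(counts, c, 0) + 1
    end
    target = ceil(Int, fraction * maximum(values(counts)))
    return Dict(c => max(0, target - k) for (c, k) in counts)
end

extra_samples_per_class([:a, :a, :a, :a, :b])   # → Dict(:a => 0, :b => 3)
```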
MLUtils.undersampleFunction
undersample([rng], data, classes; shuffle=true)
 undersample([rng], data::Tuple; shuffle=true)

Generate a class-balanced version of data by subsampling its observations in such a way that the resulting number of observations is the same for every class. This way, all classes will have as many observations in the resulting data set as the smallest class has in the given (original) data.

The convenience parameter shuffle determines if the resulting data will be shuffled after its creation; if it is not shuffled then all the observations will be in their original order. Defaults to false.

If data is a tuple and classes is not given, then it will be assumed that the last element of the tuple contains the classes.

The output will contain both the resampled data and classes.

# 6 observations with 3 features each
 X = rand(3, 6)
 # 2 classes, severely imbalanced
@@ -485,7 +500,7 @@
    1 │ 0.427064  0.0648339  a
    2 │ 0.376304  0.100022   a
    3 │ 0.467095  0.185437   b
-   4 │ 0.457043  0.490688   b

See ObsView for more information on data subsets. See also oversample.

source

Operations

MLUtils.chunkFunction
chunk(x, n; [dims])
+   4 │ 0.457043  0.490688   b

See ObsView for more information on data subsets. See also oversample.

source

Operations

MLUtils.chunkFunction
chunk(x, n; [dims])
 chunk(x; [size, dims])

Split x into n parts or, alternatively, if size is an integer, into equal chunks of size size. The parts contain the same number of elements, except possibly for the last one, which can be smaller.

In case size is a collection of integers instead, the elements of x are split into chunks of the given sizes.

If x is an array, dims can be used to specify along which dimension to split (defaults to the last dimension).

Examples

julia> chunk(1:10, 3)
 3-element Vector{UnitRange{Int64}}:
  1:4
@@ -535,7 +550,7 @@
 julia> chunk(1:6; size = [2, 4])
 2-element Vector{UnitRange{Int64}}:
  1:2
- 3:6
source
chunk(x, partition_idxs; [npartitions, dims])

Partition the array x along the dimension dims according to the indexes in partition_idxs.

partition_idxs must be sorted and contain only positive integers between 1 and the number of partitions.

If the number of partition npartitions is not provided, it is inferred from partition_idxs.

If dims is not provided, it defaults to the last dimension.

See also unbatch.

Examples

julia> x = reshape([1:10;], 2, 5)
+ 3:6
source
chunk(x, partition_idxs; [npartitions, dims])

Partition the array x along the dimension dims according to the indices in partition_idxs.

partition_idxs must be sorted and contain only positive integers between 1 and the number of partitions.

If the number of partitions npartitions is not provided, it is inferred from partition_idxs.

If dims is not provided, it defaults to the last dimension.

See also unbatch.

Examples

julia> x = reshape([1:10;], 2, 5)
 2×5 Matrix{Int64}:
  1  3  5  7   9
  2  4  6  8  10
@@ -544,17 +559,17 @@
 3-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}}:
  [1; 2;;]
  [3 5; 4 6]
- [7 9; 8 10]
source
MLUtils.flattenFunction
flatten(x::AbstractArray)

Reshape arbitrarly-shaped input into a matrix-shaped output, preserving the size of the last dimension.

See also unsqueeze.

Examples

julia> rand(3,4,5) |> flatten |> size
-(12, 5)
source
MLUtils.group_countsFunction
group_counts(x)

Count the number of times that each element of x appears.

See also group_indices

Examples

julia> group_counts(['a', 'b', 'b'])
+ [7 9; 8 10]
source
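Chunking by explicit sizes, as in chunk(x; size=[2, 4]), amounts to turning cumulative sums of the sizes into chunk boundaries. A hedged sketch in plain Julia (chunk_by_sizes is an illustrative helper, not the package's code):

```julia
# Cumulative sums of the sizes give the stop index of each chunk; the
# start index of a chunk is the previous stop plus one.
function chunk_by_sizes(x::AbstractVector, sizes)
    stops = cumsum(sizes)
    starts = [1; stops[1:end-1] .+ 1]
    return [x[starts[i]:stops[i]] for i in eachindex(sizes)]
end

chunk_by_sizes(collect(1:6), [2, 4])   # → [[1, 2], [3, 4, 5, 6]]
```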
MLUtils.flattenFunction
flatten(x::AbstractArray)

Reshape arbitrarily-shaped input into a matrix-shaped output, preserving the size of the last dimension.

See also unsqueeze.

Examples

julia> rand(3,4,5) |> flatten |> size
+(12, 5)
source
MLUtils.group_countsFunction
group_counts(x)

Count the number of times that each element of x appears.

See also group_indices.

Examples

julia> group_counts(['a', 'b', 'b'])
 Dict{Char, Int64} with 2 entries:
   'a' => 1
-  'b' => 2
source
MLUtils.group_indicesFunction
group_indices(x) -> Dict

Computes the indices of elements in the vector x for each distinct value contained. This information is useful for resampling strategies, such as stratified sampling.

See also group_counts.

Examples

julia> x = [:yes, :no, :maybe, :yes];
+  'b' => 2
source
MLUtils.group_indicesFunction
group_indices(x) -> Dict

Computes the indices of elements in the vector x for each distinct value contained. This information is useful for resampling strategies, such as stratified sampling.

See also group_counts.

Examples

julia> x = [:yes, :no, :maybe, :yes];
 
 julia> group_indices(x)
 Dict{Symbol, Vector{Int64}} with 3 entries:
   :yes   => [1, 4]
   :maybe => [3]
-  :no    => [2]
source
MLUtils.normaliseFunction
normalise(x; dims=ndims(x), ϵ=1e-5)

Normalise the array x to mean 0 and standard deviation 1 across the dimension(s) given by dims. Per default, dims is the last dimension.

ϵ is a small additive factor added to the denominator for numerical stability.

source
MLUtils.rpad_constantFunction
rpad_constant(v::AbstractArray, n::Union{Integer, Tuple}, val = 0; dims=:)

Return the given sequence padded with val along the dimensions dims up to a maximum length in each direction specified by n.

Examples

julia> rpad_constant([1, 2], 4, -1) # passing with -1 up to size 4
+  :no    => [2]
source
MLUtils.normaliseFunction
normalise(x; dims=ndims(x), ϵ=1e-5)

Normalise the array x to mean 0 and standard deviation 1 across the dimension(s) given by dims. By default, dims is the last dimension.

ϵ is a small additive factor added to the denominator for numerical stability.

source
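The normalisation formula can be written out by hand with the Statistics stdlib. This sketch assumes the defaults (dims equal to the last dimension, ϵ = 1e-5); MLUtils may differ in details such as the standard-deviation correction:

```julia
using Statistics

# Subtract the per-slice mean and divide by the per-slice standard
# deviation, with ϵ added to the denominator for numerical stability.
x = Float64[1 2 3; 4 5 6]
μ = mean(x, dims=2)
σ = std(x, dims=2)
xnorm = (x .- μ) ./ (σ .+ 1e-5)   # each row now has mean ≈ 0
```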
MLUtils.rpad_constantFunction
rpad_constant(v::AbstractArray, n::Union{Integer, Tuple}, val = 0; dims=:)

Return the given sequence padded with val along the dimensions dims up to a maximum length in each direction specified by n.

Examples

julia> rpad_constant([1, 2], 4, -1) # padding with -1 up to size 4
 4-element Vector{Int64}:
   1
   2
@@ -579,13 +594,13 @@
  1  2  0  0
  3  4  0  0
  0  0  0  0
- 0  0  0  0
source
MLUtils.unbatchFunction
unbatch(x)

Reverse of the batch operation, unstacking the last dimension of the array x.

See also unstack and chunk.

Examples

julia> unbatch([1 3 5 7;
+ 0  0  0  0
source
MLUtils.unbatchFunction
unbatch(x)

Reverse of the batch operation, unstacking the last dimension of the array x.

See also unstack and chunk.

Examples

julia> unbatch([1 3 5 7;
                 2 4 6 8])
 4-element Vector{Vector{Int64}}:
  [1, 2]
  [3, 4]
  [5, 6]
- [7, 8]
source
MLUtils.unsqueezeFunction
unsqueeze(x; dims)

Return x reshaped into an array one dimensionality higher than x, where dims indicates in which dimension x is extended. dims can be an integer between 1 and ndims(x)+1.

See also flatten, stack.

Examples

julia> unsqueeze([1 2; 3 4], dims=2)
+ [7, 8]
source
MLUtils.unsqueezeFunction
unsqueeze(x; dims)

Return x reshaped into an array one dimensionality higher than x, where dims indicates in which dimension x is extended. dims can be an integer between 1 and ndims(x)+1.

See also flatten, stack.

Examples

julia> unsqueeze([1 2; 3 4], dims=2)
 2×1×2 Array{Int64, 3}:
 [:, :, 1] =
  1
@@ -604,10 +619,10 @@
 
 julia> unsqueeze(xs, dims=1)
 1×3 Matrix{Vector{Int64}}:
- [1, 2]  [3, 4]  [5, 6]
source
unsqueeze(; dims)

Returns a function which, acting on an array, inserts a dimension of size 1 at dims.

Examples

julia> rand(21, 22, 23) |> unsqueeze(dims=2) |> size
-(21, 1, 22, 23)
source
MLUtils.unstackFunction
unstack(xs; dims)

Unroll the given xs into an array of arrays along the given dimension dims.

It is the inverse operation of stack.

See also unbatch and chunk.

Examples

julia> unstack([1 3 5 7; 2 4 6 8], dims=2)
+ [1, 2]  [3, 4]  [5, 6]
source
unsqueeze(; dims)

Returns a function which, acting on an array, inserts a dimension of size 1 at dims.

Examples

julia> rand(21, 22, 23) |> unsqueeze(dims=2) |> size
+(21, 1, 22, 23)
source
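Conceptually, unsqueeze is a reshape that inserts a singleton dimension at position dims. A sketch in Base Julia (unsqueeze_sketch is an illustrative name, not the package's definition):

```julia
# Rebuild the size tuple with an extra 1 spliced in at position `dims`.
unsqueeze_sketch(x; dims) = reshape(x, size(x)[1:dims-1]..., 1, size(x)[dims:end]...)

size(unsqueeze_sketch(rand(2, 3); dims=2))   # → (2, 1, 3)
```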
MLUtils.unstackFunction
unstack(xs; dims)

Unroll the given xs into an array of arrays along the given dimension dims.

It is the inverse operation of stack.

See also unbatch and chunk.

Examples

julia> unstack([1 3 5 7; 2 4 6 8], dims=2)
 4-element Vector{Vector{Int64}}:
  [1, 2]
  [3, 4]
  [5, 6]
- [7, 8]
source

Datasets

MLUtils.Datasets.load_irisFunction
load_iris() -> X, y, names

Loads the first 150 observations from the Iris flower data set introduced by Ronald Fisher (1936). The 4 by 150 matrix X contains the numeric measurements, in which each individual column denotes an observation. The vector y contains the class labels as strings. The vector names contains the names of the features (i.e. rows of X)

[1] Fisher, Ronald A. "The use of multiple measurements in taxonomic problems." Annals of eugenics 7.2 (1936): 179-188.

source
MLUtils.Datasets.make_sinFunction
make_sin(n, start, stop; noise = 0.3, f_rand = randn) -> x, y

Generates n noisy equally spaces samples of a sinus from start to stop by adding noise .* f_rand(length(x)) to the result of sin(x).

Returns the vector x with the samples and the noisy response y.

source
MLUtils.Datasets.make_spiralFunction
make_spiral(n, a, theta, b; noise = 0.01, f_rand = randn) -> x, y

Generates n noisy responses for a spiral with two labels. Uses the radius, angle and scaling arguments to space the points in 2D space and adding noise .* f_randn(n) to the response.

Returns the 2 x n matrix x with the coordinates of the samples and the vector y with the labels.

source
MLUtils.Datasets.make_polyFunction
make_poly(coef, x; noise = 0.01, f_rand = randn) -> x, y

Generates a noisy response for a polynomial of degree length(coef) and with the coefficients given by coef. The response is generated by elmentwise computation of the polynome on the elements of x and adding noise .* f_randn(length(x)) to the result.

The vector coef contains the coefficients for the terms of the polynome. The first element of coef denotes the coefficient for the term with the highest degree, while the last element of coef denotes the intercept.

Return the input x and the noisy response y.

source
MLUtils.Datasets.make_moonsFunction
make_moons(n; noise=0.0, f_rand=randn, shuffle=true) -> x, y

Generate a dataset with two interleaving half circles.

If n is an integer, the number of samples is n and the number of samples for each half circle is n ÷ 2. If n is a tuple, the first element of the tuple denotes the number of samples for the first half circle and the second element denotes the number of samples for the second half circle.

The noise level can be controlled by the noise argument.

Set shuffle=false to keep the order of the samples.

Returns a 2 x n matrix with the the samples.

source
+ [7, 8]
source

Datasets

MLUtils.Datasets.load_irisFunction
load_iris() -> X, y, names

Loads the first 150 observations from the Iris flower data set introduced by Ronald Fisher (1936). The 4 by 150 matrix X contains the numeric measurements, in which each individual column denotes an observation. The vector y contains the class labels as strings. The vector names contains the names of the features (i.e. the rows of X).

[1] Fisher, Ronald A. "The use of multiple measurements in taxonomic problems." Annals of eugenics 7.2 (1936): 179-188.

source
MLUtils.Datasets.make_sinFunction
make_sin(n, start, stop; noise = 0.3, f_rand = randn) -> x, y

Generates n noisy, equally spaced samples of a sine from start to stop by adding noise .* f_rand(length(x)) to the result of sin(x).

Returns the vector x with the samples and the noisy response y.

source
MLUtils.Datasets.make_spiralFunction
make_spiral(n, a, theta, b; noise = 0.01, f_rand = randn) -> x, y

Generates n noisy responses for a spiral with two labels. Uses the radius, angle, and scaling arguments to space the points in 2D space, adding noise .* f_rand(n) to the response.

Returns the 2 x n matrix x with the coordinates of the samples and the vector y with the labels.

source
MLUtils.Datasets.make_polyFunction
make_poly(coef, x; noise = 0.01, f_rand = randn) -> x, y

Generates a noisy response for a polynomial of degree length(coef) with the coefficients given by coef. The response is generated by elementwise evaluation of the polynomial on the elements of x, adding noise .* f_rand(length(x)) to the result.

The vector coef contains the coefficients for the terms of the polynomial. The first element of coef denotes the coefficient for the term with the highest degree, while the last element of coef denotes the intercept.

Return the input x and the noisy response y.

source
MLUtils.Datasets.make_moonsFunction
make_moons(n; noise=0.0, f_rand=randn, shuffle=true) -> x, y

Generate a dataset with two interleaving half circles.

If n is an integer, the number of samples is n and the number of samples for each half circle is n ÷ 2. If n is a tuple, the first element of the tuple denotes the number of samples for the first half circle and the second element denotes the number of samples for the second half circle.

The noise level can be controlled by the noise argument.

Set shuffle=false to keep the order of the samples.

Returns a 2 x n matrix with the the samples.

source
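The two-interleaving-half-circles construction can be sketched as follows; the offsets and noise handling here are illustrative guesses, not MLUtils' exact parametrization, and moons_sketch is a hypothetical helper:

```julia
# Two half circles traced over [0, π]; the second is reflected and shifted
# so the moons interleave. Noise is added to both coordinates.
function moons_sketch(n::Int; noise=0.0, f_rand=randn)
    n1 = n ÷ 2; n2 = n - n1
    t1 = range(0, π, length=n1)
    t2 = range(0, π, length=n2)
    x1 = [cos.(t1)'; sin.(t1)']                 # upper half circle
    x2 = [1 .- cos.(t2)'; 0.5 .- sin.(t2)']     # lower, shifted to interleave
    x = hcat(x1, x2) .+ noise .* f_rand(2, n)
    y = vcat(zeros(Int, n1), ones(Int, n2))
    return x, y
end

x, y = moons_sketch(100)
```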
diff --git a/dev/index.html b/dev/index.html index 6381575..b9b4450 100644 --- a/dev/index.html +++ b/dev/index.html @@ -25,4 +25,4 @@ # ... train supervised model on minibatches here end end -end

In the above code snippet, the inner loop for eachobs is the only place where data other than indices is actually being copied. In fact, while x and y are materialized arrays, all the rest are data views.

Historical Notes

MLUtils.jl brings together functionalities previously found in LearnBase.jl , MLDataPattern.jl and MLLabelUtils.jl. These packages are now discontinued.

Other features were ported from the deep learning library Flux.jl, as they are of general use.

+end

In the above code snippet, the inner eachobs loop is the only place where data other than indices is actually copied. In fact, while x and y are materialized arrays, all the rest are data views.

Historical Notes

MLUtils.jl brings together functionalities previously found in LearnBase.jl, MLDataPattern.jl, and MLLabelUtils.jl. These packages are now discontinued.

Other features were ported from the deep learning library Flux.jl, as they are of general use.

diff --git a/dev/objects.inv b/dev/objects.inv index 4222012937e07fd71ef4602e18bca439c99bfbdc..f7297e540a9eb9e6efbdc05b3e1fc96e3c1b093b 100644 GIT binary patch delta 580 zcmV-K0=xaW1;Yi9iho5Zl4EYJ(;@-f#xUAkjTO1FX;Y-ahaLOtOVZB-bjcockT^TL zJ3G{J$?%OB%kG(?q`oEOQFT~B&(A;l*RNzSeyaRaPq0Oel43*%Xiyw5pT7hYIU=eN zY&Z394brqqcP1GTFyw@(o_o|_xsqvkT@97SI^;$VpC|#2=G*T#Z3n9zJ@=g|;SWpzFnDz(8x* z)I%?f)s9-F(*i{pIfqpbV8NihBg6bIlGBgu$}3yBXnYzJFh1y-9O0-2{WSnAtG+cz~8um(Tvi}6Zncom=ud7;T3vSHHZ=W0tfzAsiA2-~~p6`;4 zKKxjEu08!?$Xd#KGw!~uvPXL14L#+WmpIFBmxA`587LHHL0zD#LFMIH43 delta 570 zcmV-A0>%Bq1-S)~ihqSEl4EYJ+adwn#xUAkjWxBhX;Y-aha3CrOVST10bQ~O9VE`q z?#>RiT-NwXjIHl$#R$D6h5+B##T;lQb9xR#usE!=3t9pf+CRNd&~iiH$$j5hdWL8riBg8TNQ53e}x#H?QE^M zEGWgvA_?}Wm6;2^$!Z{7TaflJ*X$5M7%WSBm=V6@`+qgon=}X09n^CcZX`&X1N&QB zSmmZsqAr^CL_v)@AnmThq=Z_zu4uyM#UbY18wuxJ)|_E}G+c x = (a = [1, 2, 3], b = rand(6, 3));\n\njulia> getobs(x, 2) == (a = 2, b = x.b[:, 2])\ntrue\n\njulia> getobs(x, [1, 3]) == (a = [1, 3], b = x.b[:, [1, 3]])\ntrue\n\njulia> x = Dict(:a => [1, 2, 3], :b => rand(6, 3));\n\njulia> getobs(x, 2) == Dict(:a => 2, :b => x[:b][:, 2])\ntrue\n\njulia> getobs(x, [1, 3]) == Dict(:a => [1, 3], :b => x[:b][:, [1, 3]])\ntrue\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.getobs!","page":"API","title":"MLUtils.getobs!","text":"getobs!(buffer, data, idx)\n\nInplace version of getobs(data, idx). If this method is defined for the type of data, then buffer should be used to store the result, instead of allocating a dedicated object.\n\nImplementing this function is optional. In the case no such method is provided for the type of data, then buffer will be ignored and the result of getobs returned. This could be because the type of data may not lend itself to the concept of copy!. Thus, supporting a custom getobs! is optional and not required.\n\nCustom implementations of getobs! 
should be consistent with getobs in terms of the output format, that is getobs!(buffer, data, idx) == getobs(data, idx).\n\nSee also getobs and numobs. \n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.numobs","page":"API","title":"MLUtils.numobs","text":"numobs(data)\n\nReturn the total number of observations contained in data.\n\nIf data does not have numobs defined, then in the case of Tables.table(data) == true returns the number of rows, otherwise returns length(data).\n\nAuthors of custom data containers should implement Base.length for their type instead of numobs. numobs should only be implemented for types where there is a difference between numobs and Base.length (such as multi-dimensional arrays).\n\ngetobs supports by default nested combinations of array, tuple, named tuples, and dictionaries. \n\nSee also getobs.\n\nExamples\n\njulia> x = (a = [1, 2, 3], b = ones(6, 3)); # named tuples\n\njulia> numobs(x)\n3\n\njulia> x = Dict(:a => [1, 2, 3], :b => ones(6, 3)); # dictionaries\n\njulia> numobs(x) \n3\n\nAll internal containers must have the same number of observations:\n\njulia> x = (a = [1, 2, 3, 4], b = ones(6, 3));\n\njulia> numobs(x)\nERROR: DimensionMismatch: All data containers must have the same number of observations.\nStacktrace:\n [1] _check_numobs_error()\n @ MLUtils ~/.julia/dev/MLUtils/src/observation.jl:163\n [2] _check_numobs\n @ ~/.julia/dev/MLUtils/src/observation.jl:130 [inlined]\n [3] numobs(data::NamedTuple{(:a, :b), Tuple{Vector{Int64}, Matrix{Float64}}})\n @ MLUtils ~/.julia/dev/MLUtils/src/observation.jl:177\n [4] top-level scope\n @ REPL[35]:1\n\n\n\n\n\n","category":"function"},{"location":"api/#Lazy-Transforms","page":"API","title":"Lazy Transforms","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"filterobs\ngroupobs\njoinobs\nmapobs\nshuffleobs","category":"page"},{"location":"api/#MLUtils.filterobs","page":"API","title":"MLUtils.filterobs","text":"filterobs(f, data)\n\nReturn 
a subset of data container data including all indices i for which f(getobs(data, i)) === true.\n\ndata = 1:10\nnumobs(data) == 10\nfdata = filterobs(>(5), data)\nnumobs(fdata) == 5\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.groupobs","page":"API","title":"MLUtils.groupobs","text":"groupobs(f, data)\n\nSplit data container data into different data containers, grouping observations by f(obs).\n\ndata = -10:10\ndatas = groupobs(>(0), data)\nlength(datas) == 2\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.joinobs","page":"API","title":"MLUtils.joinobs","text":"joinobs(datas...)\n\nConcatenate data containers datas.\n\ndata1, data2 = 1:10, 11:20\njdata = joinobs(data1, data2)\ngetobs(jdata, 15) == 15\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.mapobs","page":"API","title":"MLUtils.mapobs","text":"mapobs(f, data; batched=:auto)\n\nLazily map f over the observations in a data container data. Returns a new data container mdata that can be indexed and has a length. Indexing triggers the transformation f.\n\nThe batched keyword argument controls the behavior of mdata[idx] and mdata[idxs] where idx is an integer and idxs is a vector of integers:\n\nbatched=:auto (default). Let f handle the two cases. Calls f(getobs(data, idx)) and f(getobs(data, idxs)).\nbatched=:never. The function f is always called on a single observation. Calls f(getobs(data, idx)) and [f(getobs(data, idx)) for idx in idxs].\nbatched=:always. The function f is always called on a batch of observations. Calls getobs(f(getobs(data, [idx])), 1) and f(getobs(data, idxs)).\n\nExamples\n\njulia> data = (a=[1,2,3], b=[1,2,3]);\n\njulia> mdata = mapobs(data) do x\n (c = x.a .+ x.b, d = x.a .- x.b)\n end\nmapobs(#25, (a = [1, 2, 3], b = [1, 2, 3]); batched=:auto)\n\njulia> mdata[1]\n(c = 2, d = 0)\n\njulia> mdata[1:2]\n(c = [2, 4], d = [0, 0])\n\n\n\n\n\nmapobs(fs, data)\n\nLazily map each function in tuple fs over the observations in data container data. 
Returns a tuple of transformed data containers.\n\n\n\n\n\nmapobs(namedfs::NamedTuple, data)\n\nMap a NamedTuple of functions over data, turning it into a data container of NamedTuples. Field syntax can be used to select a column of the resulting data container.\n\ndata = 1:10\nnameddata = mapobs((x = sqrt, y = log), data)\ngetobs(nameddata, 10) == (x = sqrt(10), y = log(10))\ngetobs(nameddata.x, 10) == sqrt(10)\n\n\n\n\n\nmapobs(f, d::DataLoader)\n\nReturn a new dataloader based on d that applies f at each iteration. \n\nExamples\n\njulia> X = ones(3, 6);\n\njulia> function f(x)\n @show x\n return x\n end\nf (generic function with 1 method)\n\njulia> d = DataLoader(X, batchsize=2, collate=false);\n\njulia> d = mapobs(f, d);\n\njulia> for x in d\n @assert size(x) == (2,)\n @assert size(x[1]) == (3,)\n end\nx = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]\nx = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]\nx = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]\n\njulia> d2 = DataLoader(X, batchsize=2, collate=true);\n\njulia> d2 = mapobs(f, d2);\n\njulia> for x in d2\n @assert size(x) == (3, 2)\n end\nx = [1.0 1.0; 1.0 1.0; 1.0 1.0]\nx = [1.0 1.0; 1.0 1.0; 1.0 1.0]\nx = [1.0 1.0; 1.0 1.0; 1.0 1.0]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.shuffleobs","page":"API","title":"MLUtils.shuffleobs","text":"shuffleobs([rng], data)\n\nReturn a version of the dataset data that contains all the original observations in a random reordering.\n\nThe values of data itself are not copied. Instead only the indices are shuffled. This function calls obsview to accomplish that, which means that the return value is likely of a different type than data.\n\nOptionally, a random number generator rng can be passed as the first argument. \n\nFor this function to work, the type of data must implement numobs and getobs. 
\n\nSee also obsview.\n\nExamples\n\n# For Arrays the subset will be of type SubArray\n@assert typeof(shuffleobs(rand(4,10))) <: SubArray\n\n# Iterate through all observations in random order\nfor x in eachobs(shuffleobs(X))\n ...\nend\n\n\n\n\n\n","category":"function"},{"location":"api/#Batching,-Iteration,-and-Views","page":"API","title":"Batching, Iteration, and Views","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"batch\nbatchsize\nbatchseq\nBatchView\neachobs\nDataLoader\nobsview\nObsView\nrandobs","category":"page"},{"location":"api/#MLUtils.batch","page":"API","title":"MLUtils.batch","text":"batch(xs)\n\nBatch the arrays in xs into a single array with an extra dimension.\n\nIf the elements of xs are tuples, named tuples, or dicts, the output will be of the same type. \n\nSee also unbatch.\n\nExamples\n\njulia> batch([[1,2,3], \n [4,5,6]])\n3×2 Matrix{Int64}:\n 1 4\n 2 5\n 3 6\n\njulia> batch([(a=[1,2], b=[3,4])\n (a=[5,6], b=[7,8])]) \n(a = [1 5; 2 6], b = [3 7; 4 8])\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.batchsize","page":"API","title":"MLUtils.batchsize","text":"batchsize(data::BatchView) -> Int\n\nReturn the fixed size of each batch in data.\n\nExamples\n\nusing MLUtils\nX, Y = MLUtils.load_iris()\n\nA = BatchView(X, batchsize=30)\n@assert batchsize(A) == 30\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.batchseq","page":"API","title":"MLUtils.batchseq","text":"batchseq(seqs, val = 0)\n\nTake a list of N sequences, and turn them into a single sequence where each item is a batch of N. 
Short sequences will be padded by val.\n\nExamples\n\njulia> batchseq([[1, 2, 3], [4, 5]], 0)\n3-element Vector{Vector{Int64}}:\n [1, 4]\n [2, 5]\n [3, 0]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.BatchView","page":"API","title":"MLUtils.BatchView","text":"BatchView(data, batchsize; partial=true, collate=nothing)\nBatchView(data; batchsize=1, partial=true, collate=nothing)\n\nCreate a view of the given data that represents it as a vector of batches. Each batch will contain an equal number of observations. The batch size can be specified using the parameter batchsize. In the case that the size of the dataset is not divisible by the specified batchsize, the remaining observations will be ignored if partial=false. If partial=true, the last batch can instead be slightly smaller.\n\nIf used as an iterator, the object will iterate over the dataset once, effectively denoting an epoch. \n\nAny data access is delayed until iteration or indexing is performed. The getobs function is called on the data object to retrieve the observations.\n\nFor BatchView to work on some data structure, the type of the given variable data must implement the data container interface. See ObsView for more info.\n\nArguments\n\ndata : The object describing the dataset. Can be of any type as long as it implements getobs and numobs (see Details for more information).\nbatchsize : The batch-size of each batch. It is the number of observations that each batch must contain (except possibly for the last one).\npartial : If partial=false and the number of observations is not divisible by the batch-size, then the last mini-batch is dropped.\ncollate: Defines the batching behavior. \nIf nothing (default), a batch is getobs(data, indices). \nIf false, each batch is [getobs(data, i) for i in indices]. \nIf true, applies MLUtils.batch to the vector of observations in a batch, recursively collating arrays in the last dimensions. 
See MLUtils.batch for more information and examples.\nIf a custom function, it will be used in place of MLUtils.batch. It should take a vector of observations as input.\n\nSee also DataLoader.\n\nExamples\n\njulia> using MLUtils\n\njulia> X, Y = MLUtils.load_iris();\n\njulia> A = BatchView(X, batchsize=30);\n\njulia> @assert eltype(A) <: Matrix{Float64}\n\njulia> @assert length(A) == 5 # Iris has 150 observations\n\njulia> @assert size(A[1]) == (4,30) # Iris has 4 features\n\njulia> for x in BatchView(X, batchsize=30)\n # 5 batches of size 30 observations\n @assert size(x) == (4, 30)\n @assert numobs(x) === 30\n end\n\njulia> for (x, y) in BatchView((X, Y), batchsize=20, partial=true)\n # 7 batches of size 20 observations + 1 batch of 10 observations\n @assert typeof(x) <: Matrix{Float64}\n @assert typeof(y) <: Vector{String}\n end\n\njulia> for batch in BatchView((X, Y), batchsize=20, partial=false, collate=false)\n # 7 batches of size 20 observations\n @assert length(batch) == 20\n x1, y1 = batch[1]\n end\n\njulia> function collate_fn(batch)\n # collate observations into a custom batch\n return hcat([x[1] for x in batch]...), join([x[2] for x in batch])\n end;\n\njulia> for (x, y) in BatchView((rand(10, 4), [\"a\", \"b\", \"c\", \"d\"]), batchsize=2, collate=collate_fn)\n @assert size(x) == (10, 2)\n @assert y isa String\n end\n\n\n\n\n\n","category":"type"},{"location":"api/#MLUtils.eachobs","page":"API","title":"MLUtils.eachobs","text":"eachobs(data; kws...)\n\nReturn an iterator over data.\n\nSupports the same arguments as DataLoader. 
The batchsize default is -1 here while it is 1 for DataLoader.\n\nExamples\n\nX = rand(4,100)\n\nfor x in eachobs(X)\n # loop entered 100 times\n @assert typeof(x) <: Vector{Float64}\n @assert size(x) == (4,)\nend\n\n# mini-batch iterations\nfor x in eachobs(X, batchsize=10)\n # loop entered 10 times\n @assert typeof(x) <: Matrix{Float64}\n @assert size(x) == (4,10)\nend\n\n# support for tuples, named tuples, dicts\nfor (x, y) in eachobs((X, Y))\n # ...\nend\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.DataLoader","page":"API","title":"MLUtils.DataLoader","text":"DataLoader(data; [batchsize, buffer, collate, parallel, partial, rng, shuffle])\n\nAn object that iterates over mini-batches of data, each mini-batch containing batchsize observations (except possibly the last one).\n\nTakes as input a single data array, a tuple (or a named tuple) of arrays, or in general any data object that implements the numobs and getobs methods.\n\nThe last dimension in each array is the observation dimension, i.e. the one divided into mini-batches.\n\nThe original data is preserved in the data field of the DataLoader.\n\nArguments\n\ndata: The data to be iterated over. The data type has to be supported by numobs and getobs.\nbatchsize: If less than 0, iterates over individual observations. Otherwise, each iteration (except possibly the last) yields a mini-batch containing batchsize observations. Default 1.\nbuffer: If buffer=true and supported by the type of data, a buffer will be allocated and reused for memory efficiency. May want to set partial=false to avoid size mismatch. Finally, can pass an external buffer to be used in getobs! (depending on the collate and batchsize options, could be getobs!(buffer, data, idxs) or getobs!(buffer[i], data, idx)). Default false. \ncollate: Defines the batching behavior. Default nothing. \nIf nothing , a batch is getobs(data, indices). \nIf false, each batch is [getobs(data, i) for i in indices]. 
\nIf true, applies MLUtils.batch to the vector of observations in a batch, recursively collating arrays in the last dimensions. See MLUtils.batch for more information and examples.\nIf a custom function, it will be used in place of MLUtils.batch. It should take a vector of observations as input.\nparallel: Whether to load data in parallel using worker threads. Greatly speeds up data loading, by a factor of the number of available threads. Requires starting Julia with multiple threads. Check Threads.nthreads() to see the number of available threads. Passing parallel = true breaks ordering guarantees. Default false.\npartial: This argument is used only when batchsize > 0. If partial=false and the number of observations is not divisible by the batchsize, then the last mini-batch is dropped. Default true.\nrng: A random number generator. Default Random.default_rng().\nshuffle: Whether to shuffle the observations before iterating. Unlike wrapping the data container with shuffleobs(data), shuffle=true ensures that the observations are shuffled anew every time you start iterating over eachobs. 
Default false.\n\nExamples\n\njulia> Xtrain = rand(10, 100);\n\njulia> array_loader = DataLoader(Xtrain, batchsize=2);\n\njulia> for x in array_loader\n @assert size(x) == (10, 2)\n # do something with x, 50 times\n end\n\njulia> array_loader.data === Xtrain\ntrue\n\njulia> tuple_loader = DataLoader((Xtrain,), batchsize=2); # similar, but yielding 1-element tuples\n\njulia> for x in tuple_loader\n @assert x isa Tuple{Matrix}\n @assert size(x[1]) == (10, 2)\n end\n\njulia> Ytrain = rand('a':'z', 100); # now make a DataLoader yielding 2-element named tuples\n\njulia> train_loader = DataLoader((data=Xtrain, label=Ytrain), batchsize=5, shuffle=true);\n\njulia> for epoch in 1:100\n for (x, y) in train_loader # access via tuple destructuring\n @assert size(x) == (10, 5)\n @assert size(y) == (5,)\n # loss += f(x, y) # etc, runs 100 * 20 times\n end\n end\n\njulia> first(train_loader).label isa Vector{Char} # access via property name\ntrue\n\njulia> first(train_loader).label == Ytrain[1:5] # because of shuffle=true\nfalse\n\njulia> foreach(println∘summary, DataLoader(rand(Int8, 10, 64), batchsize=30)) # partial=false would omit last\n10×30 Matrix{Int8}\n10×30 Matrix{Int8}\n10×4 Matrix{Int8}\n\njulia> collate_fn(batch) = join(batch);\n\njulia> first(DataLoader([\"a\", \"b\", \"c\", \"d\"], batchsize=2, collate=collate_fn))\n\"ab\"\n\n\n\n\n\n","category":"type"},{"location":"api/#MLUtils.obsview","page":"API","title":"MLUtils.obsview","text":"obsview(data, [indices])\n\nReturns a lazy view of the observations in data that correspond to the given indices. No data will be copied except for the indices. It is similar to constructing an ObsView, but returns a SubArray if the type of data is Array or SubArray. Furthermore, this function may be extended for custom types of data that also want to provide their own subset-type.\n\nIn case data is a tuple, the constructor will be mapped over its elements. 
That means that the constructor returns a tuple of ObsView instead of an ObsView of tuples.\n\nIf instead you want to get the subset of observations corresponding to the given indices in their native type, use getobs.\n\nSee ObsView for more information.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.ObsView","page":"API","title":"MLUtils.ObsView","text":"ObsView(data, [indices])\n\nUsed to represent a subset of some data of arbitrary type by storing which observation-indices the subset spans. Furthermore, subsequent subsettings are accumulated without needing to access actual data.\n\nThe main purpose for the existence of ObsView is to delay data access and movement until an actual batch of data (or single observation) is needed for some computation. This is particularly useful when the data is not located in memory, but on the hard drive or some remote location. In such a scenario one wants to load the required data only when needed.\n\nAny data access is delayed until getindex is called, and even getindex returns the result of obsview which in general avoids data movement until getobs is called. If used as an iterator, the view will iterate over the dataset once, effectively denoting an epoch. Each iteration will return a lazy subset to the current observation.\n\nArguments\n\ndata : The object describing the dataset. Can be of any type as long as it implements getobs and numobs (see Details for more information).\nindices : Optional. The index or indices of the observation(s) in data that the subset should represent. Can be of type Int or some subtype of AbstractVector.\n\nMethods\n\ngetindex : Returns the observation(s) of the given index/indices. No data is copied aside from the required indices.\nnumobs : Returns the total number of observations in the subset.\ngetobs : Returns the underlying data that the ObsView represents at the given relative indices. 
Note that these indices are in \"subset space\", and in general will not directly correspond to the same indices in the underlying data set.\n\nDetails\n\nFor ObsView to work on some data structure, the desired type MyType must implement the following interface:\n\ngetobs(data::MyType, idx) : Should return the observation(s) indexed by idx. In what form is up to the user. Note that idx can be of type Int or AbstractVector.\nnumobs(data::MyType) : Should return the total number of observations in data.\n\nThe following methods can also be provided and are optional:\n\ngetobs(data::MyType) : By default this function is the identity function. If that is not the behaviour that you want for your type, you need to provide this method as well.\nobsview(data::MyType, idx) : If your custom type has its own kind of subset type, you can return it here. An example of such a case is SubArray, which represents a subset of some AbstractArray.\ngetobs!(buffer, data::MyType, [idx]) : Inplace version of getobs(data, idx). If this method is provided for MyType, then eachobs can preallocate a buffer that is then reused every iteration. 
Note: buffer should be equivalent to the return value of getobs(::MyType, ...), since this is how buffer is preallocated by default.\n\nExamples\n\nX, Y = MLUtils.load_iris()\n\n# The iris set has 150 observations and 4 features\n@assert size(X) == (4,150)\n\n# Represents the 80 observations as an ObsView\nv = ObsView(X, 21:100)\n@assert numobs(v) == 80\n@assert typeof(v) <: ObsView\n# getobs indexes into v\n@assert getobs(v, 1:10) == X[:, 21:30]\n\n# Use `obsview` to avoid boxing into ObsView\n# for types that provide a custom \"subset\", such as arrays.\n# Here it instead creates a native SubArray.\nv = obsview(X, 1:100)\n@assert numobs(v) == 100\n@assert typeof(v) <: SubArray\n\n# Also works for tuples of arbitrary length\nsubset = obsview((X, Y), 1:100)\n@assert numobs(subset) == 100\n@assert typeof(subset) <: Tuple # tuple of SubArray\n\n# Use as iterator\nfor x in ObsView(X)\n @assert typeof(x) <: SubArray{Float64,1}\nend\n\n# iterate over each individual labeled observation\nfor (x, y) in ObsView((X, Y))\n @assert typeof(x) <: SubArray{Float64,1}\n @assert typeof(y) <: String\nend\n\n# same but in random order\nfor (x, y) in ObsView(shuffleobs((X, Y)))\n @assert typeof(x) <: SubArray{Float64,1}\n @assert typeof(y) <: String\nend\n\n# Indexing: take first 10 observations\nx, y = ObsView((X, Y))[1:10]\n\nSee also\n\nobsview, getobs, numobs, splitobs, shuffleobs, kfolds.\n\n\n\n\n\n","category":"type"},{"location":"api/#MLUtils.randobs","page":"API","title":"MLUtils.randobs","text":"randobs(data, [n])\n\nPick a random observation or a batch of n random observations from data. 
For this function to work, the type of data must implement numobs and getobs.\n\n\n\n\n\n","category":"function"},{"location":"api/#Partitioning","page":"API","title":"Partitioning","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"leavepout\nkfolds\nsplitobs","category":"page"},{"location":"api/#MLUtils.leavepout","page":"API","title":"MLUtils.leavepout","text":"leavepout(n::Integer, [size = 1]) -> Tuple\n\nCompute the train/validation assignments for k ≈ n/size repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector the index-vectors for the validation subsets respectively. Each validation subset will have either size or size+1 observations assigned to it. The following code snippet generates the index-vectors for size = 2.\n\njulia> train_idx, val_idx = leavepout(10, 2);\n\nEach observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.\n\njulia> train_idx\n5-element Array{Array{Int64,1},1}:\n [3,4,5,6,7,8,9,10]\n [1,2,5,6,7,8,9,10]\n [1,2,3,4,7,8,9,10]\n [1,2,3,4,5,6,9,10]\n [1,2,3,4,5,6,7,8]\n\njulia> val_idx\n5-element Array{UnitRange{Int64},1}:\n 1:2\n 3:4\n 5:6\n 7:8\n 9:10\n\n\n\n\n\nleavepout(data, p = 1)\n\nRepartition a data container using a k-fold strategy, where k is chosen in such a way, that each validation subset of the resulting folds contains roughly p observations. Defaults to p = 1, which is also known as \"leave-one-out\" partitioning.\n\nThe resulting sequence of folds is returned as a lazy iterator. Only data subsets are created. 
That means no actual data is copied until getobs is invoked.\n\nfor (train, val) in leavepout(X, p=2)\n # if numobs(X) is divisible by 2,\n # then numobs(val) will be 2 for each iteration,\n # otherwise it may be 3 for the first few iterations.\nend\n\nSee kfolds for a related function.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.kfolds","page":"API","title":"MLUtils.kfolds","text":"kfolds(n::Integer, k = 5) -> Tuple\n\nCompute the train/validation assignments for k repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector the index-vectors for the validation subsets respectively. A general rule of thumb is to use either k = 5 or k = 10. The following code snippet generates the index assignments for k = 5.\n\njulia> train_idx, val_idx = kfolds(10, 5);\n\nEach observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.\n\njulia> train_idx\n5-element Array{Array{Int64,1},1}:\n [3,4,5,6,7,8,9,10]\n [1,2,5,6,7,8,9,10]\n [1,2,3,4,7,8,9,10]\n [1,2,3,4,5,6,9,10]\n [1,2,3,4,5,6,7,8]\n\njulia> val_idx\n5-element Array{UnitRange{Int64},1}:\n 1:2\n 3:4\n 5:6\n 7:8\n 9:10\n\n\n\n\n\nkfolds(data, [k = 5])\n\nRepartition a data container k times using a k-folds strategy and return the sequence of folds as a lazy iterator. Only data subsets are created, which means that no actual data is copied until getobs is invoked.\n\nConceptually, a k-folds repartitioning strategy divides the given data into k roughly equal-sized parts. Each part will serve as validation set once, while the remaining parts are used for training. 
This results in k different partitions of data.\n\nIn the case that the size of the dataset is not divisible by the specified k, the remaining observations will be evenly distributed among the parts.\n\nfor (x_train, x_val) in kfolds(X, k=10)\n # code called 10 times\n # numobs(x_val) may differ up to ±1 over iterations\nend\n\nMultiple variables are supported (e.g. for labeled data)\n\nfor ((x_train, y_train), val) in kfolds((X, Y), k=10)\n # ...\nend\n\nBy default the folds are created using static splits. Use shuffleobs to randomly assign observations to the folds.\n\nfor (x_train, x_val) in kfolds(shuffleobs(X), k = 10)\n # ...\nend\n\nSee leavepout for a related function.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.splitobs","page":"API","title":"MLUtils.splitobs","text":"splitobs(n::Int; at) -> Tuple\n\nCompute the indices for two or more disjoint subsets of the range 1:n with split sizes determined by at.\n\nExamples\n\njulia> splitobs(100, at=0.7)\n(1:70, 71:100)\n\njulia> splitobs(100, at=(0.1, 0.4))\n(1:10, 11:50, 51:100)\n\n\n\n\n\nsplitobs([rng,] data; at, shuffle=false, stratified=nothing) -> Tuple\n\nPartition the data into two or more subsets.\n\nThe argument at specifies how to split the data:\n\nWhen at is a number between 0 and 1, this specifies the proportion in the first subset.\nWhen at is an integer, it specifies the number of observations in the first subset.\nWhen at is a tuple, each entry specifies the number or proportion in the corresponding subset, except for the last one, which will contain the remaining observations. The number of returned subsets is length(at)+1.\n\nIf shuffle=true, randomly permute the observations before splitting. A random number generator rng can be optionally passed as the first argument.\n\nIf stratified is not nothing, it should be an array of labels with the same length as the data. 
The observations will be split in such a way that the proportion of each label is preserved in each subset.\n\nSupports any datatype implementing numobs. \n\nIt relies on obsview to create views of the data.\n\nExamples\n\njulia> splitobs(reshape(1:100, 1, :); at=0.7) # simple 70%-30% split, of a matrix\n([1 2 … 69 70], [71 72 … 99 100])\n\njulia> data = (x=ones(2,10), n=1:10) # a NamedTuple, consistent last dimension\n(x = [1.0 1.0 … 1.0 1.0; 1.0 1.0 … 1.0 1.0], n = 1:10)\n\njulia> splitobs(data, at=(0.5, 0.3)) # a 50%-30%-20% split, e.g. train/test/validation\n((x = [1.0 1.0 … 1.0 1.0; 1.0 1.0 … 1.0 1.0], n = 1:5), (x = [1.0 1.0 1.0; 1.0 1.0 1.0], n = 6:8), (x = [1.0 1.0; 1.0 1.0], n = 9:10))\n\njulia> train, test = splitobs((reshape(1.0:100.0, 1, :), 101:200), at=0.7, shuffle=true); # split a Tuple\n\njulia> vec(test[1]) .+ 100 == test[2]\ntrue\n\njulia> splitobs(1:10, at=0.5, stratified=[0,0,0,0,1,1,1,1,1,1]) # 2 zeros and 3 ones in each subset\n([1, 2, 5, 6, 7], [3, 4, 8, 9, 10])\n\n\n\n\n\n","category":"function"},{"location":"api/#Array-Constructors","page":"API","title":"Array Constructors","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"falses_like\nfill_like\nones_like\nrand_like\nrandn_like\ntrues_like\nzeros_like","category":"page"},{"location":"api/#MLUtils.falses_like","page":"API","title":"MLUtils.falses_like","text":"falses_like(x, [dims=size(x)])\n\nEquivalent to fill_like(x, false, Bool, dims).\n\nSee also fill_like and trues_like.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.fill_like","page":"API","title":"MLUtils.fill_like","text":"fill_like(x, val, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to val. The third and fourth arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nSee also zeros_like and ones_like.\n\nExamples\n\njulia> x = rand(Float32, 2)\n2-element Vector{Float32}:\n 0.16087806\n 0.89916044\n\njulia> fill_like(x, 1.7, (3, 3))\n3×3 Matrix{Float32}:\n 1.7 1.7 1.7\n 1.7 1.7 1.7\n 1.7 1.7 1.7\n\njulia> using CUDA\n\njulia> x = CUDA.rand(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 0.803167 0.476101\n 0.303041 0.317581\n\njulia> fill_like(x, 1.7, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 1.7 1.7\n 1.7 1.7\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.ones_like","page":"API","title":"MLUtils.ones_like","text":"ones_like(x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to 1. The second and third arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.\n\nSee also zeros_like and fill_like.\n\nExamples\n\njulia> x = rand(Float32, 2)\n2-element Vector{Float32}:\n 0.8621633\n 0.5158395\n\njulia> ones_like(x, (3, 3))\n3×3 Matrix{Float32}:\n 1.0 1.0 1.0\n 1.0 1.0 1.0\n 1.0 1.0 1.0\n\njulia> using CUDA\n\njulia> x = CUDA.rand(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 0.82297 0.656143\n 0.701828 0.391335\n\njulia> ones_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 1.0 1.0\n 1.0 1.0\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.rand_like","page":"API","title":"MLUtils.rand_like","text":"rand_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to a random value. The last two arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nThe default random number generator is used, unless a custom one is passed in explicitly as the first argument.\n\nSee also Base.rand and randn_like.\n\nExamples\n\njulia> x = ones(Float32, 2)\n2-element Vector{Float32}:\n 1.0\n 1.0\n\njulia> rand_like(x, (3, 3))\n3×3 Matrix{Float32}:\n 0.780032 0.920552 0.53689\n 0.121451 0.741334 0.5449\n 0.55348 0.138136 0.556404\n\njulia> using CUDA\n\njulia> x = CUDA.ones(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 1.0 1.0\n 1.0 1.0\n\njulia> rand_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 0.429274 0.135379\n 0.718895 0.0098756\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.randn_like","page":"API","title":"MLUtils.randn_like","text":"randn_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to a random value drawn from a normal distribution. The last two arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nThe default random number generator is used, unless a custom one is passed in explicitly as the first argument.\n\nSee also Base.randn and rand_like.\n\nExamples\n\njulia> x = ones(Float32, 2)\n2-element Vector{Float32}:\n 1.0\n 1.0\n\njulia> randn_like(x, (3, 3))\n3×3 Matrix{Float32}:\n -0.385331 0.956231 0.0745102\n 1.43756 -0.967328 2.06311\n 0.0482372 1.78728 -0.902547\n\njulia> using CUDA\n\njulia> x = CUDA.ones(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 1.0 1.0\n 1.0 1.0\n\njulia> randn_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n -0.578527 0.823445\n -1.01338 -0.612053\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.trues_like","page":"API","title":"MLUtils.trues_like","text":"trues_like(x, [dims=size(x)])\n\nEquivalent to fill_like(x, true, Bool, dims).\n\nSee also fill_like and falses_like.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.zeros_like","page":"API","title":"MLUtils.zeros_like","text":"zeros_like(x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to 0. The second and third arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nSee also ones_like and fill_like.\n\nExamples\n\njulia> x = rand(Float32, 2)\n2-element Vector{Float32}:\n 0.4005432\n 0.36934233\n\njulia> zeros_like(x, (3, 3))\n3×3 Matrix{Float32}:\n 0.0 0.0 0.0\n 0.0 0.0 0.0\n 0.0 0.0 0.0\n\njulia> using CUDA\n\njulia> x = CUDA.rand(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 0.0695155 0.667979\n 0.558468 0.59903\n\njulia> zeros_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 0.0 0.0\n 0.0 0.0\n\n\n\n\n\n","category":"function"},{"location":"api/#Resampling","page":"API","title":"Resampling","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"oversample\nundersample","category":"page"},{"location":"api/#MLUtils.oversample","page":"API","title":"MLUtils.oversample","text":"oversample([rng], data, classes; fraction=1, shuffle=true)\noversample([rng], data::Tuple; fraction=1, shuffle=true)\n\nGenerate a re-balanced version of data by repeatedly sampling existing observations in such a way that every class will have at least fraction times the number of observations of the largest class in classes. This way, all classes will have a minimum number of observations in the resulting data set relative to what the largest class has in the given (original) data.\n\nAs an example, by default (i.e. with fraction = 1) the resulting dataset will be near perfectly balanced. On the other hand, with fraction = 0.5 every class in the resulting data will have at least 50% as many observations as the largest class.\n\nThe classes input is an array with the same length as numobs(data). \n\nThe convenience parameter shuffle determines if the resulting data will be shuffled after its creation; if it is not shuffled then all the repeated samples will be together at the end, sorted by class. Defaults to true.\n\nThe random number generator rng can be optionally passed as the first argument. 
\n\nThe output will contain both the resampled data and classes.\n\n# 6 observations with 3 features each\nX = rand(3, 6)\n# 2 classes, severely imbalanced\nY = [\"a\", \"b\", \"b\", \"b\", \"b\", \"a\"]\n\n# oversample the class \"a\" to match \"b\"\nX_bal, Y_bal = oversample(X, Y)\n\n# this results in a bigger dataset with repeated data\n@assert size(X_bal) == (3,8)\n@assert length(Y_bal) == 8\n\n# now both \"a\", and \"b\" have 4 observations each\n@assert sum(Y_bal .== \"a\") == 4\n@assert sum(Y_bal .== \"b\") == 4\n\nFor this function to work, the type of data must implement numobs and getobs. \n\nIf data is a tuple and classes is not given, then it will be assumed that the last element of the tuple contains the classes.\n\njulia> data = DataFrame(X1=rand(6), X2=rand(6), Y=[:a,:b,:b,:b,:b,:a])\n6×3 DataFrames.DataFrame\n│ Row │ X1 │ X2 │ Y │\n├─────┼───────────┼─────────────┼───┤\n│ 1 │ 0.226582 │ 0.0443222 │ a │\n│ 2 │ 0.504629 │ 0.722906 │ b │\n│ 3 │ 0.933372 │ 0.812814 │ b │\n│ 4 │ 0.522172 │ 0.245457 │ b │\n│ 5 │ 0.505208 │ 0.11202 │ b │\n│ 6 │ 0.0997825 │ 0.000341996 │ a │\n\njulia> getobs(oversample(data, data.Y))\n8×3 DataFrame\n Row │ X1 X2 Y \n │ Float64 Float64 Symbol \n─────┼─────────────────────────────\n 1 │ 0.376304 0.100022 a\n 2 │ 0.467095 0.185437 b\n 3 │ 0.481957 0.319906 b\n 4 │ 0.336762 0.390811 b\n 5 │ 0.376304 0.100022 a\n 6 │ 0.427064 0.0648339 a\n 7 │ 0.427064 0.0648339 a\n 8 │ 0.457043 0.490688 b\n\nSee ObsView for more information on data subsets. See also undersample.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.undersample","page":"API","title":"MLUtils.undersample","text":"undersample([rng], data, classes; shuffle=true)\nundersample([rng], data::Tuple; shuffle=true)\n\nGenerate a class-balanced version of data by subsampling its observations in such a way that the resulting number of observations will be the same number for every class. 
This way, all classes will have as many observations in the resulting data set as the smallest class has in the given (original) data.\n\nThe convenience parameter shuffle determines if the resulting data will be shuffled after its creation; if it is not shuffled then all the observations will be in their original order. Defaults to true.\n\nIf data is a tuple and classes is not given, then it will be assumed that the last element of the tuple contains the classes.\n\nThe output will contain both the resampled data and classes.\n\n# 6 observations with 3 features each\nX = rand(3, 6)\n# 2 classes, severely imbalanced\nY = [\"a\", \"b\", \"b\", \"b\", \"b\", \"a\"]\n\n# subsample the class \"b\" to match \"a\"\nX_bal, Y_bal = undersample(X, Y)\n\n# this results in a smaller dataset\n@assert size(X_bal) == (3,4)\n@assert length(Y_bal) == 4\n\n# now both \"a\" and \"b\" have 2 observations each\n@assert sum(Y_bal .== \"a\") == 2\n@assert sum(Y_bal .== \"b\") == 2\n\nFor this function to work, the type of data must implement numobs and getobs. \n\nNote that if data is a tuple, then it will be assumed that the last element of the tuple contains the targets.\n\njulia> data = DataFrame(X1=rand(6), X2=rand(6), Y=[:a,:b,:b,:b,:b,:a])\n6×3 DataFrames.DataFrame\n│ Row │ X1        │ X2          │ Y │\n├─────┼───────────┼─────────────┼───┤\n│ 1   │ 0.226582  │ 0.0443222   │ a │\n│ 2   │ 0.504629  │ 0.722906    │ b │\n│ 3   │ 0.933372  │ 0.812814    │ b │\n│ 4   │ 0.522172  │ 0.245457    │ b │\n│ 5   │ 0.505208  │ 0.11202     │ b │\n│ 6   │ 0.0997825 │ 0.000341996 │ a │\n\njulia> getobs(undersample(data, data.Y))\n4×3 DataFrame\n Row │ X1        X2         Y      \n     │ Float64   Float64    Symbol \n─────┼─────────────────────────────\n   1 │ 0.427064  0.0648339  a\n   2 │ 0.376304  0.100022   a\n   3 │ 0.467095  0.185437   b\n   4 │ 0.457043  0.490688   b\n\nSee ObsView for more information on data subsets. 
See also oversample.\n\n\n\n\n\n","category":"function"},{"location":"api/#Operations","page":"API","title":"Operations","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"chunk\nflatten\ngroup_counts\ngroup_indices\nnormalise\nrpad_constant\nunbatch\nunsqueeze\nunstack","category":"page"},{"location":"api/#MLUtils.chunk","page":"API","title":"MLUtils.chunk","text":"chunk(x, n; [dims])\nchunk(x; [size, dims])\n\nSplit x into n parts or alternatively, if size is an integer, into equal chunks of size size. The parts contain the same number of elements except possibly for the last one that can be smaller.\n\nIn case size is a collection of integers instead, the elements of x are split into chunks of the given sizes.\n\nIf x is an array, dims can be used to specify along which dimension to split (defaults to the last dimension).\n\nExamples\n\njulia> chunk(1:10, 3)\n3-element Vector{UnitRange{Int64}}:\n 1:4\n 5:8\n 9:10\n\njulia> chunk(1:10; size = 2)\n5-element Vector{UnitRange{Int64}}:\n 1:2\n 3:4\n 5:6\n 7:8\n 9:10\n\njulia> x = reshape(collect(1:20), (5, 4))\n5×4 Matrix{Int64}:\n 1 6 11 16\n 2 7 12 17\n 3 8 13 18\n 4 9 14 19\n 5 10 15 20\n\njulia> xs = chunk(x, 2, dims=1)\n2-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}}:\n [1 6 11 16; 2 7 12 17; 3 8 13 18]\n [4 9 14 19; 5 10 15 20]\n\njulia> xs[1]\n3×4 view(::Matrix{Int64}, 1:3, :) with eltype Int64:\n 1 6 11 16\n 2 7 12 17\n 3 8 13 18\n\njulia> xes = chunk(x; size = 2, dims = 2)\n2-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}}:\n [1 6; 2 7; … ; 4 9; 5 10]\n [11 16; 12 17; … ; 14 19; 15 20]\n\njulia> xes[2]\n5×2 view(::Matrix{Int64}, :, 3:4) with eltype Int64:\n 11 16\n 12 17\n 13 18\n 14 19\n 15 20\n\njulia> chunk(1:6; size = [2, 4])\n2-element Vector{UnitRange{Int64}}:\n 1:2\n 3:6\n\n\n\n\n\nchunk(x, partition_idxs; [npartitions, 
dims])\n\nPartition the array x along the dimension dims according to the indices in partition_idxs.\n\npartition_idxs must be sorted and contain only positive integers between 1 and the number of partitions. \n\nIf the number of partitions npartitions is not provided, it is inferred from partition_idxs.\n\nIf dims is not provided, it defaults to the last dimension.\n\nSee also unbatch.\n\nExamples\n\njulia> x = reshape([1:10;], 2, 5)\n2×5 Matrix{Int64}:\n 1 3 5 7 9\n 2 4 6 8 10\n\njulia> chunk(x, [1, 2, 2, 3, 3])\n3-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}}:\n [1; 2;;]\n [3 5; 4 6]\n [7 9; 8 10]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.flatten","page":"API","title":"MLUtils.flatten","text":"flatten(x::AbstractArray)\n\nReshape arbitrarily-shaped input into a matrix-shaped output, preserving the size of the last dimension.\n\nSee also unsqueeze.\n\nExamples\n\njulia> rand(3,4,5) |> flatten |> size\n(12, 5)\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.group_counts","page":"API","title":"MLUtils.group_counts","text":"group_counts(x)\n\nCount the number of times that each element of x appears.\n\nSee also group_indices.\n\nExamples\n\njulia> group_counts(['a', 'b', 'b'])\nDict{Char, Int64} with 2 entries:\n 'a' => 1\n 'b' => 2\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.group_indices","page":"API","title":"MLUtils.group_indices","text":"group_indices(x) -> Dict\n\nComputes the indices of elements in the vector x for each distinct value contained. 
This information is useful for resampling strategies, such as stratified sampling.\n\nSee also group_counts.\n\nExamples\n\njulia> x = [:yes, :no, :maybe, :yes];\n\njulia> group_indices(x)\nDict{Symbol, Vector{Int64}} with 3 entries:\n :yes => [1, 4]\n :maybe => [3]\n :no => [2]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.normalise","page":"API","title":"MLUtils.normalise","text":"normalise(x; dims=ndims(x), ϵ=1e-5)\n\nNormalise the array x to mean 0 and standard deviation 1 across the dimension(s) given by dims. By default, dims is the last dimension. \n\nϵ is a small constant added to the denominator for numerical stability.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.rpad_constant","page":"API","title":"MLUtils.rpad_constant","text":"rpad_constant(v::AbstractArray, n::Union{Integer, Tuple}, val = 0; dims=:)\n\nReturn the given sequence padded with val along the dimensions dims up to a maximum length in each direction specified by n.\n\nExamples\n\njulia> rpad_constant([1, 2], 4, -1) # padding with -1 up to size 4\n4-element Vector{Int64}:\n 1\n 2\n -1\n -1\n\njulia> rpad_constant([1, 2, 3], 2) # no padding if length is already greater than n\n3-element Vector{Int64}:\n 1\n 2\n 3\n\njulia> rpad_constant([1 2; 3 4], 4; dims=1) # padding along the first dimension\n4×2 Matrix{Int64}:\n 1 2\n 3 4\n 0 0\n 0 0\n\njulia> rpad_constant([1 2; 3 4], 4) # padding along all dimensions by default\n4×4 Matrix{Int64}:\n 1 2 0 0\n 3 4 0 0\n 0 0 0 0\n 0 0 0 0\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.unbatch","page":"API","title":"MLUtils.unbatch","text":"unbatch(x)\n\nReverse of the batch operation, unstacking the last dimension of the array x.\n\nSee also unstack and chunk.\n\nExamples\n\njulia> unbatch([1 3 5 7;\n 2 4 6 8])\n4-element Vector{Vector{Int64}}:\n [1, 2]\n [3, 4]\n [5, 6]\n [7, 
8]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.unsqueeze","page":"API","title":"MLUtils.unsqueeze","text":"unsqueeze(x; dims)\n\nReturn x reshaped into an array one dimensionality higher than x, where dims indicates in which dimension x is extended. dims can be an integer between 1 and ndims(x)+1.\n\nSee also flatten, stack.\n\nExamples\n\njulia> unsqueeze([1 2; 3 4], dims=2)\n2×1×2 Array{Int64, 3}:\n[:, :, 1] =\n 1\n 3\n\n[:, :, 2] =\n 2\n 4\n\n\njulia> xs = [[1, 2], [3, 4], [5, 6]]\n3-element Vector{Vector{Int64}}:\n [1, 2]\n [3, 4]\n [5, 6]\n\njulia> unsqueeze(xs, dims=1)\n1×3 Matrix{Vector{Int64}}:\n [1, 2] [3, 4] [5, 6]\n\n\n\n\n\nunsqueeze(; dims)\n\nReturns a function which, acting on an array, inserts a dimension of size 1 at dims.\n\nExamples\n\njulia> rand(21, 22, 23) |> unsqueeze(dims=2) |> size\n(21, 1, 22, 23)\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.unstack","page":"API","title":"MLUtils.unstack","text":"unstack(xs; dims)\n\nUnroll the given xs into an array of arrays along the given dimension dims.\n\nIt is the inverse operation of stack.\n\nSee also unbatch and chunk.\n\nExamples\n\njulia> unstack([1 3 5 7; 2 4 6 8], dims=2)\n4-element Vector{Vector{Int64}}:\n [1, 2]\n [3, 4]\n [5, 6]\n [7, 8]\n\n\n\n\n\n","category":"function"},{"location":"api/#Datasets","page":"API","title":"Datasets","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"Datasets.load_iris\nDatasets.make_sin\nDatasets.make_spiral\nDatasets.make_poly\nDatasets.make_moons","category":"page"},{"location":"api/#MLUtils.Datasets.load_iris","page":"API","title":"MLUtils.Datasets.load_iris","text":"load_iris() -> X, y, names\n\nLoads the first 150 observations from the Iris flower data set introduced by Ronald Fisher (1936). The 4 by 150 matrix X contains the numeric measurements, in which each individual column denotes an observation. The vector y contains the class labels as strings. 
The vector names contains the names of the features (i.e. rows of X).\n\n[1] Fisher, Ronald A. \"The use of multiple measurements in taxonomic problems.\" Annals of eugenics 7.2 (1936): 179-188.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_sin","page":"API","title":"MLUtils.Datasets.make_sin","text":"make_sin(n, start, stop; noise = 0.3, f_rand = randn) -> x, y\n\nGenerates n noisy, equally spaced samples of a sine wave from start to stop by adding noise .* f_rand(length(x)) to the result of sin(x).\n\nReturns the vector x with the samples and the noisy response y.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_spiral","page":"API","title":"MLUtils.Datasets.make_spiral","text":"make_spiral(n, a, theta, b; noise = 0.01, f_rand = randn) -> x, y\n\nGenerates n noisy responses for a spiral with two labels. Uses the radius, angle, and scaling arguments to space the points in 2D space, adding noise .* f_rand(n) to the response.\n\nReturns the 2 x n matrix x with the coordinates of the samples and the vector y with the labels.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_poly","page":"API","title":"MLUtils.Datasets.make_poly","text":"make_poly(coef, x; noise = 0.01, f_rand = randn) -> x, y\n\nGenerates a noisy response for a polynomial of degree length(coef) - 1 with the coefficients given by coef. The response is generated by elementwise evaluation of the polynomial on the elements of x, adding noise .* f_rand(length(x)) to the result.\n\nThe vector coef contains the coefficients for the terms of the polynomial. 
The first element of coef denotes the coefficient for the term with the highest degree, while the last element of coef denotes the intercept.\n\nReturns the input x and the noisy response y.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_moons","page":"API","title":"MLUtils.Datasets.make_moons","text":"make_moons(n; noise=0.0, f_rand=randn, shuffle=true) -> x, y\n\nGenerate a dataset with two interleaving half circles. \n\nIf n is an integer, the number of samples is n and the number of samples for each half circle is n ÷ 2. If n is a tuple, the first element of the tuple denotes the number of samples for the first half circle and the second element denotes the number of samples for the second half circle.\n\nThe noise level can be controlled by the noise argument.\n\nSet shuffle=false to keep the order of the samples.\n\nReturns the 2 x n matrix x with the samples and the vector y with the labels.\n\n\n\n\n\n","category":"function"},{"location":"#MLUtils.jl","page":"Home","title":"MLUtils.jl","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"(Image: ) (Image: ) (Image: ) (Image: )","category":"page"},{"location":"","page":"Home","title":"Home","text":"MLUtils.jl defines interfaces and implements common utilities for Machine Learning pipelines.","category":"page"},{"location":"#Features","page":"Home","title":"Features","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"An extensible dataset interface (numobs and getobs).\nData iteration and dataloaders (eachobs and DataLoader).\nLazy data views (obsview).\nResampling procedures (undersample and oversample).\nTrain/test splits (splitobs).\nData partitioning and aggregation tools (batch, unbatch, chunk, group_counts, group_indices).\nFolds for cross-validation (kfolds, leavepout).\nLazy dataset transformations (mapobs, filterobs, groupobs, joinobs, shuffleobs).\nToy datasets for demonstration purposes. 
\nOther data handling utilities (flatten, normalise, unsqueeze, stack, unstack).","category":"page"},{"location":"#Examples","page":"Home","title":"Examples","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Let us take a look at a hello world example to get a feeling for how to use this package in a typical ML scenario. ","category":"page"},{"location":"","page":"Home","title":"Home","text":"using MLUtils\n\n# X is a matrix of floats\n# Y is a vector of strings\nX, Y = load_iris()\n\n# The iris dataset is ordered according to its labels,\n# which means that we should shuffle the dataset before\n# partitioning it into training and test sets.\nXs, Ys = shuffleobs((X, Y))\n\n# We leave out 15% of the data for testing\ncv_data, test_data = splitobs((Xs, Ys); at=0.85)\n\n# Next we partition the data using a 10-fold scheme.\nfor (train_data, val_data) in kfolds(cv_data; k=10)\n\n    # We apply a lazy transform for data augmentation\n    train_data = mapobs(xy -> (xy[1] .+ 0.1 .* randn.(), xy[2]), train_data)\n\n    for epoch = 1:10\n        # Iterate over the data using mini-batches of 5 observations each\n        for (x, y) in eachobs(train_data, batchsize=5)\n            # ... train supervised model on minibatches here\n        end\n    end\nend","category":"page"},{"location":"","page":"Home","title":"Home","text":"In the above code snippet, the inner loop over eachobs is the only place where data other than indices is actually being copied. In fact, while x and y are materialized arrays, all the rest are data views. ","category":"page"},{"location":"#Historical-Notes","page":"Home","title":"Historical Notes","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"MLUtils.jl brings together functionalities previously found in LearnBase.jl, MLDataPattern.jl, and MLLabelUtils.jl. These packages are now discontinued. 
","category":"page"},{"location":"","page":"Home","title":"Home","text":"Other features were ported from the deep learning library Flux.jl, as they are of general use. ","category":"page"},{"location":"#Alternatives-and-Related-Packages","page":"Home","title":"Alternatives and Related Packages","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"MLJ.jl is a more complete package for managing the whole machine learning pipeline if you are looking for a sklearn replacement.\nNNlib.jl provides utility functions for neural networks.\nTableTransforms.jl contains transformations for tabular datasets.\nDataAugmentation.jl. Efficient, composable data augmentation for machine and deep learning with support for n-dimensional images, keypoints and categorical masks.","category":"page"}] +[{"location":"api/","page":"API","title":"API","text":"CollapsedDocStrings = true","category":"page"},{"location":"api/#API-Reference","page":"API","title":"API Reference","text":"","category":"section"},{"location":"api/#Core-API","page":"API","title":"Core API","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"getobs\ngetobs!\nnumobs","category":"page"},{"location":"api/#MLUtils.getobs","page":"API","title":"MLUtils.getobs","text":"getobs(data, [idx])\n\nReturn the observations corresponding to the observation index idx. Note that idx can be any type as long as data has defined getobs for that type. If idx is not provided, then materialize all observations in data.\n\nIf data does not have getobs defined, then in the case of Tables.table(data) == true returns the row(s) in position idx, otherwise returns data[idx].\n\nAuthors of custom data containers should implement Base.getindex for their type instead of getobs. 
getobs should only be implemented for types where there is a difference between getobs and Base.getindex (such as multi-dimensional arrays).\n\nThe returned observation(s) should be in the form intended to be passed as-is to some learning algorithm. There is no strict interface requirement on what this \"actual data\" must look like. Every author behind some custom data container can make this decision themselves. The output should be consistent when idx is a scalar vs vector.\n\ngetobs supports by default nested combinations of arrays, tuples, named tuples, and dictionaries. \n\nSee also getobs! and numobs.\n\nExamples\n\njulia> x = (a = [1, 2, 3], b = rand(6, 3));\n\njulia> getobs(x, 2) == (a = 2, b = x.b[:, 2])\ntrue\n\njulia> getobs(x, [1, 3]) == (a = [1, 3], b = x.b[:, [1, 3]])\ntrue\n\njulia> x = Dict(:a => [1, 2, 3], :b => rand(6, 3));\n\njulia> getobs(x, 2) == Dict(:a => 2, :b => x[:b][:, 2])\ntrue\n\njulia> getobs(x, [1, 3]) == Dict(:a => [1, 3], :b => x[:b][:, [1, 3]])\ntrue\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.getobs!","page":"API","title":"MLUtils.getobs!","text":"getobs!(buffer, data, idx)\n\nIn-place version of getobs(data, idx). If this method is defined for the type of data, then buffer should be used to store the result, instead of allocating a dedicated object.\n\nImplementing this function is optional. If no such method is provided for the type of data, then buffer will be ignored and the result of getobs returned. This could be because the type of data may not lend itself to the concept of copy!. Thus, supporting a custom getobs! is optional and not required.\n\nCustom implementations of getobs! should be consistent with getobs in terms of the output format, that is getobs!(buffer, data, idx) == getobs(data, idx).\n\nSee also getobs and numobs. 
\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.numobs","page":"API","title":"MLUtils.numobs","text":"numobs(data)\n\nReturn the total number of observations contained in data.\n\nIf data does not have numobs defined, then in the case of Tables.table(data) == true returns the number of rows, otherwise returns length(data).\n\nAuthors of custom data containers should implement Base.length for their type instead of numobs. numobs should only be implemented for types where there is a difference between numobs and Base.length (such as multi-dimensional arrays).\n\ngetobs supports by default nested combinations of array, tuple, named tuples, and dictionaries. \n\nSee also getobs.\n\nExamples\n\njulia> x = (a = [1, 2, 3], b = ones(6, 3)); # named tuples\n\njulia> numobs(x)\n3\n\njulia> x = Dict(:a => [1, 2, 3], :b => ones(6, 3)); # dictionaries\n\njulia> numobs(x) \n3\n\nAll internal containers must have the same number of observations:\n\njulia> x = (a = [1, 2, 3, 4], b = ones(6, 3));\n\njulia> numobs(x)\nERROR: DimensionMismatch: All data containers must have the same number of observations.\nStacktrace:\n [1] _check_numobs_error()\n @ MLUtils ~/.julia/dev/MLUtils/src/observation.jl:163\n [2] _check_numobs\n @ ~/.julia/dev/MLUtils/src/observation.jl:130 [inlined]\n [3] numobs(data::NamedTuple{(:a, :b), Tuple{Vector{Int64}, Matrix{Float64}}})\n @ MLUtils ~/.julia/dev/MLUtils/src/observation.jl:177\n [4] top-level scope\n @ REPL[35]:1\n\n\n\n\n\n","category":"function"},{"location":"api/#Lazy-Transforms","page":"API","title":"Lazy Transforms","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"filterobs\ngroupobs\njoinobs\nmapobs\nshuffleobs","category":"page"},{"location":"api/#MLUtils.filterobs","page":"API","title":"MLUtils.filterobs","text":"filterobs(f, data)\n\nReturn a subset of data container data including all indices i for which f(getobs(data, i)) === true.\n\ndata = 1:10\nnumobs(data) == 10\nfdata = 
filterobs(>(5), data)\nnumobs(fdata) == 5\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.groupobs","page":"API","title":"MLUtils.groupobs","text":"groupobs(f, data)\n\nSplit the data container data into different data containers, grouping observations by f(obs).\n\ndata = -10:10\ndatas = groupobs(>(0), data)\nlength(datas) == 2\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.joinobs","page":"API","title":"MLUtils.joinobs","text":"joinobs(datas...)\n\nConcatenate data containers datas.\n\ndata1, data2 = 1:10, 11:20\njdata = joinobs(data1, data2)\ngetobs(jdata, 15) == 15\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.mapobs","page":"API","title":"MLUtils.mapobs","text":"mapobs(f, data; batched=:auto)\n\nLazily map f over the observations in a data container data. Returns a new data container mdata that can be indexed and has a length. Indexing triggers the transformation f.\n\nThe batched keyword argument controls the behavior of mdata[idx] and mdata[idxs] where idx is an integer and idxs is a vector of integers:\n\nbatched=:auto (default). Let f handle the two cases. Calls f(getobs(data, idx)) and f(getobs(data, idxs)).\nbatched=:never. The function f is always called on a single observation. Calls f(getobs(data, idx)) and [f(getobs(data, idx)) for idx in idxs].\nbatched=:always. The function f is always called on a batch of observations. Calls getobs(f(getobs(data, [idx])), 1) and f(getobs(data, idxs)).\n\nExamples\n\njulia> data = (a=[1,2,3], b=[1,2,3]);\n\njulia> mdata = mapobs(data) do x\n (c = x.a .+ x.b, d = x.a .- x.b)\n end\nmapobs(#25, (a = [1, 2, 3], b = [1, 2, 3]); batched=:auto))\n\njulia> mdata[1]\n(c = 2, d = 0)\n\njulia> mdata[1:2]\n(c = [2, 4], d = [0, 0])\n\n\n\n\n\nmapobs(fs, data)\n\nLazily map each function in tuple fs over the observations in data container data. 
Returns a tuple of transformed data containers.\n\n\n\n\n\nmapobs(namedfs::NamedTuple, data)\n\nMap a NamedTuple of functions over data, turning it into a data container of NamedTuples. Field syntax can be used to select a column of the resulting data container.\n\ndata = 1:10\nnameddata = mapobs((x = sqrt, y = log), data)\ngetobs(nameddata, 10) == (x = sqrt(10), y = log(10))\ngetobs(nameddata.x, 10) == sqrt(10)\n\n\n\n\n\nmapobs(f, d::DataLoader)\n\nReturn a new dataloader based on d that applies f at each iteration. \n\nExamples\n\njulia> X = ones(3, 6);\n\njulia> function f(x)\n @show x\n return x\n end\nf (generic function with 1 method)\n\njulia> d = DataLoader(X, batchsize=2, collate=false);\n\njulia> d = mapobs(f, d);\n\njulia> for x in d\n @assert size(x) == (2,)\n @assert size(x[1]) == (3,)\n end\nx = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]\nx = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]\nx = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]\n\njulia> d2 = DataLoader(X, batchsize=2, collate=true);\n\njulia> d2 = mapobs(f, d2);\n\njulia> for x in d2\n @assert size(x) == (3, 2)\n end\nx = [1.0 1.0; 1.0 1.0; 1.0 1.0]\nx = [1.0 1.0; 1.0 1.0; 1.0 1.0]\nx = [1.0 1.0; 1.0 1.0; 1.0 1.0]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.shuffleobs","page":"API","title":"MLUtils.shuffleobs","text":"shuffleobs([rng], data)\n\nReturn a version of the dataset data that contains all the original observations in a random reordering.\n\nThe values of data itself are not copied. Instead only the indices are shuffled. This function calls obsview to accomplish that, which means that the return value is likely of a different type than data.\n\nOptionally, a random number generator rng can be passed as the first argument. \n\nFor this function to work, the type of data must implement numobs and getobs. 
\n\nSee also obsview.\n\nExamples\n\n# For Arrays the subset will be of type SubArray\n@assert typeof(shuffleobs(rand(4,10))) <: SubArray\n\n# Iterate through all observations in random order\nfor x in eachobs(shuffleobs(X))\n ...\nend\n\n\n\n\n\n","category":"function"},{"location":"api/#Batching,-Iteration,-and-Views","page":"API","title":"Batching, Iteration, and Views","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"batch\nbatchsize\nbatchseq\nBatchView\neachobs\nDataLoader\nobsview\nObsView\nrandobs\nslidingwindow","category":"page"},{"location":"api/#MLUtils.batch","page":"API","title":"MLUtils.batch","text":"batch(xs)\n\nBatch the arrays in xs into a single array with an extra dimension.\n\nIf the elements of xs are tuples, named tuples, or dicts, the output will be of the same type. \n\nSee also unbatch.\n\nExamples\n\njulia> batch([[1,2,3], \n [4,5,6]])\n3×2 Matrix{Int64}:\n 1 4\n 2 5\n 3 6\n\njulia> batch([(a=[1,2], b=[3,4])\n (a=[5,6], b=[7,8])]) \n(a = [1 5; 2 6], b = [3 7; 4 8])\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.batchsize","page":"API","title":"MLUtils.batchsize","text":"batchsize(data::BatchView) -> Int\n\nReturn the fixed size of each batch in data.\n\nExamples\n\nusing MLUtils\nX, Y = MLUtils.load_iris()\n\nA = BatchView(X, batchsize=30)\n@assert batchsize(A) == 30\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.batchseq","page":"API","title":"MLUtils.batchseq","text":"batchseq(seqs, val = 0)\n\nTake a list of N sequences, and turn them into a single sequence where each item is a batch of N. 
Short sequences will be padded by val.\n\nExamples\n\njulia> batchseq([[1, 2, 3], [4, 5]], 0)\n3-element Vector{Vector{Int64}}:\n [1, 4]\n [2, 5]\n [3, 0]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.BatchView","page":"API","title":"MLUtils.BatchView","text":"BatchView(data, batchsize; partial=true, collate=nothing)\nBatchView(data; batchsize=1, partial=true, collate=nothing)\n\nCreate a view of the given data that represents it as a vector of batches. Each batch will contain an equal number of observations. The batch-size can be specified using the parameter batchsize. In the case that the size of the dataset is not divisible by the specified batchsize, the remaining observations will be ignored if partial=false. If partial=true instead, the last batch can be slightly smaller.\n\nIf used as an iterator, the object will iterate over the dataset once, effectively denoting an epoch. \n\nAny data access is delayed until iteration or indexing is performed. The getobs function is called on the data object to retrieve the observations.\n\nFor BatchView to work on some data structure, the type of the given variable data must implement the data container interface. See ObsView for more info.\n\nArguments\n\ndata : The object describing the dataset. Can be of any type as long as it implements getobs and numobs (see Details for more information).\nbatchsize : The batch-size of each batch. It is the number of observations that each batch must contain (except possibly for the last one).\npartial : If partial=false and the number of observations is not divisible by the batch-size, then the last mini-batch is dropped.\ncollate: Defines the batching behavior. \nIf nothing (default), a batch is getobs(data, indices). \nIf false, each batch is [getobs(data, i) for i in indices]. \nIf true, applies MLUtils.batch to the vector of observations in a batch, recursively collating arrays in the last dimensions. 
See MLUtils.batch for more information and examples.\nIf a custom function, it will be used in place of MLUtils.batch. It should take a vector of observations as input.\n\nSee also DataLoader.\n\nExamples\n\njulia> using MLUtils\n\njulia> X, Y = MLUtils.load_iris();\n\njulia> A = BatchView(X, batchsize=30);\n\njulia> @assert eltype(A) <: Matrix{Float64}\n\njulia> @assert length(A) == 5 # Iris has 150 observations\n\njulia> @assert size(A[1]) == (4,30) # Iris has 4 features\n\njulia> for x in BatchView(X, batchsize=30)\n # 5 batches of size 30 observations\n @assert size(x) == (4, 30)\n @assert numobs(x) === 30\n end\n\njulia> for (x, y) in BatchView((X, Y), batchsize=20, partial=true)\n # 7 batches of size 20 observations + 1 batch of 10 observations\n @assert typeof(x) <: Matrix{Float64}\n @assert typeof(y) <: Vector{String}\n end\n\njulia> for batch in BatchView((X, Y), batchsize=20, partial=false, collate=false)\n # 7 batches of size 20 observations\n @assert length(batch) == 20\n x1, y1 = batch[1]\n end\n\njulia> function collate_fn(batch)\n # collate observations into a custom batch\n return hcat([x[1] for x in batch]...), join([x[2] for x in batch])\n end;\n\njulia> for (x, y) in BatchView((rand(10, 4), [\"a\", \"b\", \"c\", \"d\"]), batchsize=2, collate=collate_fn)\n @assert size(x) == (10, 2)\n @assert y isa String\n end\n\n\n\n\n\n","category":"type"},{"location":"api/#MLUtils.eachobs","page":"API","title":"MLUtils.eachobs","text":"eachobs(data; kws...)\n\nReturn an iterator over data.\n\nSupports the same arguments as DataLoader. 
The batchsize default is -1 here while it is 1 for DataLoader.\n\nExamples\n\nX = rand(4,100)\n\nfor x in eachobs(X)\n # loop entered 100 times\n @assert typeof(x) <: Vector{Float64}\n @assert size(x) == (4,)\nend\n\n# mini-batch iterations\nfor x in eachobs(X, batchsize=10)\n # loop entered 10 times\n @assert typeof(x) <: Matrix{Float64}\n @assert size(x) == (4,10)\nend\n\n# support for tuples, named tuples, dicts\nfor (x, y) in eachobs((X, Y))\n # ...\nend\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.DataLoader","page":"API","title":"MLUtils.DataLoader","text":"DataLoader(data; [batchsize, buffer, collate, parallel, partial, rng, shuffle])\n\nAn object that iterates over mini-batches of data, each mini-batch containing batchsize observations (except possibly the last one).\n\nTakes as input a single data array, a tuple (or a named tuple) of arrays, or in general any data object that implements the numobs and getobs methods.\n\nThe last dimension in each array is the observation dimension, i.e. the one divided into mini-batches.\n\nThe original data is preserved in the data field of the DataLoader.\n\nArguments\n\ndata: The data to be iterated over. The data type has to be supported by numobs and getobs.\nbatchsize: If less than 0, iterates over individual observations. Otherwise, each iteration (except possibly the last) yields a mini-batch containing batchsize observations. Default 1.\nbuffer: If buffer=true and supported by the type of data, a buffer will be allocated and reused for memory efficiency. You may want to set partial=false to avoid size mismatch. Finally, you can pass an external buffer to be used in getobs! (depending on the collate and batchsize options, this could be getobs!(buffer, data, idxs) or getobs!(buffer[i], data, idx)). Default false. \ncollate: Defines the batching behavior. Default nothing. \nIf nothing, a batch is getobs(data, indices). \nIf false, each batch is [getobs(data, i) for i in indices]. 
\nIf true, applies MLUtils.batch to the vector of observations in a batch, recursively collating arrays in the last dimensions. See MLUtils.batch for more information and examples.\nIf a custom function, it will be used in place of MLUtils.batch. It should take a vector of observations as input.\nparallel: Whether to load data in parallel using worker threads. Greatly speeds up data loading by a factor of the number of available threads. Requires starting Julia with multiple threads. Check Threads.nthreads() to see the number of available threads. Passing parallel = true breaks ordering guarantees. Default false.\npartial: This argument is used only when batchsize > 0. If partial=false and the number of observations is not divisible by the batchsize, then the last mini-batch is dropped. Default true.\nrng: A random number generator. Default Random.default_rng().\nshuffle: Whether to shuffle the observations before iterating. Unlike wrapping the data container with shuffleobs(data), shuffle=true ensures that the observations are shuffled anew every time you start iterating over eachobs. 
Default false.\n\nExamples\n\njulia> Xtrain = rand(10, 100);\n\njulia> array_loader = DataLoader(Xtrain, batchsize=2);\n\njulia> for x in array_loader\n @assert size(x) == (10, 2)\n # do something with x, 50 times\n end\n\njulia> array_loader.data === Xtrain\ntrue\n\njulia> tuple_loader = DataLoader((Xtrain,), batchsize=2); # similar, but yielding 1-element tuples\n\njulia> for x in tuple_loader\n @assert x isa Tuple{Matrix}\n @assert size(x[1]) == (10, 2)\n end\n\njulia> Ytrain = rand('a':'z', 100); # now make a DataLoader yielding 2-element named tuples\n\njulia> train_loader = DataLoader((data=Xtrain, label=Ytrain), batchsize=5, shuffle=true);\n\njulia> for epoch in 1:100\n for (x, y) in train_loader # access via tuple destructuring\n @assert size(x) == (10, 5)\n @assert size(y) == (5,)\n # loss += f(x, y) # etc, runs 100 * 20 times\n end\n end\n\njulia> first(train_loader).label isa Vector{Char} # access via property name\ntrue\n\njulia> first(train_loader).label == Ytrain[1:5] # because of shuffle=true\nfalse\n\njulia> foreach(println∘summary, DataLoader(rand(Int8, 10, 64), batchsize=30)) # partial=false would omit last\n10×30 Matrix{Int8}\n10×30 Matrix{Int8}\n10×4 Matrix{Int8}\n\njulia> collate_fn(batch) = join(batch);\n\njulia> first(DataLoader([\"a\", \"b\", \"c\", \"d\"], batchsize=2, collate=collate_fn))\n\"ab\"\n\n\n\n\n\n","category":"type"},{"location":"api/#MLUtils.obsview","page":"API","title":"MLUtils.obsview","text":"obsview(data, [indices])\n\nReturns a lazy view of the observations in data that correspond to the given indices. No data will be copied except for the indices. It is similar to constructing an ObsView, but returns a SubArray if the type of data is Array or SubArray. Furthermore, this function may be extended for custom types of data that also want to provide their own subset-type.\n\nIn case data is a tuple, the constructor will be mapped over its elements. 
That means that the constructor returns a tuple of ObsView instead of an ObsView of tuples.\n\nIf instead you want to get the subset of observations corresponding to the given indices in their native type, use getobs.\n\nSee ObsView for more information.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.ObsView","page":"API","title":"MLUtils.ObsView","text":"ObsView(data, [indices])\n\nUsed to represent a subset of some data of arbitrary type by storing which observation-indices the subset spans. Furthermore, subsequent subsettings are accumulated without needing to access actual data.\n\nThe main purpose for the existence of ObsView is to delay data access and movement until an actual batch of data (or single observation) is needed for some computation. This is particularly useful when the data is not located in memory, but on the hard drive or some remote location. In such a scenario one wants to load the required data only when needed.\n\nAny data access is delayed until getindex is called, and even getindex returns the result of obsview which in general avoids data movement until getobs is called. If used as an iterator, the view will iterate over the dataset once, effectively denoting an epoch. Each iteration will return a lazy subset to the current observation.\n\nArguments\n\ndata : The object describing the dataset. Can be of any type as long as it implements getobs and numobs (see Details for more information).\nindices : Optional. The index or indices of the observation(s) in data that the subset should represent. Can be of type Int or some subtype of AbstractVector.\n\nMethods\n\ngetindex : Returns the observation(s) of the given index/indices. No data is copied aside from the required indices.\nnumobs : Returns the total number of observations in the subset.\ngetobs : Returns the underlying data that the ObsView represents at the given relative indices. 
Note that these indices are in \"subset space\", and in general will not directly correspond to the same indices in the underlying data set.\n\nDetails\n\nFor ObsView to work on some data structure, the desired type MyType must implement the following interface:\n\ngetobs(data::MyType, idx) : Should return the observation(s) indexed by idx. In what form is up to the user. Note that idx can be of type Int or AbstractVector.\nnumobs(data::MyType) : Should return the total number of observations in data\n\nThe following methods can also be provided and are optional:\n\ngetobs(data::MyType) : By default this function is the identity function. If that is not the behaviour that you want for your type, you need to provide this method as well.\nobsview(data::MyType, idx) : If your custom type has its own kind of subset type, you can return it here. An example of such a case is SubArray for representing a subset of some AbstractArray.\ngetobs!(buffer, data::MyType, [idx]) : Inplace version of getobs(data, idx). If this method is provided for MyType, then eachobs can preallocate a buffer that is then reused every iteration. 
Note: buffer should be equivalent to the return value of getobs(::MyType, ...), since this is how buffer is preallocated by default.\n\nExamples\n\nX, Y = MLUtils.load_iris()\n\n# The iris set has 150 observations and 4 features\n@assert size(X) == (4,150)\n\n# Represents the 80 observations as an ObsView\nv = ObsView(X, 21:100)\n@assert numobs(v) == 80\n@assert typeof(v) <: ObsView\n# getobs indexes into v\n@assert getobs(v, 1:10) == X[:, 21:30]\n\n# Use `obsview` to avoid boxing into ObsView\n# for types that provide a custom \"subset\", such as arrays.\n# Here it instead creates a native SubArray.\nv = obsview(X, 1:100)\n@assert numobs(v) == 100\n@assert typeof(v) <: SubArray\n\n# Also works for tuples of arbitrary length\nsubset = obsview((X, Y), 1:100)\n@assert numobs(subset) == 100\n@assert typeof(subset) <: Tuple # tuple of SubArray\n\n# Use as iterator\nfor x in ObsView(X)\n @assert typeof(x) <: SubArray{Float64,1}\nend\n\n# iterate over each individual labeled observation\nfor (x, y) in ObsView((X, Y))\n @assert typeof(x) <: SubArray{Float64,1}\n @assert typeof(y) <: String\nend\n\n# same but in random order\nfor (x, y) in ObsView(shuffleobs((X, Y)))\n @assert typeof(x) <: SubArray{Float64,1}\n @assert typeof(y) <: String\nend\n\n# Indexing: take first 10 observations\nx, y = ObsView((X, Y))[1:10]\n\nSee also\n\nobsview, getobs, numobs, splitobs, shuffleobs, kfolds.\n\n\n\n\n\n","category":"type"},{"location":"api/#MLUtils.randobs","page":"API","title":"MLUtils.randobs","text":"randobs(data, [n])\n\nPick a random observation or a batch of n random observations from data. For this function to work, the type of data must implement numobs and getobs.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.slidingwindow","page":"API","title":"MLUtils.slidingwindow","text":"slidingwindow(data; size, stride=1) -> SlidingWindow\n\nReturn a vector-like view of the data for which each element is a fixed-size \"window\" of size adjacent observations. 
Note that only complete windows are included in the output, which implies that it is possible for excess observations to be omitted from the view.\n\nNote that the windows are not materialized at construction time. To actually get a copy of the data at some window use indexing or getobs.\n\njulia> s = slidingwindow(1:20, size=6)\nslidingwindow(1:20, size=6, stride=1)\n\njulia> s[1]\n1:6\n\njulia> s[2]\n2:7\n\nThe optional parameter stride can be used to specify the distance between the start elements of each adjacent window. By default the stride is equal to 1.\n\njulia> s = slidingwindow(1:20, size=6, stride=3)\nslidingwindow(1:20, size=6, stride=3)\n\njulia> for w in s; println(w); end\n1:6\n4:9\n7:12\n10:15\n13:18\n\n\n\n\n\n","category":"function"},{"location":"api/#Partitioning","page":"API","title":"Partitioning","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"leavepout\nkfolds\nsplitobs","category":"page"},{"location":"api/#MLUtils.leavepout","page":"API","title":"MLUtils.leavepout","text":"leavepout(n::Integer, [size = 1]) -> Tuple\n\nCompute the train/validation assignments for k ≈ n/size repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector the index-vectors for the validation subsets respectively. Each validation subset will have either size or size+1 observations assigned to it. The following code snippet generates the index-vectors for size = 2.\n\njulia> train_idx, val_idx = leavepout(10, 2);\n\nEach observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. 
Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.\n\njulia> train_idx\n5-element Array{Array{Int64,1},1}:\n [3,4,5,6,7,8,9,10]\n [1,2,5,6,7,8,9,10]\n [1,2,3,4,7,8,9,10]\n [1,2,3,4,5,6,9,10]\n [1,2,3,4,5,6,7,8]\n\njulia> val_idx\n5-element Array{UnitRange{Int64},1}:\n 1:2\n 3:4\n 5:6\n 7:8\n 9:10\n\n\n\n\n\nleavepout(data, p = 1)\n\nRepartition a data container using a k-fold strategy, where k is chosen in such a way that each validation subset of the resulting folds contains roughly p observations. Defaults to p = 1, which is also known as \"leave-one-out\" partitioning.\n\nThe resulting sequence of folds is returned as a lazy iterator. Only data subsets are created. That means no actual data is copied until getobs is invoked.\n\nfor (train, val) in leavepout(X, p=2)\n # if numobs(X) is divisible by 2,\n # then numobs(val) will be 2 for each iteration,\n # otherwise it may be 3 for the first few iterations.\nend\n\nSee kfolds for a related function.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.kfolds","page":"API","title":"MLUtils.kfolds","text":"kfolds(n::Integer, k = 5) -> Tuple\n\nCompute the train/validation assignments for k repartitions of n observations, and return them in the form of two vectors. The first vector contains the index-vectors for the training subsets, and the second vector the index-vectors for the validation subsets respectively. A general rule of thumb is to use either k = 5 or k = 10. The following code snippet generates the indices assignments for k = 5\n\njulia> train_idx, val_idx = kfolds(10, 5);\n\nEach observation is assigned to the validation subset once (and only once). Thus, a union over all validation index-vectors reproduces the full range 1:n. 
Note that there is no random assignment of observations to subsets, which means that adjacent observations are likely to be part of the same validation subset.\n\njulia> train_idx\n5-element Array{Array{Int64,1},1}:\n [3,4,5,6,7,8,9,10]\n [1,2,5,6,7,8,9,10]\n [1,2,3,4,7,8,9,10]\n [1,2,3,4,5,6,9,10]\n [1,2,3,4,5,6,7,8]\n\njulia> val_idx\n5-element Array{UnitRange{Int64},1}:\n 1:2\n 3:4\n 5:6\n 7:8\n 9:10\n\n\n\n\n\nkfolds(data, [k = 5])\n\nRepartition a data container k times using a k-folds strategy and return the sequence of folds as a lazy iterator. Only data subsets are created, which means that no actual data is copied until getobs is invoked.\n\nConceptually, a k-folds repartitioning strategy divides the given data into k roughly equal-sized parts. Each part will serve as validation set once, while the remaining parts are used for training. This results in k different partitions of data.\n\nIn the case that the size of the dataset is not divisible by the specified k, the remaining observations will be evenly distributed among the parts.\n\nfor (x_train, x_val) in kfolds(X, k=10)\n # code called 10 times\n # numobs(x_val) may differ up to ±1 over iterations\nend\n\nMultiple variables are supported (e.g. for labeled data)\n\nfor ((x_train, y_train), val) in kfolds((X, Y), k=10)\n # ...\nend\n\nBy default the folds are created using static splits. 
Use shuffleobs to randomly assign observations to the folds.\n\nfor (x_train, x_val) in kfolds(shuffleobs(X), k = 10)\n # ...\nend\n\nSee leavepout for a related function.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.splitobs","page":"API","title":"MLUtils.splitobs","text":"splitobs(n::Int; at) -> Tuple\n\nCompute the indices for two or more disjoint subsets of the range 1:n with split sizes determined by at.\n\nExamples\n\njulia> splitobs(100, at=0.7)\n(1:70, 71:100)\n\njulia> splitobs(100, at=(0.1, 0.4))\n(1:10, 11:50, 51:100)\n\n\n\n\n\nsplitobs([rng,] data; at, shuffle=false, stratified=nothing) -> Tuple\n\nPartition the data into two or more subsets.\n\nThe argument at specifies how to split the data:\n\nWhen at is a number between 0 and 1, this specifies the proportion in the first subset.\nWhen at is an integer, it specifies the number of observations in the first subset.\nWhen at is a tuple, each entry specifies the number or proportion in the corresponding subset, except for the last subset, which will contain the remaining observations. The number of returned subsets is length(at)+1.\n\nIf shuffle=true, randomly permute the observations before splitting. A random number generator rng can be optionally passed as the first argument.\n\nIf stratified is not nothing, it should be an array of labels with the same length as the data. The observations will be split in such a way that the proportion of each label is preserved in each subset.\n\nSupports any datatype implementing numobs. \n\nIt relies on obsview to create views of the data.\n\nExamples\n\njulia> splitobs(reshape(1:100, 1, :); at=0.7) # simple 70%-30% split, of a matrix\n([1 2 … 69 70], [71 72 … 99 100])\n\njulia> data = (x=ones(2,10), n=1:10) # a NamedTuple, consistent last dimension\n(x = [1.0 1.0 … 1.0 1.0; 1.0 1.0 … 1.0 1.0], n = 1:10)\n\njulia> splitobs(data, at=(0.5, 0.3)) # a 50%-30%-20% split, e.g. 
train/test/validation\n((x = [1.0 1.0 … 1.0 1.0; 1.0 1.0 … 1.0 1.0], n = 1:5), (x = [1.0 1.0 1.0; 1.0 1.0 1.0], n = 6:8), (x = [1.0 1.0; 1.0 1.0], n = 9:10))\n\njulia> train, test = splitobs((reshape(1.0:100.0, 1, :), 101:200), at=0.7, shuffle=true); # split a Tuple\n\njulia> vec(test[1]) .+ 100 == test[2]\ntrue\n\njulia> splitobs(1:10, at=0.5, stratified=[0,0,0,0,1,1,1,1,1,1]) # 2 zeros and 3 ones in each subset\n([1, 2, 5, 6, 7], [3, 4, 8, 9, 10])\n\n\n\n\n\n","category":"function"},{"location":"api/#Array-Constructors","page":"API","title":"Array Constructors","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"falses_like\nfill_like\nones_like\nrand_like\nrandn_like\ntrues_like\nzeros_like","category":"page"},{"location":"api/#MLUtils.falses_like","page":"API","title":"MLUtils.falses_like","text":"falses_like(x, [dims=size(x)])\n\nEquivalent to fill_like(x, false, Bool, dims).\n\nSee also fill_like and trues_like.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.fill_like","page":"API","title":"MLUtils.fill_like","text":"fill_like(x, val, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to val. The third and fourth arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nSee also zeros_like and ones_like.\n\nExamples\n\njulia> x = rand(Float32, 2)\n2-element Vector{Float32}:\n 0.16087806\n 0.89916044\n\njulia> fill_like(x, 1.7, (3, 3))\n3×3 Matrix{Float32}:\n 1.7 1.7 1.7\n 1.7 1.7 1.7\n 1.7 1.7 1.7\n\njulia> using CUDA\n\njulia> x = CUDA.rand(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 0.803167 0.476101\n 0.303041 0.317581\n\njulia> fill_like(x, 1.7, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 1.7 1.7\n 1.7 1.7\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.ones_like","page":"API","title":"MLUtils.ones_like","text":"ones_like(x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to 1. The second and third arguments are both optional, defaulting to the given array's eltype and size. The dimensions may be specified as an integer or as a tuple argument.\n\nSee also zeros_like and fill_like.\n\nExamples\n\njulia> x = rand(Float32, 2)\n2-element Vector{Float32}:\n 0.8621633\n 0.5158395\n\njulia> ones_like(x, (3, 3))\n3×3 Matrix{Float32}:\n 1.0 1.0 1.0\n 1.0 1.0 1.0\n 1.0 1.0 1.0\n\njulia> using CUDA\n\njulia> x = CUDA.rand(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 0.82297 0.656143\n 0.701828 0.391335\n\njulia> ones_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 1.0 1.0\n 1.0 1.0\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.rand_like","page":"API","title":"MLUtils.rand_like","text":"rand_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to a random value. The last two arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nThe default random number generator is used, unless a custom one is passed in explicitly as the first argument.\n\nSee also Base.rand and randn_like.\n\nExamples\n\njulia> x = ones(Float32, 2)\n2-element Vector{Float32}:\n 1.0\n 1.0\n\njulia> rand_like(x, (3, 3))\n3×3 Matrix{Float32}:\n 0.780032 0.920552 0.53689\n 0.121451 0.741334 0.5449\n 0.55348 0.138136 0.556404\n\njulia> using CUDA\n\njulia> x = CUDA.ones(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 1.0 1.0\n 1.0 1.0\n\njulia> rand_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 0.429274 0.135379\n 0.718895 0.0098756\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.randn_like","page":"API","title":"MLUtils.randn_like","text":"randn_like([rng=default_rng()], x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to a random value drawn from a normal distribution. The last two arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nThe default random number generator is used, unless a custom one is passed in explicitly as the first argument.\n\nSee also Base.randn and rand_like.\n\nExamples\n\njulia> x = ones(Float32, 2)\n2-element Vector{Float32}:\n 1.0\n 1.0\n\njulia> randn_like(x, (3, 3))\n3×3 Matrix{Float32}:\n -0.385331 0.956231 0.0745102\n 1.43756 -0.967328 2.06311\n 0.0482372 1.78728 -0.902547\n\njulia> using CUDA\n\njulia> x = CUDA.ones(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 1.0 1.0\n 1.0 1.0\n\njulia> randn_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n -0.578527 0.823445\n -1.01338 -0.612053\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.trues_like","page":"API","title":"MLUtils.trues_like","text":"trues_like(x, [dims=size(x)])\n\nEquivalent to fill_like(x, true, Bool, dims).\n\nSee also fill_like and falses_like.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.zeros_like","page":"API","title":"MLUtils.zeros_like","text":"zeros_like(x, [element_type=eltype(x)], [dims=size(x)])\n\nCreate an array with the given element type and size, based upon the given source array x. All elements of the new array will be set to 0. The second and third arguments are both optional, defaulting to the given array's eltype and size. 
The dimensions may be specified as an integer or as a tuple argument.\n\nSee also ones_like and fill_like.\n\nExamples\n\njulia> x = rand(Float32, 2)\n2-element Vector{Float32}:\n 0.4005432\n 0.36934233\n\njulia> zeros_like(x, (3, 3))\n3×3 Matrix{Float32}:\n 0.0 0.0 0.0\n 0.0 0.0 0.0\n 0.0 0.0 0.0\n\njulia> using CUDA\n\njulia> x = CUDA.rand(2, 2)\n2×2 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:\n 0.0695155 0.667979\n 0.558468 0.59903\n\njulia> zeros_like(x, Float64)\n2×2 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:\n 0.0 0.0\n 0.0 0.0\n\n\n\n\n\n","category":"function"},{"location":"api/#Resampling","page":"API","title":"Resampling","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"oversample\nundersample","category":"page"},{"location":"api/#MLUtils.oversample","page":"API","title":"MLUtils.oversample","text":"oversample([rng], data, classes; fraction=1, shuffle=true)\noversample([rng], data::Tuple; fraction=1, shuffle=true)\n\nGenerate a re-balanced version of data by repeatedly sampling existing observations in such a way that every class will have at least fraction times the number of observations of the largest class in classes. This way, all classes will have a minimum number of observations in the resulting data set relative to what the largest class has in the given (original) data.\n\nAs an example, by default (i.e. with fraction = 1) the resulting dataset will be near perfectly balanced. On the other hand, with fraction = 0.5 every class in the resulting data will have at least 50% as many observations as the largest class.\n\nThe classes input is an array with the same length as numobs(data). \n\nThe convenience parameter shuffle determines if the resulting data will be shuffled after its creation; if it is not shuffled then all the repeated samples will be together at the end, sorted by class. Defaults to true.\n\nThe random number generator rng can be optionally passed as the first argument. 
\n\nThe output will contain both the resampled data and classes.\n\n# 6 observations with 3 features each\nX = rand(3, 6)\n# 2 classes, severely imbalanced\nY = [\"a\", \"b\", \"b\", \"b\", \"b\", \"a\"]\n\n# oversample the class \"a\" to match \"b\"\nX_bal, Y_bal = oversample(X, Y)\n\n# this results in a bigger dataset with repeated data\n@assert size(X_bal) == (3,8)\n@assert length(Y_bal) == 8\n\n# now both \"a\", and \"b\" have 4 observations each\n@assert sum(Y_bal .== \"a\") == 4\n@assert sum(Y_bal .== \"b\") == 4\n\nFor this function to work, the type of data must implement numobs and getobs. \n\nIf data is a tuple and classes is not given, then it will be assumed that the last element of the tuple contains the classes.\n\njulia> data = DataFrame(X1=rand(6), X2=rand(6), Y=[:a,:b,:b,:b,:b,:a])\n6×3 DataFrames.DataFrame\n│ Row │ X1 │ X2 │ Y │\n├─────┼───────────┼─────────────┼───┤\n│ 1 │ 0.226582 │ 0.0443222 │ a │\n│ 2 │ 0.504629 │ 0.722906 │ b │\n│ 3 │ 0.933372 │ 0.812814 │ b │\n│ 4 │ 0.522172 │ 0.245457 │ b │\n│ 5 │ 0.505208 │ 0.11202 │ b │\n│ 6 │ 0.0997825 │ 0.000341996 │ a │\n\njulia> getobs(oversample(data, data.Y))\n8×3 DataFrame\n Row │ X1 X2 Y \n │ Float64 Float64 Symbol \n─────┼─────────────────────────────\n 1 │ 0.376304 0.100022 a\n 2 │ 0.467095 0.185437 b\n 3 │ 0.481957 0.319906 b\n 4 │ 0.336762 0.390811 b\n 5 │ 0.376304 0.100022 a\n 6 │ 0.427064 0.0648339 a\n 7 │ 0.427064 0.0648339 a\n 8 │ 0.457043 0.490688 b\n\nSee ObsView for more information on data subsets. See also undersample.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.undersample","page":"API","title":"MLUtils.undersample","text":"undersample([rng], data, classes; shuffle=true)\nundersample([rng], data::Tuple; shuffle=true)\n\nGenerate a class-balanced version of data by subsampling its observations in such a way that the resulting number of observations will be the same number for every class. 
This way, all classes will have as many observations in the resulting data set as the smallest class has in the given (original) data.\n\nThe convenience parameter shuffle determines if the resulting data will be shuffled after its creation; if it is not shuffled then all the observations will be in their original order. Defaults to true.\n\nIf data is a tuple and classes is not given, then it will be assumed that the last element of the tuple contains the classes.\n\nThe output will contain both the resampled data and classes.\n\n# 6 observations with 3 features each\nX = rand(3, 6)\n# 2 classes, severely imbalanced\nY = [\"a\", \"b\", \"b\", \"b\", \"b\", \"a\"]\n\n# subsample the class \"b\" to match \"a\"\nX_bal, Y_bal = undersample(X, Y)\n\n# this results in a smaller dataset\n@assert size(X_bal) == (3,4)\n@assert length(Y_bal) == 4\n\n# now both \"a\", and \"b\" have 2 observations each\n@assert sum(Y_bal .== \"a\") == 2\n@assert sum(Y_bal .== \"b\") == 2\n\nFor this function to work, the type of data must implement numobs and getobs. \n\nNote that if data is a tuple, then it will be assumed that the last element of the tuple contains the targets.\n\njulia> data = DataFrame(X1=rand(6), X2=rand(6), Y=[:a,:b,:b,:b,:b,:a])\n6×3 DataFrames.DataFrame\n│ Row │ X1 │ X2 │ Y │\n├─────┼───────────┼─────────────┼───┤\n│ 1 │ 0.226582 │ 0.0443222 │ a │\n│ 2 │ 0.504629 │ 0.722906 │ b │\n│ 3 │ 0.933372 │ 0.812814 │ b │\n│ 4 │ 0.522172 │ 0.245457 │ b │\n│ 5 │ 0.505208 │ 0.11202 │ b │\n│ 6 │ 0.0997825 │ 0.000341996 │ a │\n\njulia> getobs(undersample(data, data.Y))\n4×3 DataFrame\n Row │ X1 X2 Y \n │ Float64 Float64 Symbol \n─────┼─────────────────────────────\n 1 │ 0.427064 0.0648339 a\n 2 │ 0.376304 0.100022 a\n 3 │ 0.467095 0.185437 b\n 4 │ 0.457043 0.490688 b\n\nSee ObsView for more information on data subsets. 
See also oversample.\n\n\n\n\n\n","category":"function"},{"location":"api/#Operations","page":"API","title":"Operations","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"chunk\nflatten\ngroup_counts\ngroup_indices\nnormalise\nrpad_constant\nunbatch\nunsqueeze\nunstack","category":"page"},{"location":"api/#MLUtils.chunk","page":"API","title":"MLUtils.chunk","text":"chunk(x, n; [dims])\nchunk(x; [size, dims])\n\nSplit x into n parts or alternatively, if size is an integer, into equal chunks of size size. The parts contain the same number of elements except possibly for the last one that can be smaller.\n\nIn case size is a collection of integers instead, the elements of x are split into chunks of the given sizes.\n\nIf x is an array, dims can be used to specify along which dimension to split (defaults to the last dimension).\n\nExamples\n\njulia> chunk(1:10, 3)\n3-element Vector{UnitRange{Int64}}:\n 1:4\n 5:8\n 9:10\n\njulia> chunk(1:10; size = 2)\n5-element Vector{UnitRange{Int64}}:\n 1:2\n 3:4\n 5:6\n 7:8\n 9:10\n\njulia> x = reshape(collect(1:20), (5, 4))\n5×4 Matrix{Int64}:\n 1 6 11 16\n 2 7 12 17\n 3 8 13 18\n 4 9 14 19\n 5 10 15 20\n\njulia> xs = chunk(x, 2, dims=1)\n2-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}}:\n [1 6 11 16; 2 7 12 17; 3 8 13 18]\n [4 9 14 19; 5 10 15 20]\n\njulia> xs[1]\n3×4 view(::Matrix{Int64}, 1:3, :) with eltype Int64:\n 1 6 11 16\n 2 7 12 17\n 3 8 13 18\n\njulia> xes = chunk(x; size = 2, dims = 2)\n2-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}}:\n [1 6; 2 7; … ; 4 9; 5 10]\n [11 16; 12 17; … ; 14 19; 15 20]\n\njulia> xes[2]\n5×2 view(::Matrix{Int64}, :, 3:4) with eltype Int64:\n 11 16\n 12 17\n 13 18\n 14 19\n 15 20\n\njulia> chunk(1:6; size = [2, 4])\n2-element Vector{UnitRange{Int64}}:\n 1:2\n 3:6\n\n\n\n\n\nchunk(x, partition_idxs; [npartitions, 
dims])\n\nPartition the array x along the dimension dims according to the indexes in partition_idxs.\n\npartition_idxs must be sorted and contain only positive integers between 1 and the number of partitions. \n\nIf the number of partitions npartitions is not provided, it is inferred from partition_idxs.\n\nIf dims is not provided, it defaults to the last dimension.\n\nSee also unbatch.\n\nExamples\n\njulia> x = reshape([1:10;], 2, 5)\n2×5 Matrix{Int64}:\n 1 3 5 7 9\n 2 4 6 8 10\n\njulia> chunk(x, [1, 2, 2, 3, 3])\n3-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}}:\n [1; 2;;]\n [3 5; 4 6]\n [7 9; 8 10]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.flatten","page":"API","title":"MLUtils.flatten","text":"flatten(x::AbstractArray)\n\nReshape arbitrarily-shaped input into a matrix-shaped output, preserving the size of the last dimension.\n\nSee also unsqueeze.\n\nExamples\n\njulia> rand(3,4,5) |> flatten |> size\n(12, 5)\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.group_counts","page":"API","title":"MLUtils.group_counts","text":"group_counts(x)\n\nCount the number of times that each element of x appears.\n\nSee also group_indices.\n\nExamples\n\njulia> group_counts(['a', 'b', 'b'])\nDict{Char, Int64} with 2 entries:\n 'a' => 1\n 'b' => 2\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.group_indices","page":"API","title":"MLUtils.group_indices","text":"group_indices(x) -> Dict\n\nComputes the indices of elements in the vector x for each distinct value contained. 
This information is useful for resampling strategies, such as stratified sampling.\n\nSee also group_counts.\n\nExamples\n\njulia> x = [:yes, :no, :maybe, :yes];\n\njulia> group_indices(x)\nDict{Symbol, Vector{Int64}} with 3 entries:\n :yes => [1, 4]\n :maybe => [3]\n :no => [2]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.normalise","page":"API","title":"MLUtils.normalise","text":"normalise(x; dims=ndims(x), ϵ=1e-5)\n\nNormalise the array x to mean 0 and standard deviation 1 across the dimension(s) given by dims. By default, dims is the last dimension. \n\nϵ is a small additive factor added to the denominator for numerical stability.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.rpad_constant","page":"API","title":"MLUtils.rpad_constant","text":"rpad_constant(v::AbstractArray, n::Union{Integer, Tuple}, val = 0; dims=:)\n\nReturn the given sequence padded with val along the dimensions dims up to a maximum length in each direction specified by n.\n\nExamples\n\njulia> rpad_constant([1, 2], 4, -1) # padding with -1 up to size 4\n4-element Vector{Int64}:\n 1\n 2\n -1\n -1\n\njulia> rpad_constant([1, 2, 3], 2) # no padding if length is already greater than n\n3-element Vector{Int64}:\n 1\n 2\n 3\n\njulia> rpad_constant([1 2; 3 4], 4; dims=1) # padding along the first dimension\n4×2 Matrix{Int64}:\n 1 2\n 3 4\n 0 0\n 0 0\n\njulia> rpad_constant([1 2; 3 4], 4) # padding along all dimensions by default\n4×4 Matrix{Int64}:\n 1 2 0 0\n 3 4 0 0\n 0 0 0 0\n 0 0 0 0\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.unbatch","page":"API","title":"MLUtils.unbatch","text":"unbatch(x)\n\nReverse of the batch operation, unstacking the last dimension of the array x.\n\nSee also unstack and chunk.\n\nExamples\n\njulia> unbatch([1 3 5 7;\n 2 4 6 8])\n4-element Vector{Vector{Int64}}:\n [1, 2]\n [3, 4]\n [5, 6]\n [7, 
8]\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.unsqueeze","page":"API","title":"MLUtils.unsqueeze","text":"unsqueeze(x; dims)\n\nReturn x reshaped into an array one dimensionality higher than x, where dims indicates in which dimension x is extended. dims can be an integer between 1 and ndims(x)+1.\n\nSee also flatten, stack.\n\nExamples\n\njulia> unsqueeze([1 2; 3 4], dims=2)\n2×1×2 Array{Int64, 3}:\n[:, :, 1] =\n 1\n 3\n\n[:, :, 2] =\n 2\n 4\n\n\njulia> xs = [[1, 2], [3, 4], [5, 6]]\n3-element Vector{Vector{Int64}}:\n [1, 2]\n [3, 4]\n [5, 6]\n\njulia> unsqueeze(xs, dims=1)\n1×3 Matrix{Vector{Int64}}:\n [1, 2] [3, 4] [5, 6]\n\n\n\n\n\nunsqueeze(; dims)\n\nReturns a function which, acting on an array, inserts a dimension of size 1 at dims.\n\nExamples\n\njulia> rand(21, 22, 23) |> unsqueeze(dims=2) |> size\n(21, 1, 22, 23)\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.unstack","page":"API","title":"MLUtils.unstack","text":"unstack(xs; dims)\n\nUnroll the given xs into an array of arrays along the given dimension dims.\n\nIt is the inverse operation of stack.\n\nSee also unbatch and chunk.\n\nExamples\n\njulia> unstack([1 3 5 7; 2 4 6 8], dims=2)\n4-element Vector{Vector{Int64}}:\n [1, 2]\n [3, 4]\n [5, 6]\n [7, 8]\n\n\n\n\n\n","category":"function"},{"location":"api/#Datasets","page":"API","title":"Datasets","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"Datasets.load_iris\nDatasets.make_sin\nDatasets.make_spiral\nDatasets.make_poly\nDatasets.make_moons","category":"page"},{"location":"api/#MLUtils.Datasets.load_iris","page":"API","title":"MLUtils.Datasets.load_iris","text":"load_iris() -> X, y, names\n\nLoads the first 150 observations from the Iris flower data set introduced by Ronald Fisher (1936). The 4 by 150 matrix X contains the numeric measurements, in which each individual column denotes an observation. The vector y contains the class labels as strings. 
The vector names contains the names of the features (i.e. the rows of X).\n\n[1] Fisher, Ronald A. \"The use of multiple measurements in taxonomic problems.\" Annals of eugenics 7.2 (1936): 179-188.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_sin","page":"API","title":"MLUtils.Datasets.make_sin","text":"make_sin(n, start, stop; noise = 0.3, f_rand = randn) -> x, y\n\nGenerates n noisy, equally spaced samples of a sine curve from start to stop by adding noise .* f_rand(length(x)) to the result of sin(x).\n\nReturns the vector x with the samples and the noisy response y.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_spiral","page":"API","title":"MLUtils.Datasets.make_spiral","text":"make_spiral(n, a, theta, b; noise = 0.01, f_rand = randn) -> x, y\n\nGenerates n noisy responses for a spiral with two labels. Uses the radius, angle, and scaling arguments to space the points in 2D space, adding noise .* f_rand(n) to the response.\n\nReturns the 2 x n matrix x with the coordinates of the samples and the vector y with the labels.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_poly","page":"API","title":"MLUtils.Datasets.make_poly","text":"make_poly(coef, x; noise = 0.01, f_rand = randn) -> x, y\n\nGenerates a noisy response for a polynomial of degree length(coef) - 1 with the coefficients given by coef. The response is generated by elementwise evaluation of the polynomial on the elements of x and adding noise .* f_rand(length(x)) to the result.\n\nThe vector coef contains the coefficients for the terms of the polynomial. 
The first element of coef denotes the coefficient for the term with the highest degree, while the last element of coef denotes the intercept.\n\nReturns the input x and the noisy response y.\n\n\n\n\n\n","category":"function"},{"location":"api/#MLUtils.Datasets.make_moons","page":"API","title":"MLUtils.Datasets.make_moons","text":"make_moons(n; noise=0.0, f_rand=randn, shuffle=true) -> x, y\n\nGenerate a dataset with two interleaving half circles. \n\nIf n is an integer, the number of samples is n and the number of samples for each half circle is n ÷ 2. If n is a tuple, the first element of the tuple denotes the number of samples for the first half circle and the second element denotes the number of samples for the second half circle.\n\nThe noise level can be controlled by the noise argument.\n\nSet shuffle=false to keep the order of the samples.\n\nReturns the 2 x n matrix x with the samples and the vector y with the labels. \n\n\n\n\n\n","category":"function"},{"location":"#MLUtils.jl","page":"Home","title":"MLUtils.jl","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"(Image: ) (Image: ) (Image: ) (Image: )","category":"page"},{"location":"","page":"Home","title":"Home","text":"MLUtils.jl defines interfaces and implements common utilities for Machine Learning pipelines.","category":"page"},{"location":"#Features","page":"Home","title":"Features","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"An extensible dataset interface (numobs and getobs).\nData iteration and dataloaders (eachobs and DataLoader).\nLazy data views (obsview).\nResampling procedures (undersample and oversample).\nTrain/test splits (splitobs).\nData partitioning and aggregation tools (batch, unbatch, chunk, group_counts, group_indices).\nFolds for cross-validation (kfolds, leavepout).\nLazy dataset transformations (mapobs, filterobs, groupobs, joinobs, shuffleobs).\nToy datasets for demonstration purposes. 
\nOther data handling utilities (flatten, normalise, unsqueeze, stack, unstack).","category":"page"},{"location":"#Examples","page":"Home","title":"Examples","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Let us take a look at a hello world example to get a feeling for how to use this package in a typical ML scenario. ","category":"page"},{"location":"","page":"Home","title":"Home","text":"using MLUtils\n\n# X is a matrix of floats\n# Y is a vector of strings\nX, Y = load_iris()\n\n# The iris dataset is ordered according to its labels,\n# which means that we should shuffle the dataset before\n# partitioning it into training and test sets.\nXs, Ys = shuffleobs((X, Y))\n\n# We leave out 15% of the data for testing\ncv_data, test_data = splitobs((Xs, Ys); at=0.85)\n\n# Next we partition the data using a 10-fold scheme.\nfor (train_data, val_data) in kfolds(cv_data; k=10)\n\n # We apply a lazy transform for data augmentation\n train_data = mapobs(xy -> (xy[1] .+ 0.1 .* randn.(), xy[2]), train_data)\n\n for epoch = 1:10\n # Iterate over the data using mini-batches of 5 observations each\n for (x, y) in eachobs(train_data, batchsize=5)\n # ... train supervised model on minibatches here\n end\n end\nend","category":"page"},{"location":"","page":"Home","title":"Home","text":"In the above code snippet, the inner loop over eachobs is the only place where data other than indices is actually copied. In fact, while x and y are materialized arrays, all the rest are data views. ","category":"page"},{"location":"#Historical-Notes","page":"Home","title":"Historical Notes","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"MLUtils.jl brings together functionality previously found in LearnBase.jl, MLDataPattern.jl, and MLLabelUtils.jl. These packages are now discontinued. 
","category":"page"},{"location":"","page":"Home","title":"Home","text":"Other features were ported from the deep learning library Flux.jl, as they are of general use. ","category":"page"},{"location":"#Alternatives-and-Related-Packages","page":"Home","title":"Alternatives and Related Packages","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"MLJ.jl is a more complete package for managing the whole machine learning pipeline if you are looking for a scikit-learn replacement.\nNNlib.jl provides utility functions for neural networks.\nTableTransforms.jl contains transformations for tabular datasets.\nDataAugmentation.jl provides efficient, composable data augmentation for machine and deep learning, with support for n-dimensional images, keypoints, and categorical masks.","category":"page"}] }