Skip to content

Commit

Permalink
add make_moons generator + some doc reorg (#185)
Browse files Browse the repository at this point in the history
  • Loading branch information
CarloLucibello authored Jan 25, 2025
1 parent 5c20c7b commit bea6ec4
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
/Manifest.toml
docs/build
.vscode
/test.jl
87 changes: 57 additions & 30 deletions docs/src/api.md
Original file line number Diff line number Diff line change
@@ -1,61 +1,88 @@
```@meta
CollapsedDocStrings = true
```

# API Reference

## Index

```@index
Order = [:type, :function]
Modules = [MLUtils]
Pages = ["api.md"]
## Core API

```@docs
getobs
getobs!
numobs
```

## Docs
## Lazy Transforms

```@docs
filterobs
groupobs
joinobs
mapobs
shuffleobs
```

## Batching, Iteration, and Views

```@docs
batch
batchsize
batchseq
BatchView
chunk
DataLoader
eachobs
DataLoader
obsview
ObsView
randobs
```

## Partitioning

```@docs
leavepout
kfolds
splitobs
```

## Array Constructors

```@docs
falses_like
fill_like
filterobs
ones_like
trues_like
zeros_like
```

## Resampling

```@docs
oversample
undersample
```

## Operations

```@docs
chunk
flatten
getobs
getobs!
joinobs
group_counts
group_indices
groupobs
kfolds
leavepout
mapobs
numobs
normalise
obsview
ObsView
ones_like
oversample
randobs
rpad_constant
shuffleobs
splitobs
stack
unbatch
undersample
unsqueeze
unstack
zeros_like
```


## Datasets Docs
## Datasets

```@docs
Datasets.load_iris
Datasets.make_sin
Datasets.make_spiral
Datasets.make_poly
Datasets.make_moons
```


4 changes: 3 additions & 1 deletion src/Datasets/Datasets.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# Loaders and generators for example datasets.
module Datasets

using Random
using DelimitedFiles: readdlm
using MLUtils: getobs, shuffleobs

# Dataset loaders.
include("load_datasets.jl")
export load_iris

# Synthetic data generators. Every name must be part of a single
# comma-separated `export` list; a name on a line that does not follow a
# trailing comma is not exported.
include("generators.jl")
export make_spiral,
       make_poly,
       make_sin,
       make_moons

end
67 changes: 61 additions & 6 deletions src/Datasets/generators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
make_sin(n, start, stop; noise = 0.3, f_rand = randn) -> x, y
Generates `n` noisy equally spaced samples of a sine wave from `start` to `stop`
by adding `noise .* f_rand(length(x))` to the result of `fun(x)`.
by adding `noise .* f_rand(length(x))` to the result of `sin(x)`.
Returns the vector `x` with the samples and the noisy response `y`.
"""
function make_sin(n::Int = 50, start::Real = 0, stop::Real = 2π; noise::Real = 0.3, f_rand::Function = randn)
x = collect(range(start, stop=stop, length=n))
Expand All @@ -15,10 +17,15 @@ end
make_poly(coef, x; noise = 0.1, f_rand = randn) -> x, y
Generates a noisy response for a polynomial of degree `length(coef) - 1`
using the vector `x` as input and adding `noise .* f_randn(length(x))` to the result.
and with the coefficients given by `coef`. The response is generated
by elementwise computation of the polynomial on the elements of `x`
and adding `noise .* f_rand(length(x))` to the result.
The vector `coef` contains the coefficients for the terms of the polynomial.
The first element of `coef` denotes the coefficient for the term with
the highest degree, while the last element of `coef` denotes the intercept.
Return the input `x` and the noisy response `y`.
"""
function make_poly(coef::AbstractVector{R}, x::AbstractVector{T};
noise::Real = 0.1, f_rand::Function = randn) where {T<:Real,R<:Real}
Expand All @@ -32,16 +39,18 @@ function make_poly(coef::AbstractVector{R}, x::AbstractVector{T};
end
end
y .+= noise .* f_rand.()
x_vec, y
return x_vec, y
end


"""
make_spiral(n, a, theta, b; noise = 0.1, f_rand = randn) -> x, y
Generates `n` noisy responses for a spiral with two labels. Uses the radius, angle
and scaling arguments to space the points in 2D space and adding `noise .* f_rand(n)`
to the response.
Returns the 2 x n matrix `x` with the coordinates of the samples and the
vector `y` with the labels.
"""
function make_spiral(n::Int = 97, a::Real = 6.5, theta::Real = 16.0, b::Real=104.0;
noise::Real = 0.1, f_rand::Function = randn)
Expand All @@ -62,5 +71,51 @@ function make_spiral(n::Int = 97, a::Real = 6.5, theta::Real = 16.0, b::Real=104
end
x[1, :] .+= noise .* f_rand.()
x[2, :] .+= noise .* f_rand.()
x, y
end
return x, y
end


"""
    make_moons(n; noise=0.0, f_rand=randn, shuffle=true) -> x, y

Generate a dataset with two interleaving half circles.

If `n` is an integer, the total number of samples is `n`: the first half circle
receives `n ÷ 2` samples and the second one the remaining `n - n ÷ 2`.
If `n` is a tuple, its first element is the number of samples for the first
half circle and its second element the number of samples for the second one.

The noise level is controlled by the `noise` argument:
`noise .* f_rand(rng, size(x))` is added to the coordinates, where `rng` is
`Random.default_rng()`. A custom `f_rand` must therefore accept a random number
generator and a tuple of dimensions.

Set `shuffle=false` to keep the samples ordered by half circle
(first half circle first).

Returns the `2 x n` matrix `x` with the coordinates of the samples and the
vector `y` with the labels (`1` for the first half circle, `2` for the second).

Throws an `ArgumentError` if a requested number of samples is negative.
"""
function make_moons(n_samples::Union{Int, Tuple{Int, Int}} = 100;
                    noise::Number = 0.0, f_rand = randn, shuffle::Bool = true)
    rng = Random.default_rng()
    if n_samples isa Tuple
        n_samples_1, n_samples_2 = n_samples
    else
        n_samples_1 = n_samples ÷ 2
        n_samples_2 = n_samples - n_samples_1
    end
    if n_samples_1 < 0 || n_samples_2 < 0
        throw(ArgumentError("the number of samples must be non-negative, got $n_samples"))
    end

    t_min, t_max = 0.0, π
    # The outer arc is the FIRST half circle (label 1) and the inner arc the
    # SECOND one (label 2), so each arc must be sized by the count of its own
    # label. The inner angles are drawn first so that, for equally sized half
    # circles, the random stream is consumed in the same order as before.
    t_inner = rand(rng, n_samples_2) * (t_max - t_min) .+ t_min
    t_outer = rand(rng, n_samples_1) * (t_max - t_min) .+ t_min

    # Outer arc: upper half of the unit circle centred at the origin.
    outer_circ_x = cos.(t_outer)
    outer_circ_y = sin.(t_outer)
    # Inner arc: lower half of the unit circle centred at (1, 0.5).
    inner_circ_x = 1 .- cos.(t_inner)
    inner_circ_y = 0.5 .- sin.(t_inner)

    # Stack the arcs as rows (outer first), then transpose so that every
    # column of `x` is one observation.
    x = permutedims([outer_circ_x outer_circ_y; inner_circ_x inner_circ_y], (2, 1))
    x .+= noise .* f_rand(rng, size(x))
    y = [fill(1, n_samples_1); fill(2, n_samples_2)]

    if shuffle
        # Shuffle coordinates and labels jointly so they stay aligned.
        x, y = getobs(shuffleobs((x, y)))
    end
    return x, y
end
17 changes: 17 additions & 0 deletions test/Datasets/generators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,20 @@ end
# test_plot = scatterplot(xtmp[1, 1:97], xtmp[2, 1:97], title="Spiral Function", color=:blue, name="pos")
# print(scatterplot!(test_plot, xtmp[1, 98:194], xtmp[2, 98:194], color=:yellow, name="neg" ))
end


@testset "make_moons" begin
    # Noise-free and unshuffled, so labels and coordinate ranges are predictable.
    total = 100
    samples, labels = Datasets.make_moons(total, noise=0, shuffle=false)
    @test size(samples) == (2, total)
    @test size(labels) == (total,)

    first_half, second_half = 1:50, 51:100

    # Without shuffling, the labels come in two contiguous groups.
    @test all(==(1), labels[first_half])
    @test all(==(2), labels[second_half])

    # Samples labelled 1 must stay inside [-1, 1] on both coordinates.
    @test minimum(samples[1, first_half]) >= -1
    @test maximum(samples[1, first_half]) <= 1
    @test minimum(samples[2, first_half]) >= -1
    @test maximum(samples[2, first_half]) <= 1

    # Samples labelled 2 must stay inside [0, 2] x [-0.5, 0.5].
    @test minimum(samples[1, second_half]) >= 0
    @test maximum(samples[1, second_half]) <= 2
    @test minimum(samples[2, second_half]) >= -0.5
    @test maximum(samples[2, second_half]) <= 0.5
end

0 comments on commit bea6ec4

Please sign in to comment.