From b4067eced05911620692300447d63255c21ae9e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 10:28:22 -0300 Subject: [PATCH 01/15] Refactor codebase --- Project.toml | 4 +- src/LearnBase.jl | 582 +---------------------------------------------- src/costs.jl | 296 ++++++++++++++++++++++++ src/other.jl | 384 +++++++++++++++++++++++++++++++ test/costs.jl | 44 ++++ test/other.jl | 235 +++++++++++++++++++ test/runtests.jl | 309 +------------------------ 7 files changed, 967 insertions(+), 887 deletions(-) create mode 100644 src/costs.jl create mode 100644 src/other.jl create mode 100644 test/costs.jl create mode 100644 test/other.jl diff --git a/Project.toml b/Project.toml index f59f20c..ab19398 100644 --- a/Project.toml +++ b/Project.toml @@ -3,11 +3,9 @@ uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" version = "0.3.0" [deps] -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" [compat] -StatsBase = "0.33" julia = "1" [extras] diff --git a/src/LearnBase.jl b/src/LearnBase.jl index e849aba..867db18 100644 --- a/src/LearnBase.jl +++ b/src/LearnBase.jl @@ -1,583 +1,11 @@ module LearnBase -# Only reexport required functions by default -import StatsBase: nobs, fit, fit!, predict, params, params! +using Markdown # for docstrings -import LinearAlgebra: issymmetric +# LEARNING COSTS (e.g. loss & penalty) +include("costs.jl") -""" -Baseclass for any kind of cost. Notable examples for -costs are `Loss` and `Penalty`. -""" -abstract type Cost end - -""" -Baseclass for all losses. A loss is some (possibly simplified) -function `L(features, targets, outputs)`, where `outputs` are the -result of some function `f(features)`. -""" -abstract type Loss <: Cost end - -""" -A loss is considered **supervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`targets` and `outputs`, and thus allows for the simplification -`L(targets, outputs)`. -""" -abstract type SupervisedLoss <: Loss end - -""" -A supervised loss, where the targets are in {-1, 1}, and which -can be simplified to `L(targets, outputs) = L(targets * outputs)` -is considered **margin-based**. -""" -abstract type MarginLoss <: SupervisedLoss end - - -""" -A supervised loss that can be simplified to -`L(targets, outputs) = L(targets - outputs)` is considered -distance-based. -""" -abstract type DistanceLoss <: SupervisedLoss end - -""" -A loss is considered **unsupervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`features` and `outputs`, and thus allows for the simplification -`L(features, outputs)`. -""" -abstract type UnsupervisedLoss <: Loss end - -abstract type Penalty <: Cost end - -function scaled end - -function value end -function value! end -function meanvalue end -function sumvalue end -function meanderiv end -function sumderiv end -function deriv end -function deriv2 end -function deriv! end -function value_deriv end -function addgrad! end -function value_grad end -function value_grad! end -function prox end -function prox! end - -"Return the gradient of the learnable parameters w.r.t. some objective" -function grad end -"Do a backward pass, updating the gradients of learnable parameters and/or inputs" -function grad! 
end - -function isminimizable end -function isdifferentiable end -function istwicedifferentiable end -function isconvex end -function isstrictlyconvex end -function isstronglyconvex end -function isnemitski end -function isunivfishercons end -function isfishercons end -function islipschitzcont end -function islocallylipschitzcont end -function islipschitzcont_deriv end # maybe overkill -function isclipable end -function ismarginbased end -function isclasscalibrated end -function isdistancebased end - -# fallback to supersets -isstrictlyconvex(any) = isstronglyconvex(any) -isconvex(any) = isstrictlyconvex(any) -islocallylipschitzcont(any) = islipschitzcont(any) - -""" -Anything that takes an input and performs some kind -of function to produce an output. For example a linear -prediction function. -""" -abstract type Transformation end -abstract type StochasticTransformation <: Transformation end -abstract type Learnable <: Transformation end - -function transform end -"Do a forward pass, and return the output" -function transform! end - -""" -Baseclass for any prediction model that can be minimized. -This means that an object of a subclass contains all the -information needed to compute its own current loss. -""" -abstract type Minimizable <: Learnable end - -function update end -function update! end -function learn end -function learn! end - -# getobs(data, [idx], [obsdim]) -# -# Return the observations corresponding to the observation-index -# `idx`. Note that `idx` can be of type `Int` or `AbstractVector`. -# Both options must be supported by a custom type. -# -# The returned observation(s) should be in the form intended to -# be passed as-is to some learning algorithm. There is no strict -# interface requirement on how this "actual data" must look like. -# Every author behind some custom data container can make this -# decision him-/herself. We do, however, expect it to be consistent -# for `idx` being an integer, as well as `idx` being an abstract -# vector, respectively. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function getobs end - -# getobs!(buffer, data, [idx], [obsdim]) -# -# Inplace version of `getobs(data, idx, obsdim)`. If this method -# is defined for the type of `data`, then `buffer` should be used -# to store the result, instead of allocating a dedicated object. -# -# Implementing this function is optional. In the case no such -# method is provided for the type of `data`, then `buffer` will be -# *ignored* and the result of `getobs` returned. This could be -# because the type of `data` may not lend itself to the concept -# of `copy!`. Thus supporting a custom `getobs!(::MyType, ...)` -# is optional and not required. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function getobs! end - -# -------------------------------------------------------------------- - -# gettarget([f], observation) -# -# Use `f` (if provided) to extract the target from the single -# `observation` and return it. It is used internally by -# `targets` (only if `f` is provided) and by -# `eachtarget` (always) on each individual observation. -# -# Even though this function is not exported, it is intended to be -# extended by users to support their custom data storage types. 
-# -# This function is implemented in MLDataPattern -function gettarget end # not exported - -# gettargets(data, [idx], [obsdim]) -# -# Return the targets corresponding to the observation-index `idx`. -# Note that `idx` can be of type `Int` or `AbstractVector`. -# -# Implementing this function for a custom type of `data` is -# optional. It is particularly useful if the targets in `data` can -# be provided without invoking `getobs`. For example if you have a -# remote data-source where the labels are part of some metadata -# that is locally available. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function gettargets end # not exported - -# targets([f], data, [obsdim]) -# -# This function is implemented in MLDataPattern -function targets end - -# -------------------------------------------------------------------- - -""" - abstract DataView{TElem, TData} <: AbstractVector{TElem} - -Baseclass for all vector-like views of some data structure. -This allow for example to see some design matrix as a vector of -individual observation-vectors instead of one matrix. - -see `MLDataPattern.ObsView` and `MLDataPattern.BatchView` for examples. -""" -abstract type DataView{TElem, TData} <: AbstractVector{TElem} end - -""" - abstract AbstractObsView{TElem, TData} <: DataView{TElem, TData} - -Baseclass for all vector-like views of some data structure, -that views it as some form or vector of observations. - -see `MLDataPattern.ObsView` for a concrete example. -""" -abstract type AbstractObsView{TElem, TData} <: DataView{TElem, TData} end - -""" - abstract AbstractBatchView{TElem, TData} <: DataView{TElem, TData} - -Baseclass for all vector-like views of some data structure, -that views it as some form or vector of equally sized batches. - -see `MLDataPattern.BatchView` for a concrete example. -""" -abstract type AbstractBatchView{TElem, TData} <: DataView{TElem, TData} end - -# -------------------------------------------------------------------- - -""" - abstract DataIterator{TElem,TData} - -Baseclass for all types that iterate over a `data` source -in some manner. The total number of observations may or may -not be known or defined and in general there is no contract that -`getobs` or `nobs` has to be supported by the type of `data`. -Furthermore, `length` should be used to query how many elements -the iterator can provide, while `nobs` may return the underlying -true amount of observations available (if known). - -see `MLDataPattern.RandomObs`, `MLDataPattern.RandomBatches` -""" -abstract type DataIterator{TElem,TData} end - -""" - abstract ObsIterator{TElem,TData} <: DataIterator{TElem,TData} - -Baseclass for all types that iterate over some data source -one observation at a time. - -```julia -using MLDataPattern -@assert typeof(RandomObs(X)) <: ObsIterator - -for x in RandomObs(X) - # ... -end -``` - -see `MLDataPattern.RandomObs` -""" -abstract type ObsIterator{TElem,TData} <: DataIterator{TElem,TData} end - -""" - abstract BatchIterator{TElem,TData} <: DataIterator{TElem,TData} - -Baseclass for all types that iterate over of some data source one -batch at a time. - -```julia -@assert typeof(RandomBatches(X, size=10)) <: BatchIterator - -for x in RandomBatches(X, size=10) - @assert nobs(x) == 10 - # ... 
-end -``` - -see `MLDataPattern.RandomBatches` -""" -abstract type BatchIterator{TElem,TData} <: DataIterator{TElem,TData} end - -# -------------------------------------------------------------------- - -# just for dispatch for those who care to -const AbstractDataIterator{E,T} = Union{DataIterator{E,T}, DataView{E,T}} -const AbstractObsIterator{E,T} = Union{ObsIterator{E,T}, AbstractObsView{E,T}} -const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E,T}} - -# -------------------------------------------------------------------- - -# datasubset(data, [idx], [obsdim]) -# -# Return a lazy subset of the observations in `data` that correspond -# to the given `idx`. No data should be copied except of the -# indices. Note that `idx` can be of type `Int` or `AbstractVector`. -# Both options must be supported by a custom type. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function datasubset end - -""" - default_obsdim(data) - -The specify the default obsdim for a specific type of data. -Defaults to `ObsDim.Undefined()` -""" -function default_obsdim end - -# just for dispatch for those who care to -"see `?ObsDim`" -abstract type ObsDimension end - -""" - module ObsDim - -Singleton types to define which dimension of some data structure -(e.g. some `Array`) denotes the observations. - -- `ObsDim.First()` -- `ObsDim.Last()` -- `ObsDim.Constant(dim)` - -Used for efficient dispatching -""" -module ObsDim - using ..LearnBase: ObsDimension - - """ - Default value for most functions. Denotes that the concept of - an observation dimension is not defined for the given data. - """ - struct Undefined <: ObsDimension end - - """ - ObsDim.Last <: ObsDimension - - Defines that the last dimension denotes the observations - """ - struct Last <: ObsDimension end - - """ - ObsDim.Constant{DIM} <: ObsDimension - - Defines that the dimension `DIM` denotes the observations - """ - struct Constant{DIM} <: ObsDimension end - Constant(dim::Int) = Constant{dim}() - - """ - ObsDim.First <: ObsDimension - - Defines that the first dimension denotes the observations - """ - const First = Constant{1} -end - -Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) -Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim -Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() -Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) -Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) -Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) -function Base.convert(::Type{ObsDimension}, dim::Symbol) - if dim == :first || dim == :begin - ObsDim.First() - elseif dim == Symbol("end") || dim == :last - ObsDim.Last() - elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined - ObsDim.Undefined() - else - throw(ArgumentError("Unknown way to specify a obsdim: $dim")) - end -end - -@deprecate obs_dim(dim) convert(ObsDimension, dim) - -default_obsdim(data) = ObsDim.Undefined() -default_obsdim(A::AbstractArray) = ObsDim.Last() -default_obsdim(tup::Tuple) = map(default_obsdim, tup) - - -import Base: AbstractSet - -"A continuous range (inclusive) between a lo and a hi" -struct IntervalSet{T} <: AbstractSet{T} - lo::T - hi::T -end -function IntervalSet(lo::A, hi::B) 
where {A,B} - T = promote_type(A,B) - IntervalSet{T}(convert(T,lo), convert(T,hi)) -end - -# numeric interval -randtype(s::IntervalSet{T}) where T <: Number = Float64 -Base.rand(s::IntervalSet{T}, dims::Integer...) where T <: Number = rand(dims...) .* (s.hi - s.lo) .+ s.lo -Base.in(x::Number, s::IntervalSet{T}) where T <: Number = s.lo <= x <= s.hi -Base.length(s::IntervalSet{T}) where T <: Number = 1 -Base.:(==)(s1::IntervalSet{T}, s2::IntervalSet{T}) where T = s1.lo == s2.lo && s1.hi == s2.hi - - -# vector of intervals -randtype(s::IntervalSet{T}) where T <: AbstractVector = Vector{Float64} -Base.rand(s::IntervalSet{T}) where T <: AbstractVector = Float64[rand() * (s.hi[i] - s.lo[i]) + s.lo[i] for i=1:length(s)] -Base.in(x::AbstractVector, s::IntervalSet{T}) where T <: AbstractVector = all(i -> s.lo[i] <= x[i] <= s.hi[i], 1:length(s)) -Base.length(s::IntervalSet{T}) where T <: AbstractVector = length(s.lo) - - -"Set of discrete items" -struct DiscreteSet{T<:AbstractArray} <: AbstractSet{T} - items::T -end -randtype(s::DiscreteSet) = eltype(s.items) -Base.rand(s::DiscreteSet, dims::Integer...) = rand(s.items, dims...) -Base.in(x, s::DiscreteSet) = x in s.items -Base.length(s::DiscreteSet) = length(s.items) -Base.getindex(s::DiscreteSet, i::Int) = s.items[i] -Base.:(==)(s1::DiscreteSet, s2::DiscreteSet) = s1.items == s2.items - - -# operations on arrays of sets -randtype(sets::AbstractArray{S,N}) where {S <: AbstractSet, N} = Array{promote_type(map(randtype, sets)...), N} -Base.rand(sets::AbstractArray{S}) where S <: AbstractSet = eltype(randtype(sets))[rand(s) for s in sets] -function Base.rand(sets::AbstractArray{S}, dim1::Integer, dims::Integer...) where S <: AbstractSet - A = Array{randtype(sets)}(undef, dim1, dims...) - for i in eachindex(A) - A[i] = rand(sets) - end - A -end -function Base.in(xs::AbstractArray, sets::AbstractArray{S}) where S <: AbstractSet - size(xs) == size(sets) && all(map(in, xs, sets)) -end - - -"Groups several heterogenous sets. Used mainly for proper dispatch." -struct TupleSet{T<:Tuple} <: AbstractSet{T} - sets::T -end -TupleSet(sets::AbstractSet...) = TupleSet(sets) - -# rand can return arrays or tuples, but defaults to arrays -randtype(sets::TupleSet, ::Type{Vector}) = Vector{promote_type(map(randtype, sets.sets)...)} -Base.rand(sets::TupleSet, ::Type{Vector}) = eltype(randtype(sets, Vector))[rand(s) for s in sets.sets] -randtype(sets::TupleSet, ::Type{Tuple}) = Tuple{map(randtype, sets.sets)...} -Base.rand(sets::TupleSet, ::Type{Tuple}) = map(rand, sets.sets) -function Base.rand(sets::TupleSet, ::Type{OT}, dim1::Integer, dims::Integer...) where OT - A = Array{randtype(sets, OT)}(undef, dim1, dims...) - for i in eachindex(A) - A[i] = rand(sets, OT) - end - A -end -Base.length(sets::TupleSet) = sum(length(s) for s in sets.sets) -Base.iterate(sets::TupleSet) = iterate(sets.sets) -Base.iterate(sets::TupleSet, i) = iterate(sets.sets, i) - -randtype(sets::TupleSet) = randtype(sets, Vector) -Base.rand(sets::TupleSet, dims::Integer...) = rand(sets, Vector, dims...) 
-Base.in(x, sets::TupleSet) = all(map(in, x, sets.sets)) - -"Returns an AbstractSet representing valid input values" -function inputdomain end - -"Returns an AbstractSet representing valid output/target values" -function targetdomain end - - -export - - # Types - Cost, - Loss, - SupervisedLoss, - MarginLoss, - DistanceLoss, - UnsupervisedLoss, - Penalty, - - Transformation, - Learnable, - StochasticTransformation, - - Minimizable, - - AbstractSet, - IntervalSet, - DiscreteSet, - TupleSet, - - # Functions - getobs, - getobs!, - - scaled, - - learn, - learn!, - update, - update!, - transform, - transform!, - value, - value!, - meanvalue, - sumvalue, - meanderiv, - sumderiv, - deriv, - deriv!, - params, - params!, - grad, - grad!, - addgrad!, - deriv2, - value_deriv, - value_deriv!, - value_grad, - value_grad!, - prox, - prox!, - inputdomain, - targetdomain, - - isminimizable, - isdifferentiable, - istwicedifferentiable, - isconvex, - isstrictlyconvex, - isstronglyconvex, - isnemitski, - isunivfishercons, - isfishercons, - islipschitzcont, - islocallylipschitzcont, - islipschitzcont_deriv, - isclipable, - ismarginbased, - isclasscalibrated, - isdistancebased, - - # LinearAlgebra - issymmetric, - - getobs, - getobs!, - datasubset, - ObsDim, - - targets, - - AbstractDataIterator, - AbstractObsIterator, - AbstractBatchIterator, - - DataView, - AbstractObsView, - AbstractBatchView, - - DataIterator, - ObsIterator, - BatchIterator, - - # StatsBase - fit, - fit!, - nobs +# OTHER CONCEPTS +include("other.jl") end # module diff --git a/src/costs.jl b/src/costs.jl new file mode 100644 index 0000000..525b4e3 --- /dev/null +++ b/src/costs.jl @@ -0,0 +1,296 @@ +@doc doc""" +Baseclass for any kind of cost. Notable examples for +costs are `Loss` and `Penalty`. +""" +abstract type Cost end + +@doc doc""" +Baseclass for all losses. A loss is some (possibly simplified) +function `L(features, targets, outputs)`, where `outputs` are the +result of some function `f(features)`. +""" +abstract type Loss <: Cost end + +@doc doc""" +A loss is considered **supervised**, if all the information needed +to compute `L(features, targets, outputs)` are contained in +`targets` and `outputs`, and thus allows for the simplification +`L(targets, outputs)`. +""" +abstract type SupervisedLoss <: Loss end + +@doc doc""" +A supervised loss is considered **unary** if it can be written as a composition +`L(Ψ(output, target))` for some binary function `Ψ`. In this case the loss can +be evaluated with a single argument termed the **agreement** `Ψ(output, target)`. +Notable examples for unary supervised losses are distance-based (`Ψ(ŷ,y) = ŷ - y`) +and margin-based (`Ψ(ŷ,y) = ŷ*y`) losses. +""" +abstract type UnarySupervisedLoss <: SupervisedLoss end + +@doc doc""" +A supervised loss that can be simplified to +`L(targets, outputs) = L(targets - outputs)` is considered +**distance-based**. +""" +abstract type DistanceLoss <: UnarySupervisedLoss end + +@doc doc""" +A supervised loss, where the targets are in {-1, 1}, and which +can be simplified to `L(targets, outputs) = L(targets * outputs)` +is considered **margin-based**. +""" +abstract type MarginLoss <: UnarySupervisedLoss end + +@doc doc""" +A loss is considered **unsupervised**, if all the information needed +to compute `L(features, targets, outputs)` are contained in +`features` and `outputs`, and thus allows for the simplification +`L(features, outputs)`. +""" +abstract type UnsupervisedLoss <: Loss end + +@doc doc""" +Baseclass for all penalties. 
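+
+A notable example is a regularization term, i.e. a function of the
+model coefficients alone, such as the ridge penalty
+``P(\theta) = \lambda {\|\theta\|}_2^2``, which is added to a loss
+to discourage large coefficient values.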
+""" +abstract type Penalty <: Cost end + +function value end +function value! end + +function deriv end +function deriv! end + +function deriv2 end +function deriv2! end + +function value_deriv end +function value_deriv! end + +@doc doc""" + isconvex(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` denotes a convex function. +A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is convex if +its domain is a convex set and if for all ``x, y`` in that +domain, with ``\theta`` such that for ``0 \leq \theta \leq 1``, +we have + +```math +f(\theta x + (1 - \theta) y) \leq \theta f(x) + (1 - \theta) f(y) +``` +""" +isconvex(l) = isstrictlyconvex(l) + +@doc doc""" + isstrictlyconvex(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` denotes a strictly convex function. +A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is +strictly convex if its domain is a convex set and if for all +``x, y`` in that domain where ``x \neq y``, with +``\theta`` such that for ``0 < \theta < 1``, we have + +```math +f(\theta x + (1 - \theta) y) < \theta f(x) + (1 - \theta) f(y) +``` +""" +isstrictlyconvex(l) = isstronglyconvex(l) + +@doc doc""" + isstronglyconvex(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` denotes a strongly convex function. +A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is +``m``-strongly convex if its domain is a convex set and if +``\forall x,y \in`` **dom** ``f`` where ``x \neq y``, +and ``\theta`` such that for ``0 \le \theta \le 1`` , we have + +```math +f(\theta x + (1 - \theta)y) < \theta f(x) + (1 - \theta) f(y) - 0.5 m \cdot \theta (1 - \theta) {\| x - y \|}_2^2 +``` + +In a more familiar setting, if the loss function is +differentiable we have + +```math +\left( \nabla f(x) - \nabla f(y) \right)^\top (x - y) \ge m {\| x - y\|}_2^2 +``` +""" +isstronglyconvex(::SupervisedLoss) = false + +@doc doc""" + isdifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + +Return `true` if the given `loss` is differentiable +(optionally limited to the given point `x` if specified). + +A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}^{m}`` is +differentiable at a point ``x \in`` **int dom** ``f``, if there +exists a matrix ``Df(x) \in \mathbb{R}^{m \times n}`` such that +it satisfies: + +```math +\lim_{z \neq x, z \to x} \frac{{\|f(z) - f(x) - Df(x)(z-x)\|}_2}{{\|z - x\|}_2} = 0 +``` + +A function is differentiable if its domain is open and it is +differentiable at every point ``x``. +""" +isdifferentiable(l::SupervisedLoss) = istwicedifferentiable(l) +isdifferentiable(l::SupervisedLoss, at) = isdifferentiable(l) + +@doc doc""" + istwicedifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + +Return `true` if the given `loss` is differentiable +(optionally limited to the given point `x` if specified). + +A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}`` is +said to be twice differentiable at a point ``x \in`` **int +dom** ``f``, if the function derivative for ``\nabla f`` +exists at ``x``. + +```math +\nabla^2 f(x) = D \nabla f(x) +``` + +A function is twice differentiable if its domain is open and it +is twice differentiable at every point ``x``. +""" +istwicedifferentiable(::SupervisedLoss) = false +istwicedifferentiable(l::SupervisedLoss, at) = istwicedifferentiable(l) + +@doc doc""" + islocallylipschitzcont(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` function is locally-Lipschitz +continous. 
+
+A supervised loss ``L : Y \times \mathbb{R} \rightarrow [0, \infty)``
+is called locally Lipschitz continuous if ``\forall a \ge 0``
+there exists a constant ``c_a \ge 0``, such that
+
+```math
+\sup_{y \in Y} \left| L(y,t) − L(y,t′) \right| \le c_a |t − t′|, \qquad t,t′ \in [−a,a]
+```
+
+Every convex function is locally Lipschitz continuous.
+"""
+islocallylipschitzcont(l) = isconvex(l) || islipschitzcont(l)
+
+@doc doc"""
+    islipschitzcont(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` function is Lipschitz continuous.
+
+A supervised loss function ``L : Y \times \mathbb{R} \rightarrow
+[0, \infty)`` is Lipschitz continuous if there exists a finite
+constant ``M < \infty`` such that
+
+```math
+|L(y, t) - L(y, t′)| \le M |t - t′|, \qquad \forall (y, t) \in Y \times \mathbb{R}
+```
+"""
+islipschitzcont(::SupervisedLoss) = false
+
+@doc doc"""
+    isnemitski(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` denotes a Nemitski loss function.
+
+We call a supervised loss function ``L : Y \times \mathbb{R}
+\rightarrow [0,\infty)`` a Nemitski loss if there exist a
+measurable function ``b : Y \rightarrow [0, \infty)`` and an
+increasing function ``h : [0, \infty) \rightarrow [0, \infty)``
+such that
+
+```math
+L(y,\hat{y}) \le b(y) + h(|\hat{y}|), \qquad (y, \hat{y}) \in Y \times \mathbb{R}.
+```
+
+If a loss is locally Lipschitz continuous, then it is a Nemitski loss.
+"""
+isnemitski(l::SupervisedLoss) = islocallylipschitzcont(l)
+
+@doc doc"""
+    isclipable(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` function is clipable. A
+supervised loss ``L : Y \times \mathbb{R} \rightarrow [0,
+\infty)`` can be clipped at ``M > 0`` if, for all ``(y,t)
+\in Y \times \mathbb{R}``,
+
+```math
+L(y, \hat{t}) \le L(y, t)
+```
+
+where ``\hat{t}`` denotes the clipped value of ``t`` at
+``\pm M``. That is
+
+```math
+\hat{t} = \begin{cases} -M & \quad \text{if } t < -M \\ t & \quad \text{if } t \in [-M, M] \\ M & \quad \text{if } t > M \end{cases}
+```
+"""
+isclipable(::SupervisedLoss) = false
+
+@doc doc"""
+    isdistancebased(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is a distance-based loss.
+
+A supervised loss function ``L : Y \times \mathbb{R} \rightarrow
+[0, \infty)`` is said to be **distance-based**, if there exists a
+representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)``
+satisfying ``\psi (0) = 0`` and
+
+```math
+L(y, \hat{y}) = \psi (\hat{y} - y), \qquad (y, \hat{y}) \in Y \times \mathbb{R}
+```
+"""
+isdistancebased(::SupervisedLoss) = false
+
+@doc doc"""
+    ismarginbased(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is a margin-based loss.
+
+A supervised loss function ``L : Y \times \mathbb{R} \rightarrow
+[0, \infty)`` is said to be **margin-based**, if there exists a
+representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)``
+satisfying
+
+```math
+L(y, \hat{y}) = \psi (y \cdot \hat{y}), \qquad (y, \hat{y}) \in Y \times \mathbb{R}
+```
+"""
+ismarginbased(::SupervisedLoss) = false
+
+@doc doc"""
+    isclasscalibrated(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is classification calibrated.
+"""
+isclasscalibrated(::SupervisedLoss) = false
+
+@doc doc"""
+    issymmetric(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is a symmetric loss.
+ +A function ``f : \mathbb{R} \rightarrow [0,\infty)`` is said +to be symmetric about origin if we have + +```math +f(x) = f(-x), \qquad \forall x \in \mathbb{R} +``` + +A distance-based loss is said to be symmetric if its representing +function is symmetric. +""" +issymmetric(::SupervisedLoss) = false + +@doc doc""" + isminimizable(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` is a minimizable loss. +""" +isminimizable(l) = isconvex(l) diff --git a/src/other.jl b/src/other.jl new file mode 100644 index 0000000..ec7e87a --- /dev/null +++ b/src/other.jl @@ -0,0 +1,384 @@ +"Return the gradient of the learnable parameters w.r.t. some objective" +function grad end +"Do a backward pass, updating the gradients of learnable parameters and/or inputs" +function grad! end + +""" +Anything that takes an input and performs some kind +of function to produce an output. For example a linear +prediction function. +""" +abstract type Transformation end +abstract type StochasticTransformation <: Transformation end +abstract type Learnable <: Transformation end + +function transform end +"Do a forward pass, and return the output" +function transform! end + +""" +Baseclass for any prediction model that can be minimized. +This means that an object of a subclass contains all the +information needed to compute its own current loss. +""" +abstract type Minimizable <: Learnable end + +function update end +function update! end +function learn end +function learn! end + +# getobs(data, [idx], [obsdim]) +# +# Return the observations corresponding to the observation-index +# `idx`. Note that `idx` can be of type `Int` or `AbstractVector`. +# Both options must be supported by a custom type. +# +# The returned observation(s) should be in the form intended to +# be passed as-is to some learning algorithm. There is no strict +# interface requirement on how this "actual data" must look like. +# Every author behind some custom data container can make this +# decision him-/herself. We do, however, expect it to be consistent +# for `idx` being an integer, as well as `idx` being an abstract +# vector, respectively. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function getobs end + +# getobs!(buffer, data, [idx], [obsdim]) +# +# Inplace version of `getobs(data, idx, obsdim)`. If this method +# is defined for the type of `data`, then `buffer` should be used +# to store the result, instead of allocating a dedicated object. +# +# Implementing this function is optional. In the case no such +# method is provided for the type of `data`, then `buffer` will be +# *ignored* and the result of `getobs` returned. This could be +# because the type of `data` may not lend itself to the concept +# of `copy!`. Thus supporting a custom `getobs!(::MyType, ...)` +# is optional and not required. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function getobs! end + +# -------------------------------------------------------------------- + +# gettarget([f], observation) +# +# Use `f` (if provided) to extract the target from the single +# `observation` and return it. It is used internally by +# `targets` (only if `f` is provided) and by +# `eachtarget` (always) on each individual observation. 
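+#
+# For example (a hypothetical sketch, not an actual method of this
+# package): a container whose observations are `(features, target)`
+# tuples could support it via
+#
+#     LearnBase.gettarget(f, obs::Tuple) = f(last(obs))
+#
+# so that `targets(f, data)` applies `f` to the target of every
+# observation.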
+# +# Even though this function is not exported, it is intended to be +# extended by users to support their custom data storage types. +# +# This function is implemented in MLDataPattern +function gettarget end # not exported + +# gettargets(data, [idx], [obsdim]) +# +# Return the targets corresponding to the observation-index `idx`. +# Note that `idx` can be of type `Int` or `AbstractVector`. +# +# Implementing this function for a custom type of `data` is +# optional. It is particularly useful if the targets in `data` can +# be provided without invoking `getobs`. For example if you have a +# remote data-source where the labels are part of some metadata +# that is locally available. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function gettargets end # not exported + +# targets([f], data, [obsdim]) +# +# This function is implemented in MLDataPattern +function targets end + +# -------------------------------------------------------------------- + +""" + abstract DataView{TElem, TData} <: AbstractVector{TElem} + +Baseclass for all vector-like views of some data structure. +This allow for example to see some design matrix as a vector of +individual observation-vectors instead of one matrix. + +see `MLDataPattern.ObsView` and `MLDataPattern.BatchView` for examples. +""" +abstract type DataView{TElem, TData} <: AbstractVector{TElem} end + +""" + abstract AbstractObsView{TElem, TData} <: DataView{TElem, TData} + +Baseclass for all vector-like views of some data structure, +that views it as some form or vector of observations. + +see `MLDataPattern.ObsView` for a concrete example. +""" +abstract type AbstractObsView{TElem, TData} <: DataView{TElem, TData} end + +""" + abstract AbstractBatchView{TElem, TData} <: DataView{TElem, TData} + +Baseclass for all vector-like views of some data structure, +that views it as some form or vector of equally sized batches. + +see `MLDataPattern.BatchView` for a concrete example. +""" +abstract type AbstractBatchView{TElem, TData} <: DataView{TElem, TData} end + +# -------------------------------------------------------------------- + +""" + abstract DataIterator{TElem,TData} + +Baseclass for all types that iterate over a `data` source +in some manner. The total number of observations may or may +not be known or defined and in general there is no contract that +`getobs` or `nobs` has to be supported by the type of `data`. +Furthermore, `length` should be used to query how many elements +the iterator can provide, while `nobs` may return the underlying +true amount of observations available (if known). + +see `MLDataPattern.RandomObs`, `MLDataPattern.RandomBatches` +""" +abstract type DataIterator{TElem,TData} end + +""" + abstract ObsIterator{TElem,TData} <: DataIterator{TElem,TData} + +Baseclass for all types that iterate over some data source +one observation at a time. + +```julia +using MLDataPattern +@assert typeof(RandomObs(X)) <: ObsIterator + +for x in RandomObs(X) + # ... +end +``` + +see `MLDataPattern.RandomObs` +""" +abstract type ObsIterator{TElem,TData} <: DataIterator{TElem,TData} end + +""" + abstract BatchIterator{TElem,TData} <: DataIterator{TElem,TData} + +Baseclass for all types that iterate over of some data source one +batch at a time. + +```julia +@assert typeof(RandomBatches(X, size=10)) <: BatchIterator + +for x in RandomBatches(X, size=10) + @assert nobs(x) == 10 + # ... 
+end +``` + +see `MLDataPattern.RandomBatches` +""" +abstract type BatchIterator{TElem,TData} <: DataIterator{TElem,TData} end + +# -------------------------------------------------------------------- + +# just for dispatch for those who care to +const AbstractDataIterator{E,T} = Union{DataIterator{E,T}, DataView{E,T}} +const AbstractObsIterator{E,T} = Union{ObsIterator{E,T}, AbstractObsView{E,T}} +const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E,T}} + +# -------------------------------------------------------------------- + +# datasubset(data, [idx], [obsdim]) +# +# Return a lazy subset of the observations in `data` that correspond +# to the given `idx`. No data should be copied except of the +# indices. Note that `idx` can be of type `Int` or `AbstractVector`. +# Both options must be supported by a custom type. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function datasubset end + +""" + default_obsdim(data) + +The specify the default obsdim for a specific type of data. +Defaults to `ObsDim.Undefined()` +""" +function default_obsdim end + +# just for dispatch for those who care to +"see `?ObsDim`" +abstract type ObsDimension end + +""" + module ObsDim + +Singleton types to define which dimension of some data structure +(e.g. some `Array`) denotes the observations. + +- `ObsDim.First()` +- `ObsDim.Last()` +- `ObsDim.Constant(dim)` + +Used for efficient dispatching +""" +module ObsDim + using ..LearnBase: ObsDimension + + """ + Default value for most functions. Denotes that the concept of + an observation dimension is not defined for the given data. + """ + struct Undefined <: ObsDimension end + + """ + ObsDim.Last <: ObsDimension + + Defines that the last dimension denotes the observations + """ + struct Last <: ObsDimension end + + """ + ObsDim.Constant{DIM} <: ObsDimension + + Defines that the dimension `DIM` denotes the observations + """ + struct Constant{DIM} <: ObsDimension end + Constant(dim::Int) = Constant{dim}() + + """ + ObsDim.First <: ObsDimension + + Defines that the first dimension denotes the observations + """ + const First = Constant{1} +end + +Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) +Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim +Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() +Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) +Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) +Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) +function Base.convert(::Type{ObsDimension}, dim::Symbol) + if dim == :first || dim == :begin + ObsDim.First() + elseif dim == Symbol("end") || dim == :last + ObsDim.Last() + elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined + ObsDim.Undefined() + else + throw(ArgumentError("Unknown way to specify a obsdim: $dim")) + end +end + +default_obsdim(data) = ObsDim.Undefined() +default_obsdim(A::AbstractArray) = ObsDim.Last() +default_obsdim(tup::Tuple) = map(default_obsdim, tup) + + +import Base: AbstractSet + +"A continuous range (inclusive) between a lo and a hi" +struct IntervalSet{T} <: AbstractSet{T} + lo::T + hi::T +end +function IntervalSet(lo::A, hi::B) where {A,B} + T = promote_type(A,B) + 
IntervalSet{T}(convert(T,lo), convert(T,hi)) +end + +# numeric interval +randtype(s::IntervalSet{T}) where T <: Number = Float64 +Base.rand(s::IntervalSet{T}, dims::Integer...) where T <: Number = rand(dims...) .* (s.hi - s.lo) .+ s.lo +Base.in(x::Number, s::IntervalSet{T}) where T <: Number = s.lo <= x <= s.hi +Base.length(s::IntervalSet{T}) where T <: Number = 1 +Base.:(==)(s1::IntervalSet{T}, s2::IntervalSet{T}) where T = s1.lo == s2.lo && s1.hi == s2.hi + + +# vector of intervals +randtype(s::IntervalSet{T}) where T <: AbstractVector = Vector{Float64} +Base.rand(s::IntervalSet{T}) where T <: AbstractVector = Float64[rand() * (s.hi[i] - s.lo[i]) + s.lo[i] for i=1:length(s)] +Base.in(x::AbstractVector, s::IntervalSet{T}) where T <: AbstractVector = all(i -> s.lo[i] <= x[i] <= s.hi[i], 1:length(s)) +Base.length(s::IntervalSet{T}) where T <: AbstractVector = length(s.lo) + + +"Set of discrete items" +struct DiscreteSet{T<:AbstractArray} <: AbstractSet{T} + items::T +end +randtype(s::DiscreteSet) = eltype(s.items) +Base.rand(s::DiscreteSet, dims::Integer...) = rand(s.items, dims...) +Base.in(x, s::DiscreteSet) = x in s.items +Base.length(s::DiscreteSet) = length(s.items) +Base.getindex(s::DiscreteSet, i::Int) = s.items[i] +Base.:(==)(s1::DiscreteSet, s2::DiscreteSet) = s1.items == s2.items + + +# operations on arrays of sets +randtype(sets::AbstractArray{S,N}) where {S <: AbstractSet, N} = Array{promote_type(map(randtype, sets)...), N} +Base.rand(sets::AbstractArray{S}) where S <: AbstractSet = eltype(randtype(sets))[rand(s) for s in sets] +function Base.rand(sets::AbstractArray{S}, dim1::Integer, dims::Integer...) where S <: AbstractSet + A = Array{randtype(sets)}(undef, dim1, dims...) + for i in eachindex(A) + A[i] = rand(sets) + end + A +end +function Base.in(xs::AbstractArray, sets::AbstractArray{S}) where S <: AbstractSet + size(xs) == size(sets) && all(map(in, xs, sets)) +end + + +"Groups several heterogenous sets. Used mainly for proper dispatch." +struct TupleSet{T<:Tuple} <: AbstractSet{T} + sets::T +end +TupleSet(sets::AbstractSet...) = TupleSet(sets) + +# rand can return arrays or tuples, but defaults to arrays +randtype(sets::TupleSet, ::Type{Vector}) = Vector{promote_type(map(randtype, sets.sets)...)} +Base.rand(sets::TupleSet, ::Type{Vector}) = eltype(randtype(sets, Vector))[rand(s) for s in sets.sets] +randtype(sets::TupleSet, ::Type{Tuple}) = Tuple{map(randtype, sets.sets)...} +Base.rand(sets::TupleSet, ::Type{Tuple}) = map(rand, sets.sets) +function Base.rand(sets::TupleSet, ::Type{OT}, dim1::Integer, dims::Integer...) where OT + A = Array{randtype(sets, OT)}(undef, dim1, dims...) + for i in eachindex(A) + A[i] = rand(sets, OT) + end + A +end +Base.length(sets::TupleSet) = sum(length(s) for s in sets.sets) +Base.iterate(sets::TupleSet) = iterate(sets.sets) +Base.iterate(sets::TupleSet, i) = iterate(sets.sets, i) + +randtype(sets::TupleSet) = randtype(sets, Vector) +Base.rand(sets::TupleSet, dims::Integer...) = rand(sets, Vector, dims...) 
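+
+# Usage sketch (mirrors the behaviour exercised in test/other.jl): a
+# TupleSet groups heterogeneous component sets and samples them either
+# as a Vector (the default) or as a Tuple:
+#
+#     s = TupleSet(IntervalSet(0, 1), DiscreteSet([0, 1]))
+#     rand(s)         # Vector{Float64}, one draw per component set
+#     rand(s, Tuple)  # (Float64, Int), drawn component-wise
+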
+Base.in(x, sets::TupleSet) = all(map(in, x, sets.sets)) + +"Returns an AbstractSet representing valid input values" +function inputdomain end + +"Returns an AbstractSet representing valid output/target values" +function targetdomain end diff --git a/test/costs.jl b/test/costs.jl new file mode 100644 index 0000000..cd94bad --- /dev/null +++ b/test/costs.jl @@ -0,0 +1,44 @@ +@testset "Costs" begin + @test LearnBase.Cost <: Any + @test LearnBase.Penalty <: LearnBase.Cost + @test LearnBase.Loss <: LearnBase.Cost + @test LearnBase.UnsupervisedLoss <: LearnBase.Loss + @test LearnBase.SupervisedLoss <: LearnBase.Loss + @test LearnBase.UnarySupervisedLoss <: LearnBase.SupervisedLoss + @test LearnBase.MarginLoss <: LearnBase.UnarySupervisedLoss + @test LearnBase.DistanceLoss <: LearnBase.UnarySupervisedLoss + + @test typeof(LearnBase.value) <: Function + @test typeof(LearnBase.value!) <: Function + @test typeof(LearnBase.deriv) <: Function + @test typeof(LearnBase.deriv!) <: Function + @test typeof(LearnBase.deriv2) <: Function + @test typeof(LearnBase.deriv2!) <: Function + @test typeof(LearnBase.value_deriv) <: Function + @test typeof(LearnBase.value_deriv!) <: Function + + @test typeof(LearnBase.isminimizable) <: Function + @test typeof(LearnBase.isdifferentiable) <: Function + @test typeof(LearnBase.istwicedifferentiable) <: Function + @test typeof(LearnBase.isconvex) <: Function + @test typeof(LearnBase.isstrictlyconvex) <: Function + @test typeof(LearnBase.isstronglyconvex) <: Function + @test typeof(LearnBase.isnemitski) <: Function + @test typeof(LearnBase.islipschitzcont) <: Function + @test typeof(LearnBase.islocallylipschitzcont) <: Function + @test typeof(LearnBase.isclipable) <: Function + @test typeof(LearnBase.ismarginbased) <: Function + @test typeof(LearnBase.isclasscalibrated) <: Function + @test typeof(LearnBase.isdistancebased) <: Function + @test typeof(LearnBase.issymmetric) <: Function + + # test fallback methods + struct MyStronglyConvexType end + LearnBase.isstronglyconvex(::MyStronglyConvexType) = true + LearnBase.islipschitzcont(::MyStronglyConvexType) = true + @test LearnBase.isstronglyconvex(MyStronglyConvexType()) + @test LearnBase.isstrictlyconvex(MyStronglyConvexType()) + @test LearnBase.isconvex(MyStronglyConvexType()) + @test LearnBase.islipschitzcont(MyStronglyConvexType()) + @test LearnBase.islocallylipschitzcont(MyStronglyConvexType()) +end diff --git a/test/other.jl b/test/other.jl new file mode 100644 index 0000000..7f3ff21 --- /dev/null +++ b/test/other.jl @@ -0,0 +1,235 @@ +@test LearnBase.Minimizable <: Any +@test LearnBase.Transformation <: Any +@test LearnBase.StochasticTransformation <: LearnBase.Transformation + +@test typeof(LearnBase.transform) <: Function +@test typeof(LearnBase.transform!) <: Function +@test typeof(LearnBase.getobs) <: Function +@test typeof(LearnBase.getobs!) <: Function +@test typeof(LearnBase.learn) <: Function +@test typeof(LearnBase.learn!) <: Function +@test typeof(LearnBase.update) <: Function +@test typeof(LearnBase.update!) <: Function + +@test typeof(LearnBase.grad) <: Function +@test typeof(LearnBase.grad!) 
<: Function + +@test typeof(LearnBase.datasubset) <: Function + +@test typeof(LearnBase.targets) <: Function +@test typeof(LearnBase.gettarget) <: Function +@test typeof(LearnBase.gettargets) <: Function + +@test LearnBase.DataView <: AbstractVector +@test LearnBase.DataView <: LearnBase.AbstractDataIterator +@test LearnBase.DataView{Int} <: AbstractVector{Int} +@test LearnBase.DataView{Int,Vector{Int}} <: LearnBase.AbstractDataIterator{Int,Vector{Int}} +@test LearnBase.AbstractObsView <: LearnBase.DataView +@test LearnBase.AbstractObsView <: LearnBase.AbstractObsIterator +@test LearnBase.AbstractObsView{Int,Vector{Int}} <: LearnBase.DataView{Int,Vector{Int}} +@test LearnBase.AbstractObsView{Int,Vector{Int}} <: LearnBase.AbstractObsIterator{Int,Vector{Int}} +@test LearnBase.AbstractBatchView <: LearnBase.DataView +@test LearnBase.AbstractBatchView <: LearnBase.AbstractBatchIterator +@test LearnBase.AbstractBatchView{Int,Vector{Int}} <: LearnBase.DataView{Int,Vector{Int}} +@test LearnBase.AbstractBatchView{Int,Vector{Int}} <: LearnBase.AbstractBatchIterator{Int,Vector{Int}} + +@test LearnBase.DataIterator <: LearnBase.AbstractDataIterator +@test LearnBase.DataIterator{Int,Vector{Int}} <: LearnBase.AbstractDataIterator{Int,Vector{Int}} +@test LearnBase.ObsIterator <: LearnBase.DataIterator +@test LearnBase.ObsIterator <: LearnBase.AbstractObsIterator +@test LearnBase.ObsIterator{Int,Vector{Int}} <: LearnBase.DataIterator{Int,Vector{Int}} +@test LearnBase.ObsIterator{Int,Vector{Int}} <: LearnBase.AbstractObsIterator{Int,Vector{Int}} +@test LearnBase.BatchIterator <: LearnBase.DataIterator +@test LearnBase.BatchIterator <: LearnBase.AbstractBatchIterator +@test LearnBase.BatchIterator{Int,Vector{Int}} <: LearnBase.DataIterator{Int,Vector{Int}} +@test LearnBase.BatchIterator{Int,Vector{Int}} <: LearnBase.AbstractBatchIterator{Int,Vector{Int}} + +@test LearnBase.ObsDim.Constant <: LearnBase.ObsDimension +@test LearnBase.ObsDim.First <: LearnBase.ObsDimension +@test LearnBase.ObsDim.Last <: LearnBase.ObsDimension +@test LearnBase.ObsDim.Undefined <: LearnBase.ObsDimension +@test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} + +# IntervalSet +let s = LearnBase.IntervalSet(-1,1) + @test typeof(s) == LearnBase.IntervalSet{Int} + @test typeof(s) <: AbstractSet + for x in (-1,0,0.5,1,1.0) + @test x in s + end + for x in (-1-1e-10, 1+1e-10, -Inf, Inf, 2, NaN) + @test !(x in s) + end + for i=1:10 + x = rand(s) + @test typeof(x) == Float64 + @test x in s + end + xs = rand(s, 10) + @test typeof(xs) == Vector{Float64} + for x in xs + @test typeof(x) == Float64 + @test x in s + end + @test LearnBase.randtype(s) == Float64 + # @show s LearnBase.randtype(s) +end +let s = LearnBase.IntervalSet(-1,1.0) + @test typeof(s) == LearnBase.IntervalSet{Float64} + @test typeof(s) <: AbstractSet + @test 1 in s + # @show s LearnBase.randtype(s) + @test length(s) == 1 +end + +# IntervalSet{Vector} +let s = LearnBase.IntervalSet([-1.,0.], [1.,1.]) + @test typeof(s) == LearnBase.IntervalSet{Vector{Float64}} + @test typeof(s) <: AbstractSet + @test LearnBase.randtype(s) == Vector{Float64} + @test typeof(rand(s)) == Vector{Float64} + @test rand(s) in s + @test [-1, 0] in s + @test !([-1.5,0] in s) + @test !([0,2] in s) + @test length(s) == 2 +end + +# DiscreteSet +let s = LearnBase.DiscreteSet([-1,1]) + @test typeof(s) == LearnBase.DiscreteSet{Vector{Int}} + @test typeof(s) <: AbstractSet + for x in (-1, 1, -1.0, 1.0) + @test x in s + end + for x in (0, Inf, -Inf, NaN) + @test !(x in s) + end + for 
i=1:10 + x = rand(s) + @test typeof(x) == Int + @test x in s + end + xs = rand(s, 10) + @test typeof(xs) == Vector{Int} + for x in xs + @test typeof(x) == Int + @test x in s + end + @test LearnBase.randtype(s) == Int + @test length(s) == 2 + @test s[1] == -1 +end +let s = LearnBase.DiscreteSet([-1,1.0]) + @test typeof(s) == LearnBase.DiscreteSet{Vector{Float64}} + @test typeof(s) <: AbstractSet + @test typeof(rand(s)) == Float64 + @test typeof(rand(s, 2)) == Vector{Float64} +end + +# TupleSet +let s = LearnBase.TupleSet(LearnBase.IntervalSet(0,1), LearnBase.DiscreteSet([0,1])) + @test typeof(s) == LearnBase.TupleSet{Tuple{LearnBase.IntervalSet{Int}, LearnBase.DiscreteSet{Vector{Int}}}} + @test typeof(s) <: AbstractSet + for x in ([0,0], [0.0,0.0], [0.5,1.0]) + @test x in s + end + for x in ([0,0.5], [-1,0]) + @test !(x in s) + end + @test typeof(rand(s)) == Vector{Float64} + @test typeof(rand(s, 2)) == Vector{Vector{Float64}} + @test typeof(rand(s, Tuple)) == Tuple{Float64,Int} + @test typeof(rand(s, Tuple, 2)) == Vector{Tuple{Float64,Int}} + @test LearnBase.randtype(s) == Vector{Float64} + + tot = 0 + for (i,x) in enumerate(s) + @test x == s.sets[i] + tot += length(x) + end + @test length(s) == tot +end + +# arrays of sets +let s = [LearnBase.IntervalSet(0,1), LearnBase.DiscreteSet([0,1])] + @test typeof(s) == Vector{AbstractSet} + for x in ([0,0], [0.0,0.0], [0.5,1.0]) + @test x in s + end + for x in ([0,0.5], [-1,0]) + @test !(x in s) + end + @test typeof(rand(s)) == Vector{Float64} + @test typeof(rand(s, 2)) == Vector{Vector{Float64}} +end + +@testset "obsdim typetree and Constructor" begin + @test_throws MethodError LearnBase.ObsDim.Constant(2.0) + + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.First + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.Constant{1} + + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDim.Last + + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} +end + +@testset "obs_dim helper constructor" begin + @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") + @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) + + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.First() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.Constant(1) + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Last())) === LearnBase.ObsDim.Last() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Constant(2))) === LearnBase.ObsDim.Constant(2) + + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) + @test convert(LearnBase.ObsDimension, 1) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, 2) === LearnBase.ObsDim.Constant(2) + @test convert(LearnBase.ObsDimension, 6) === LearnBase.ObsDim.Constant(6) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, :first) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") + @test convert(LearnBase.ObsDimension, (:first,:last)) === (LearnBase.ObsDim.First(),LearnBase.ObsDim.Last()) + @test convert(LearnBase.ObsDimension, :first) === LearnBase.ObsDim.First() + @test 
convert(LearnBase.ObsDimension, :begin) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "first") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "BEGIN") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, :end) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :last) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "End") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "LAST") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :nothing) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :none) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :na) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :null) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :undefined) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() +end + +struct SomeType end +@testset "obsdim default values" begin + @testset "Arrays, SubArrays, and Sparse Arrays" begin + @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(rand(10,5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === LearnBase.ObsDim.Last() + end + + @testset "Types with no specified default" begin + @test @inferred(LearnBase.default_obsdim(SomeType())) === LearnBase.ObsDim.Undefined() + end + + @testset "Tuples" begin + @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Last()) + @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Last()) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 2604655..b890291 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,310 +2,5 @@ using LearnBase using SparseArrays using Test -# Test if types are exported properly -@test Cost <: Any -@test Penalty <: Cost -@test Loss <: Cost -@test UnsupervisedLoss <: Loss -@test SupervisedLoss <: Loss -@test MarginLoss <: SupervisedLoss -@test DistanceLoss <: SupervisedLoss - -@test Minimizable <: Any -@test Transformation <: Any -@test StochasticTransformation <: Transformation - -# Test if functions are exported properly -@test typeof(transform) <: Function -@test typeof(transform!) <: Function -@test typeof(getobs) <: Function -@test typeof(getobs!) <: Function -@test typeof(learn) <: Function -@test typeof(learn!) <: Function -@test typeof(update) <: Function -@test typeof(update!) <: Function - -@test typeof(scaled) <: Function - -@test typeof(value) <: Function -@test typeof(value!) 
<: Function -@test typeof(meanvalue) <: Function -@test typeof(sumvalue) <: Function -@test typeof(meanderiv) <: Function -@test typeof(sumderiv) <: Function -@test typeof(deriv) <: Function -@test typeof(deriv2) <: Function -@test typeof(deriv!) <: Function -@test typeof(value_deriv) <: Function -@test typeof(grad) <: Function -@test typeof(grad!) <: Function -@test typeof(addgrad!) <: Function -@test typeof(value_grad) <: Function -@test typeof(value_grad!) <: Function -@test typeof(prox) <: Function -@test typeof(prox!) <: Function - -@test typeof(isminimizable) <: Function -@test typeof(isdifferentiable) <: Function -@test typeof(istwicedifferentiable) <: Function -@test typeof(isconvex) <: Function -@test typeof(isstrictlyconvex) <: Function -@test typeof(isstronglyconvex) <: Function -@test typeof(isnemitski) <: Function -@test typeof(isunivfishercons) <: Function -@test typeof(isfishercons) <: Function -@test typeof(islipschitzcont) <: Function -@test typeof(islocallylipschitzcont) <: Function -@test typeof(islipschitzcont_deriv) <: Function -@test typeof(isclipable) <: Function -@test typeof(ismarginbased) <: Function -@test typeof(isclasscalibrated) <: Function -@test typeof(isdistancebased) <: Function - -@test typeof(issymmetric) <: Function - -@test typeof(getobs) <: Function -@test typeof(getobs!) <: Function -@test typeof(datasubset) <: Function - -@test typeof(targets) <: Function -@test typeof(LearnBase.gettarget) <: Function -@test typeof(LearnBase.gettargets) <: Function - -@test DataView <: AbstractVector -@test DataView <: AbstractDataIterator -@test DataView{Int} <: AbstractVector{Int} -@test DataView{Int,Vector{Int}} <: AbstractDataIterator{Int,Vector{Int}} -@test AbstractObsView <: DataView -@test AbstractObsView <: AbstractObsIterator -@test AbstractObsView{Int,Vector{Int}} <: DataView{Int,Vector{Int}} -@test AbstractObsView{Int,Vector{Int}} <: AbstractObsIterator{Int,Vector{Int}} -@test AbstractBatchView <: DataView -@test AbstractBatchView <: AbstractBatchIterator -@test AbstractBatchView{Int,Vector{Int}} <: DataView{Int,Vector{Int}} -@test AbstractBatchView{Int,Vector{Int}} <: AbstractBatchIterator{Int,Vector{Int}} - -@test DataIterator <: AbstractDataIterator -@test DataIterator{Int,Vector{Int}} <: AbstractDataIterator{Int,Vector{Int}} -@test ObsIterator <: DataIterator -@test ObsIterator <: AbstractObsIterator -@test ObsIterator{Int,Vector{Int}} <: DataIterator{Int,Vector{Int}} -@test ObsIterator{Int,Vector{Int}} <: AbstractObsIterator{Int,Vector{Int}} -@test BatchIterator <: DataIterator -@test BatchIterator <: AbstractBatchIterator -@test BatchIterator{Int,Vector{Int}} <: DataIterator{Int,Vector{Int}} -@test BatchIterator{Int,Vector{Int}} <: AbstractBatchIterator{Int,Vector{Int}} - -@test typeof(fit) <: Function -@test typeof(fit!) <: Function -@test typeof(nobs) <: Function - -@test ObsDim.Constant <: LearnBase.ObsDimension -@test ObsDim.First <: LearnBase.ObsDimension -@test ObsDim.Last <: LearnBase.ObsDimension -@test ObsDim.Undefined <: LearnBase.ObsDimension -@test typeof(ObsDim.Constant(2)) <: ObsDim.Constant{2} - -# Test that LearnBase reuses StatsBase functions -using StatsBase -@test StatsBase.fit == LearnBase.fit -@test StatsBase.fit! == LearnBase.fit! 
-@test StatsBase.nobs == LearnBase.nobs - -# Test superset fallbacks -struct MyStronglyConvexType end -LearnBase.isstronglyconvex(::MyStronglyConvexType) = true -LearnBase.islipschitzcont(::MyStronglyConvexType) = true -@test isstronglyconvex(MyStronglyConvexType()) -@test isstrictlyconvex(MyStronglyConvexType()) -@test isconvex(MyStronglyConvexType()) -@test islipschitzcont(MyStronglyConvexType()) -@test islocallylipschitzcont(MyStronglyConvexType()) - -# IntervalSet -let s = IntervalSet(-1,1) - @test typeof(s) == IntervalSet{Int} - @test typeof(s) <: AbstractSet - for x in (-1,0,0.5,1,1.0) - @test x in s - end - for x in (-1-1e-10, 1+1e-10, -Inf, Inf, 2, NaN) - @test !(x in s) - end - for i=1:10 - x = rand(s) - @test typeof(x) == Float64 - @test x in s - end - xs = rand(s, 10) - @test typeof(xs) == Vector{Float64} - for x in xs - @test typeof(x) == Float64 - @test x in s - end - @test LearnBase.randtype(s) == Float64 - # @show s LearnBase.randtype(s) -end -let s = IntervalSet(-1,1.0) - @test typeof(s) == IntervalSet{Float64} - @test typeof(s) <: AbstractSet - @test 1 in s - # @show s LearnBase.randtype(s) - @test length(s) == 1 -end - -# IntervalSet{Vector} -let s = IntervalSet([-1.,0.], [1.,1.]) - @test typeof(s) == IntervalSet{Vector{Float64}} - @test typeof(s) <: AbstractSet - @test LearnBase.randtype(s) == Vector{Float64} - @test typeof(rand(s)) == Vector{Float64} - @test rand(s) in s - @test [-1, 0] in s - @test !([-1.5,0] in s) - @test !([0,2] in s) - @test length(s) == 2 -end - -# DiscreteSet -let s = DiscreteSet([-1,1]) - @test typeof(s) == DiscreteSet{Vector{Int}} - @test typeof(s) <: AbstractSet - for x in (-1, 1, -1.0, 1.0) - @test x in s - end - for x in (0, Inf, -Inf, NaN) - @test !(x in s) - end - for i=1:10 - x = rand(s) - @test typeof(x) == Int - @test x in s - end - xs = rand(s, 10) - @test typeof(xs) == Vector{Int} - for x in xs - @test typeof(x) == Int - @test x in s - end - @test LearnBase.randtype(s) == Int - @test length(s) == 2 - @test s[1] == -1 - # @show s LearnBase.randtype(s) -end -let s = DiscreteSet([-1,1.0]) - @test typeof(s) == DiscreteSet{Vector{Float64}} - @test typeof(s) <: AbstractSet - @test typeof(rand(s)) == Float64 - @test typeof(rand(s, 2)) == Vector{Float64} - # @show s LearnBase.randtype(s) -end - -# TupleSet -let s = TupleSet(IntervalSet(0,1), DiscreteSet([0,1])) - @test typeof(s) == TupleSet{Tuple{IntervalSet{Int}, DiscreteSet{Vector{Int}}}} - @test typeof(s) <: AbstractSet - for x in ([0,0], [0.0,0.0], [0.5,1.0]) - @test x in s - end - for x in ([0,0.5], [-1,0]) - @test !(x in s) - end - @test typeof(rand(s)) == Vector{Float64} - @test typeof(rand(s, 2)) == Vector{Vector{Float64}} - @test typeof(rand(s, Tuple)) == Tuple{Float64,Int} - @test typeof(rand(s, Tuple, 2)) == Vector{Tuple{Float64,Int}} - @test LearnBase.randtype(s) == Vector{Float64} - # @show s LearnBase.randtype(s) - - tot = 0 - for (i,x) in enumerate(s) - @test x == s.sets[i] - tot += length(x) - end - @test length(s) == tot -end - -# arrays of sets -let s = [IntervalSet(0,1), DiscreteSet([0,1])] - @test typeof(s) == Vector{AbstractSet} - for x in ([0,0], [0.0,0.0], [0.5,1.0]) - @test x in s - end - for x in ([0,0.5], [-1,0]) - @test !(x in s) - end - @test typeof(rand(s)) == Vector{Float64} - @test typeof(rand(s, 2)) == Vector{Vector{Float64}} - # @show s LearnBase.randtype(s) -end - -@testset "obsdim typetree and Constructor" begin - @test_throws MethodError ObsDim.Constant(2.0) - - @test typeof(ObsDim.First()) <: LearnBase.ObsDimension - @test typeof(ObsDim.First()) <: 
ObsDim.First - @test typeof(ObsDim.First()) <: ObsDim.Constant{1} - - @test typeof(ObsDim.Last()) <: LearnBase.ObsDimension - @test typeof(ObsDim.Last()) <: ObsDim.Last - - @test typeof(ObsDim.Constant(2)) <: LearnBase.ObsDimension - @test typeof(ObsDim.Constant(2)) <: ObsDim.Constant{2} -end - -@testset "obs_dim helper constructor" begin - @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") - @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) - - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.First())) === ObsDim.First() - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.First())) === ObsDim.Constant(1) - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.Last())) === ObsDim.Last() - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.Constant(2))) === ObsDim.Constant(2) - - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) - @test convert(LearnBase.ObsDimension, 1) === ObsDim.First() - @test convert(LearnBase.ObsDimension, 2) === ObsDim.Constant(2) - @test convert(LearnBase.ObsDimension, 6) === ObsDim.Constant(6) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, :first) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") - @test convert(LearnBase.ObsDimension, (:first,:last)) === (ObsDim.First(),ObsDim.Last()) - @test convert(LearnBase.ObsDimension, :first) === ObsDim.First() - @test convert(LearnBase.ObsDimension, :begin) === ObsDim.First() - @test convert(LearnBase.ObsDimension, "first") === ObsDim.First() - @test convert(LearnBase.ObsDimension, "BEGIN") === ObsDim.First() - @test convert(LearnBase.ObsDimension, :end) === ObsDim.Last() - @test convert(LearnBase.ObsDimension, :last) === ObsDim.Last() - @test convert(LearnBase.ObsDimension, "End") === ObsDim.Last() - @test convert(LearnBase.ObsDimension, "LAST") === ObsDim.Last() - @test convert(LearnBase.ObsDimension, :nothing) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :none) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :na) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :null) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :undefined) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, nothing) === ObsDim.Undefined() -end - -struct SomeType end -@testset "obsdim default values" begin - @testset "Arrays, SubArrays, and Sparse Arrays" begin - @test @inferred(LearnBase.default_obsdim(rand(10))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(rand(10,5))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === ObsDim.Last() - end - - @testset "Types with no specified default" begin - @test @inferred(LearnBase.default_obsdim(SomeType())) === ObsDim.Undefined() - end - - @testset "Tuples" begin - @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (ObsDim.Undefined(), ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (ObsDim.Undefined(), ObsDim.Last()) - @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (ObsDim.Last(), ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === 
(ObsDim.Last(), ObsDim.Last()) - end -end +include("costs.jl") +include("other.jl") From dece2909a5ef04c71351d0f57b14698eb981a465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 13:06:31 -0300 Subject: [PATCH 02/15] Add docstrings for value function in costs.jl --- src/costs.jl | 95 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 11 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 525b4e3..37bf311 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -21,16 +21,16 @@ abstract type SupervisedLoss <: Loss end @doc doc""" A supervised loss is considered **unary** if it can be written as a composition -`L(Ψ(output, target))` for some binary function `Ψ`. In this case the loss can -be evaluated with a single argument termed the **agreement** `Ψ(output, target)`. -Notable examples for unary supervised losses are distance-based (`Ψ(ŷ,y) = ŷ - y`) -and margin-based (`Ψ(ŷ,y) = ŷ*y`) losses. +`L(Ψ(target, output))` for some binary function `Ψ`. In this case the loss can +be evaluated with a single argument termed the **agreement** `Ψ(target, output)`. +Notable examples for unary supervised losses are distance-based (`Ψ(y,ŷ) = ŷ - y`) +and margin-based (`Ψ(y,ŷ) = ŷ*y`) losses. """ abstract type UnarySupervisedLoss <: SupervisedLoss end @doc doc""" A supervised loss that can be simplified to -`L(targets, outputs) = L(targets - outputs)` is considered +`L(targets, outputs) = L(outputs - targets)` is considered **distance-based**. """ abstract type DistanceLoss <: UnarySupervisedLoss end @@ -55,16 +55,89 @@ Baseclass for all penalties. """ abstract type Penalty <: Cost end -function value end -function value! end +@doc doc""" + value(loss, target, output) -function deriv end -function deriv! end +Compute the (non-negative) numeric result for the `loss` function +return it. Note that `target` and `output` can be of different +numeric type, in which case promotion is performed in the manner +appropriate for the given loss. -function deriv2 end -function deriv2! end +```math +L : Y \times \mathbb{R} \rightarrow [0,\infty) +``` + +# Arguments + +- `loss::SupervisedLoss`: The loss-function ``L`` we want to + compute the value with. +- `target::Number`: The ground truth ``y \in Y`` of the observation. +- `output::Number`: The predicted output ``\hat{y} \in \mathbb{R}`` + for the observation. +""" +value(loss::SupervisedLoss, target::Number, output::Number) = + MethodError(value, (loss, target, output)) +""" + value(loss, targets, outputs, aggmode) -> Number + +Compute the weighted or unweighted sum or mean (depending on +aggregation mode `aggmode`) of the individual values of the `loss` +function for each pair in `targets` and `outputs`. This method +will not allocate a temporary array. + +In the case that the two parameters are arrays with a different +number of dimensions, broadcast will be performed. Note that the +given parameters are expected to have the same size in the +dimensions they share. + +# Arguments + +- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. +- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. +- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. +- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), + [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or + [`AggMode.WeightedMean`](@ref). 
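For illustration only, a minimal sketch of how a concrete loss could hook into the scalar method above; `MyAbsLoss` is a hypothetical type, not something this patch defines:

```julia
using LearnBase

# Hypothetical loss type, not part of this patch: L(y, ŷ) = |ŷ - y|
struct MyAbsLoss <: LearnBase.DistanceLoss end

# Hook into the interface by adding a method for the scalar form
LearnBase.value(::MyAbsLoss, target::Number, output::Number) = abs(output - target)

LearnBase.value(MyAbsLoss(), 1.0, 0.7)   # ≈ 0.3
```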
+""" +value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = + MethodError(value, (loss, targets, outputs, aggmode)) + +""" + value(loss, targets, outputs, aggmode, obsdim) -> AbstractVector + +Compute the values of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation (depending on +`aggmode`). This method will not allocate a temporary array, but +it will allocate the resulting vector. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. they must +not be vectors). + +# Arguments + +- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. +- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. +- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. +- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), + [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or + [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. see `?ObsDim` for more information. +""" +value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(value, (loss, targets, outputs, aggmode, obsdim)) + +function deriv end +function deriv2 end function value_deriv end + +function value! end +function deriv! end +function deriv2! end function value_deriv! end @doc doc""" From 5caa914f9ce5cd72f6d879aa5b14375d52b3cc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 13:15:52 -0300 Subject: [PATCH 03/15] Add docstring for value! function in costs.jl --- src/costs.jl | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 37bf311..a46945e 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -78,7 +78,7 @@ L : Y \times \mathbb{R} \rightarrow [0,\infty) value(loss::SupervisedLoss, target::Number, output::Number) = MethodError(value, (loss, target, output)) -""" +@doc doc""" value(loss, targets, outputs, aggmode) -> Number Compute the weighted or unweighted sum or mean (depending on @@ -103,7 +103,7 @@ dimensions they share. value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = MethodError(value, (loss, targets, outputs, aggmode)) -""" +@doc doc""" value(loss, targets, outputs, aggmode, obsdim) -> AbstractVector Compute the values of the `loss` function for each pair in @@ -130,7 +130,37 @@ not be vectors). value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(value, (loss, targets, outputs, aggmode, obsdim)) - + +@doc doc""" + value!(buffer, loss, targets, outputs, aggmode, obsdim) -> buffer + +Compute the values of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation, +depending on `aggmode`. The results are stored into the given +vector `buffer`. This method will not allocate a temporary array. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. so they +must not be vectors). + +# Arguments + +- `buffer::AbstractArray`: Array to store the computed values in. 
+ Old values will be overwritten and lost. +- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. +- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. +- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. +- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), + [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or + [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. see `?ObsDim` for more information. +""" +value!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(value!, (buffer, loss, targets, outputs, aggmode, obsdim)) + function deriv end function deriv2 end function value_deriv end From 9655883547691b9468e093cdff1c4b49b4f35988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 13:51:51 -0300 Subject: [PATCH 04/15] Add aggmode.jl and obsdim.jl source files --- src/LearnBase.jl | 6 +++ src/aggmode.jl | 111 +++++++++++++++++++++++++++++++++++++++++++++++ src/costs.jl | 1 - src/obsdim.jl | 90 ++++++++++++++++++++++++++++++++++++++ src/other.jl | 93 --------------------------------------- 5 files changed, 207 insertions(+), 94 deletions(-) create mode 100644 src/aggmode.jl create mode 100644 src/obsdim.jl diff --git a/src/LearnBase.jl b/src/LearnBase.jl index 867db18..1113049 100644 --- a/src/LearnBase.jl +++ b/src/LearnBase.jl @@ -2,6 +2,12 @@ module LearnBase using Markdown # for docstrings +# AGGREGATION MODES +include("aggmode.jl") + +# OBSERVATION DIMENSIONS +include("obsdim.jl") + # LEARNING COSTS (e.g. loss & penalty) include("costs.jl") diff --git a/src/aggmode.jl b/src/aggmode.jl new file mode 100644 index 0000000..1e78b43 --- /dev/null +++ b/src/aggmode.jl @@ -0,0 +1,111 @@ +""" +Baseclass for all aggregation modes. +""" +abstract type AggregateMode end + +""" + module AggMode + +Types for aggregation of multiple observations. + +- `AggMode.None()` +- `AggMode.Sum()` +- `AggMode.Mean()` +- `AggMode.WeightedSum(weights)` +- `AggMode.WeightedMean(weights)` +""" +module AggMode + using ..LearnBase: AggregateMode + + """ + AggMode.None() + + Opt-out of aggregation. This is usually the default value. + Using `None` will cause the element-wise results to be returned. + """ + struct None <: AggregateMode end + + """ + AggMode.Sum() + + Causes the method to return the unweighted sum of the + elements instead of the individual elements. Can be used in + combination with `ObsDim`, in which case a vector will be + returned containing the sum for each observation (useful + mainly for multivariable regression). + """ + struct Sum <: AggregateMode end + + """ + AggMode.Mean() + + Causes the method to return the unweighted mean of the + elements instead of the individual elements. Can be used in + combination with `ObsDim`, in which case a vector will be + returned containing the mean for each observation (useful + mainly for multivariable regression). + """ + struct Mean <: AggregateMode end + + """ + AggMode.WeightedSum(weights; [normalize = false]) + + Causes the method to return the weighted sum of all + observations. The variable `weights` has to be a vector of + the same length as the number of observations. + If `normalize = true`, the values of the weight vector will + be normalized in such as way that they sum to one. 
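For illustration, a small sketch constructing the aggregation modes defined in this file; the weight vector is made up for the example:

```julia
using LearnBase: AggMode

w = [0.1, 0.4, 0.5]                       # one weight per observation (made up)

AggMode.Sum()                             # unweighted sum of the element-wise results
AggMode.Mean()                            # unweighted mean
AggMode.WeightedSum(w)                    # weights used as given (normalize = false)
AggMode.WeightedMean(w, normalize=false)  # opt out of the default normalization
```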
+ + # Arguments + + - `weights::AbstractVector`: Vector of weight values that + can be used to give certain observations a stronger + influence on the sum. + + - `normalize::Bool`: Boolean that specifies if the weight + vector should be transformed in such a way that it sums to + one (i.e. normalized). This will not mutate the weight + vector but instead happen on the fly during the + accumulation. + + Defaults to `false`. Setting it to `true` only really + makes sense in multivalue-regression, otherwise the result + will be the same as for [`WeightedMean`](@ref). + """ + struct WeightedSum{W<:AbstractVector} <: AggregateMode + weights::W + normalize::Bool + end + WeightedSum(weights::AbstractVector; normalize::Bool = false) = WeightedSum(weights, normalize) + + """ + AggMode.WeightedMean(weights; [normalize = true]) + + Causes the method to return the weighted mean of all + observations. The variable `weights` has to be a vector of + the same length as the number of observations. + If `normalize = true`, the values of the weight vector will + be normalized in such as way that they sum to one. + + # Arguments + + - `weights::AbstractVector`: Vector of weight values that can + be used to give certain observations a stronger influence + on the mean. + + - `normalize::Bool`: Boolean that specifies if the weight + vector should be transformed in such a way that it sums to + one (i.e. normalized). This will not mutate the weight + vector but instead happen on the fly during the + accumulation. + + Defaults to `true`. Setting it to `false` only really makes + sense in multivalue-regression, otherwise the result will + be the same as for [`WeightedSum`](@ref). + """ + struct WeightedMean{W<:AbstractVector} <: AggregateMode + weights::W + normalize::Bool + end + WeightedMean(weights::AbstractVector; normalize::Bool = true) = WeightedMean(weights, normalize) +end diff --git a/src/costs.jl b/src/costs.jl index a46945e..22fbeaf 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -165,7 +165,6 @@ function deriv end function deriv2 end function value_deriv end -function value! end function deriv! end function deriv2! end function value_deriv! end diff --git a/src/obsdim.jl b/src/obsdim.jl new file mode 100644 index 0000000..c1a580d --- /dev/null +++ b/src/obsdim.jl @@ -0,0 +1,90 @@ +""" +Baseclass for all observation dimensions. +""" +abstract type ObsDimension end + +""" + module ObsDim + +Singleton types to define which dimension of some data structure +(e.g. some `Array`) denotes the observations. + +- `ObsDim.First()` +- `ObsDim.Last()` +- `ObsDim.Constant(dim)` + +Used for efficient dispatching +""" +module ObsDim + using ..LearnBase: ObsDimension + + """ + Default value for most functions. Denotes that the concept of + an observation dimension is not defined for the given data. 
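A small usage sketch of the conversions and defaults introduced in this file; the calls mirror the tests added later in this series:

```julia
using LearnBase
using LearnBase: ObsDim, ObsDimension

convert(ObsDimension, :first)       # ObsDim.First()
convert(ObsDimension, 2)            # ObsDim.Constant(2)
convert(ObsDimension, nothing)      # ObsDim.Undefined()

LearnBase.default_obsdim(rand(10, 5))     # ObsDim.Last()
LearnBase.default_obsdim((rand(10), "x")) # (ObsDim.Last(), ObsDim.Undefined())
```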
+ """ + struct Undefined <: ObsDimension end + + """ + ObsDim.Last <: ObsDimension + + Defines that the last dimension denotes the observations + """ + struct Last <: ObsDimension end + + """ + ObsDim.Constant{DIM} <: ObsDimension + + Defines that the dimension `DIM` denotes the observations + """ + struct Constant{DIM} <: ObsDimension end + Constant(dim::Int) = Constant{dim}() + + """ + ObsDim.First <: ObsDimension + + Defines that the first dimension denotes the observations + """ + const First = Constant{1} +end + +Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) +Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim +Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() +Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) +Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) +Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) +function Base.convert(::Type{ObsDimension}, dim::Symbol) + if dim == :first || dim == :begin + ObsDim.First() + elseif dim == Symbol("end") || dim == :last + ObsDim.Last() + elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined + ObsDim.Undefined() + else + throw(ArgumentError("Unknown way to specify a obsdim: $dim")) + end +end + +""" + default_obsdim(data) + +The specify the default obsdim for a specific type of data. +Defaults to `ObsDim.Undefined()` +""" +default_obsdim(data) = ObsDim.Undefined() +default_obsdim(A::AbstractArray) = ObsDim.Last() +default_obsdim(tup::Tuple) = map(default_obsdim, tup) + +""" + datasubset(data, [idx], [obsdim]) + +Return a lazy subset of the observations in `data` that correspond +to the given `idx`. No data should be copied except of the +indices. Note that `idx` can be of type `Int` or `AbstractVector`. +Both options must be supported by a custom type. + +If it makes sense for the type of `data`, `obsdim` can be used +to disptach on which dimension of `data` denotes the observations. +See `?ObsDim`. +""" +function datasubset end diff --git a/src/other.jl b/src/other.jl index ec7e87a..cd7b914 100644 --- a/src/other.jl +++ b/src/other.jl @@ -204,99 +204,6 @@ const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E, # -------------------------------------------------------------------- -# datasubset(data, [idx], [obsdim]) -# -# Return a lazy subset of the observations in `data` that correspond -# to the given `idx`. No data should be copied except of the -# indices. Note that `idx` can be of type `Int` or `AbstractVector`. -# Both options must be supported by a custom type. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function datasubset end - -""" - default_obsdim(data) - -The specify the default obsdim for a specific type of data. -Defaults to `ObsDim.Undefined()` -""" -function default_obsdim end - -# just for dispatch for those who care to -"see `?ObsDim`" -abstract type ObsDimension end - -""" - module ObsDim - -Singleton types to define which dimension of some data structure -(e.g. some `Array`) denotes the observations. - -- `ObsDim.First()` -- `ObsDim.Last()` -- `ObsDim.Constant(dim)` - -Used for efficient dispatching -""" -module ObsDim - using ..LearnBase: ObsDimension - - """ - Default value for most functions. 
Denotes that the concept of - an observation dimension is not defined for the given data. - """ - struct Undefined <: ObsDimension end - - """ - ObsDim.Last <: ObsDimension - - Defines that the last dimension denotes the observations - """ - struct Last <: ObsDimension end - - """ - ObsDim.Constant{DIM} <: ObsDimension - - Defines that the dimension `DIM` denotes the observations - """ - struct Constant{DIM} <: ObsDimension end - Constant(dim::Int) = Constant{dim}() - - """ - ObsDim.First <: ObsDimension - - Defines that the first dimension denotes the observations - """ - const First = Constant{1} -end - -Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) -Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim -Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() -Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) -Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) -Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) -function Base.convert(::Type{ObsDimension}, dim::Symbol) - if dim == :first || dim == :begin - ObsDim.First() - elseif dim == Symbol("end") || dim == :last - ObsDim.Last() - elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined - ObsDim.Undefined() - else - throw(ArgumentError("Unknown way to specify a obsdim: $dim")) - end -end - -default_obsdim(data) = ObsDim.Undefined() -default_obsdim(A::AbstractArray) = ObsDim.Last() -default_obsdim(tup::Tuple) = map(default_obsdim, tup) - - import Base: AbstractSet "A continuous range (inclusive) between a lo and a hi" From d7b34be3b1ac9f13d9dbaf5f9147ec9f80fa96d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 14:56:33 -0300 Subject: [PATCH 05/15] Update docstrings and remove Markdown dependency --- Project.toml | 3 - src/LearnBase.jl | 2 - src/costs.jl | 294 ++++++++++++++++++----------------------------- 3 files changed, 113 insertions(+), 186 deletions(-) diff --git a/Project.toml b/Project.toml index ab19398..01fe436 100644 --- a/Project.toml +++ b/Project.toml @@ -2,9 +2,6 @@ name = "LearnBase" uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" version = "0.3.0" -[deps] -Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" - [compat] julia = "1" diff --git a/src/LearnBase.jl b/src/LearnBase.jl index 1113049..f498db6 100644 --- a/src/LearnBase.jl +++ b/src/LearnBase.jl @@ -1,7 +1,5 @@ module LearnBase -using Markdown # for docstrings - # AGGREGATION MODES include("aggmode.jl") diff --git a/src/costs.jl b/src/costs.jl index 22fbeaf..8bf9f13 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -1,84 +1,76 @@ -@doc doc""" +""" Baseclass for any kind of cost. Notable examples for costs are `Loss` and `Penalty`. """ abstract type Cost end -@doc doc""" +""" Baseclass for all losses. A loss is some (possibly simplified) -function `L(features, targets, outputs)`, where `outputs` are the -result of some function `f(features)`. +function `L(x, y, ŷ)`, of features `x`, targets `y` and outputs +`ŷ = f(x)` for some function `f`. """ abstract type Loss <: Cost end -@doc doc""" +""" A loss is considered **supervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`targets` and `outputs`, and thus allows for the simplification -`L(targets, outputs)`. +to compute `L(x, y, ŷ)` are contained in `y` and `ŷ`, and thus allows +for the simplification `L(y, ŷ)`. 
""" abstract type SupervisedLoss <: Loss end -@doc doc""" +""" A supervised loss is considered **unary** if it can be written as a composition -`L(Ψ(target, output))` for some binary function `Ψ`. In this case the loss can -be evaluated with a single argument termed the **agreement** `Ψ(target, output)`. -Notable examples for unary supervised losses are distance-based (`Ψ(y,ŷ) = ŷ - y`) -and margin-based (`Ψ(y,ŷ) = ŷ*y`) losses. +`L(β(y, ŷ))` for some binary function `β`. In this case the loss can be evaluated +with a single argument termed the **agreement** `β(y, ŷ)`. Notable +examples for unary supervised losses are distance-based (`L(y,ŷ) = ψ(ŷ - y)`) +and margin-based (`L(y,ŷ) = ψ(y⋅ŷ)`) losses. """ abstract type UnarySupervisedLoss <: SupervisedLoss end -@doc doc""" -A supervised loss that can be simplified to -`L(targets, outputs) = L(outputs - targets)` is considered -**distance-based**. +""" +A supervised loss that can be simplified to `L(y, ŷ) = L(ŷ - y)` +is considered **distance-based**. """ abstract type DistanceLoss <: UnarySupervisedLoss end -@doc doc""" -A supervised loss, where the targets are in {-1, 1}, and which -can be simplified to `L(targets, outputs) = L(targets * outputs)` -is considered **margin-based**. +""" +A supervised loss with targets `y ∈ {-1, 1}`, and which +can be simplified to `L(y, ŷ) = L(y⋅ŷ)` is considered +**margin-based**. """ abstract type MarginLoss <: UnarySupervisedLoss end -@doc doc""" +""" A loss is considered **unsupervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`features` and `outputs`, and thus allows for the simplification -`L(features, outputs)`. +to compute `L(x, y, ŷ)` are contained in `x` and `ŷ`, and thus allows +for the simplification `L(x, ŷ)`. """ abstract type UnsupervisedLoss <: Loss end -@doc doc""" +""" Baseclass for all penalties. """ abstract type Penalty <: Cost end -@doc doc""" +""" value(loss, target, output) Compute the (non-negative) numeric result for the `loss` function return it. Note that `target` and `output` can be of different numeric type, in which case promotion is performed in the manner -appropriate for the given loss. - -```math -L : Y \times \mathbb{R} \rightarrow [0,\infty) -``` +appropriate for the given loss `L: Y × ℝ → [0,∞)`. # Arguments -- `loss::SupervisedLoss`: The loss-function ``L`` we want to - compute the value with. -- `target::Number`: The ground truth ``y \in Y`` of the observation. -- `output::Number`: The predicted output ``\hat{y} \in \mathbb{R}`` +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `target::Number`: The ground truth `y ∈ Y` of the observation. +- `output::Number`: The predicted output `ŷ ∈ ℝ`. for the observation. """ value(loss::SupervisedLoss, target::Number, output::Number) = MethodError(value, (loss, target, output)) -@doc doc""" +""" value(loss, targets, outputs, aggmode) -> Number Compute the weighted or unweighted sum or mean (depending on @@ -93,17 +85,17 @@ dimensions they share. # Arguments -- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. -- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. -- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. -- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), - [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or - [`AggMode.WeightedMean`](@ref). +- `loss::SupervisedLoss`: The loss function `L` we want to compute. 
+- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). """ value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = MethodError(value, (loss, targets, outputs, aggmode)) -@doc doc""" +""" value(loss, targets, outputs, aggmode, obsdim) -> AbstractVector Compute the values of the `loss` function for each pair in @@ -118,20 +110,20 @@ not be vectors). # Arguments -- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. -- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. -- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. -- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), - [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or - [`AggMode.WeightedMean`](@ref). +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). - `obsdim::ObsDimension`: Specifies which of the array dimensions - denotes the observations. see `?ObsDim` for more information. + denotes the observations. See `?ObsDim` for more information. """ value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(value, (loss, targets, outputs, aggmode, obsdim)) -@doc doc""" +""" value!(buffer, loss, targets, outputs, aggmode, obsdim) -> buffer Compute the values of the `loss` function for each pair in @@ -148,14 +140,14 @@ must not be vectors). - `buffer::AbstractArray`: Array to store the computed values in. Old values will be overwritten and lost. -- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. -- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. -- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. -- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), - [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or - [`AggMode.WeightedMean`](@ref). +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). - `obsdim::ObsDimension`: Specifies which of the array dimensions - denotes the observations. see `?ObsDim` for more information. + denotes the observations. See `?ObsDim` for more information. """ value!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = @@ -169,228 +161,168 @@ function deriv! end function deriv2! end function value_deriv! end -@doc doc""" +""" isconvex(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a convex function. 
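For intuition, a sketch of the fallback chain with a hypothetical loss that only declares strong convexity (the same pattern as the test type used elsewhere in this series):

```julia
# Hypothetical loss that only declares strong convexity
struct MyConvexLoss <: LearnBase.SupervisedLoss end
LearnBase.isstronglyconvex(::MyConvexLoss) = true

LearnBase.isstrictlyconvex(MyConvexLoss())        # true, via the fallback to isstronglyconvex
LearnBase.isconvex(MyConvexLoss())                # true, via the fallback to isstrictlyconvex
LearnBase.islocallylipschitzcont(MyConvexLoss())  # true, because the loss is convex
```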
-A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is convex if -its domain is a convex set and if for all ``x, y`` in that -domain, with ``\theta`` such that for ``0 \leq \theta \leq 1``, -we have - -```math -f(\theta x + (1 - \theta) y) \leq \theta f(x) + (1 - \theta) f(y) -``` +A function `f: ℝⁿ → ℝ` is convex if its domain is a convex set +and if for all `x, y` in that domain, with `θ` such that for +`0 ≦ θ ≦ 1`, we have `f(θ x + (1 - θ) y) ≦ θ f(x) + (1 - θ) f(y)`. """ isconvex(l) = isstrictlyconvex(l) -@doc doc""" +""" isstrictlyconvex(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a strictly convex function. -A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is -strictly convex if its domain is a convex set and if for all -``x, y`` in that domain where ``x \neq y``, with -``\theta`` such that for ``0 < \theta < 1``, we have - -```math -f(\theta x + (1 - \theta) y) < \theta f(x) + (1 - \theta) f(y) -``` +A function `f : ℝⁿ → ℝ` is strictly convex if its domain is a convex +set and if for all `x, y` in that domain where `x ≠ y`, with `θ` such +that for `0 < θ < 1`, we have `f(θ x + (1 - θ) y) < θ f(x) + (1 - θ) f(y)`. """ isstrictlyconvex(l) = isstronglyconvex(l) -@doc doc""" +""" isstronglyconvex(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a strongly convex function. -A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is -``m``-strongly convex if its domain is a convex set and if -``\forall x,y \in`` **dom** ``f`` where ``x \neq y``, -and ``\theta`` such that for ``0 \le \theta \le 1`` , we have - -```math -f(\theta x + (1 - \theta)y) < \theta f(x) + (1 - \theta) f(y) - 0.5 m \cdot \theta (1 - \theta) {\| x - y \|}_2^2 -``` +A function `f : ℝⁿ → ℝ` is `m`-strongly convex if its domain is a convex +set, and if for all `x, y` in that domain where `x ≠ y`, and `θ` such that +for `0 ≤ θ ≤ 1`, we have +`f(θ x + (1 - θ)y) < θ f(x) + (1 - θ) f(y) - 0.5 m ⋅ θ (1 - θ) | x - y |₂²` -In a more familiar setting, if the loss function is -differentiable we have - -```math -\left( \nabla f(x) - \nabla f(y) \right)^\top (x - y) \ge m {\| x - y\|}_2^2 -``` +In a more familiar setting, if the loss function is differentiable we have +`(∇f(x) - ∇f(y))ᵀ (x - y) ≥ m | x - y |₂²` """ isstronglyconvex(::SupervisedLoss) = false -@doc doc""" +""" isdifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). -A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}^{m}`` is -differentiable at a point ``x \in`` **int dom** ``f``, if there -exists a matrix ``Df(x) \in \mathbb{R}^{m \times n}`` such that +A function `f : ℝⁿ → ℝᵐ` is differentiable at a point `x` in the interior +domain of `f` if there exists a matrix `Df(x) ∈ ℝ^(m × n)` such that it satisfies: -```math -\lim_{z \neq x, z \to x} \frac{{\|f(z) - f(x) - Df(x)(z-x)\|}_2}{{\|z - x\|}_2} = 0 -``` +`lim_{z ≠ x, z → x} (|f(z) - f(x) - Df(x)(z-x)|₂) / |z - x|₂ = 0` A function is differentiable if its domain is open and it is -differentiable at every point ``x``. +differentiable at every point `x`. """ isdifferentiable(l::SupervisedLoss) = istwicedifferentiable(l) isdifferentiable(l::SupervisedLoss, at) = isdifferentiable(l) -@doc doc""" +""" istwicedifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). 
-A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}`` is -said to be twice differentiable at a point ``x \in`` **int -dom** ``f``, if the function derivative for ``\nabla f`` -exists at ``x``. - -```math -\nabla^2 f(x) = D \nabla f(x) -``` +A function `f : ℝⁿ → ℝ` is said to be twice differentiable +at a point `x` in the interior domain of `f`, if the function +derivative for `∇f` exists at `x`: `∇²f(x) = D∇f(x)`. A function is twice differentiable if its domain is open and it -is twice differentiable at every point ``x``. +is twice differentiable at every point `x`. """ istwicedifferentiable(::SupervisedLoss) = false istwicedifferentiable(l::SupervisedLoss, at) = istwicedifferentiable(l) -@doc doc""" +""" islocallylipschitzcont(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` function is locally-Lipschitz continous. -A supervised loss ``L : Y \times \mathbb{R} \rightarrow [0, \infty)`` -is called locally Lipschitz continuous if ``\forall a \ge 0`` -there exists a constant :math:`c_a \ge 0`, such that +A supervised loss `L : Y × ℝ → [0, ∞)` is called locally Lipschitz +continuous if for all `a ≥ 0` there exists a constant `cₐ ≥ 0`, +such that -```math -\sup_{y \in Y} \left| L(y,t) − L(y,t′) \right| \le c_a |t − t′|, \qquad t,t′ \in [−a,a] -``` +`sup_{y ∈ Y} | L(y,t) − L(y,t′) | ≤ cₐ |t − t′|, t, t′ ∈ [−a,a]` Every convex function is locally lipschitz continuous. """ islocallylipschitzcont(l) = isconvex(l) || islipschitzcont(l) -@doc doc""" +""" islipschitzcont(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` function is Lipschitz continuous. -A supervised loss function ``L : Y \times \mathbb{R} \rightarrow -[0, \infty)`` is Lipschitz continous, if there exists a finite -constant ``M < \infty`` such that - -```math -|L(y, t) - L(y, t′)| \le M |t - t′|, \qquad \forall (y, t) \in Y \times \mathbb{R} -``` +A supervised loss function `L : Y × ℝ → [0, ∞)` is Lipschitz continous, +if there exists a finite constant `M < ∞` such that +`|L(y, t) - L(y, t′)| ≤ M |t - t′|, ∀ (y, t) ∈ Y × ℝ` """ islipschitzcont(::SupervisedLoss) = false -@doc doc""" +""" isnemitski(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a Nemitski loss function. -We call a supervised loss function ``L : Y \times \mathbb{R} -\rightarrow [0,\infty)`` a Nemitski loss if there exist a -measurable function ``b : Y \rightarrow [0, \infty)`` and an -increasing function ``h : [0, \infty) \rightarrow [0, \infty)`` -such that - -```math -L(y,\hat{y}) \le b(y) + h(|\hat{y}|), \qquad (y, \hat{y}) \in Y \times \mathbb{R}. -``` +We call a supervised loss function `L : Y × ℝ → [0,∞)` a Nemitski +loss if there exist a measurable function `b : Y → [0, ∞)` and an +increasing function `h : [0, ∞) → [0, ∞)` such that +`L(y,ŷ) ≤ b(y) + h(|ŷ|), (y, ŷ) ∈ Y × ℝ` -If a loss if locally lipsschitz continuous then it is a Nemitski loss +If a loss if locally lipsschitz continuous then it is a Nemitski loss. """ isnemitski(l::SupervisedLoss) = islocallylipschitzcont(l) -@doc doc""" +""" isclipable(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` function is clipable. A -supervised loss ``L : Y \times \mathbb{R} \rightarrow [0, -\infty)`` can be clipped at ``M > 0`` if, for all ``(y,t) -\in Y \times \mathbb{R}``, - -```math -L(y, \hat{t}) \le L(y, t) -``` - -where ``\hat{t}`` denotes the clipped value of ``t`` at -``\pm M``. 
That is - -```math -\hat{t} = \begin{cases} -M & \quad \text{if } t < -M \\ t & \quad \text{if } t \in [-M, M] \\ M & \quad \text{if } t > M \end{cases} -``` +supervised loss `L : Y × ℝ → [0,∞)` can be clipped at `M > 0` +if, for all `(y,t) ∈ Y × ℝ`, `L(y, t̂) ≤ L(y, t)` where +`t̂` denotes the clipped value of `t` at `± M`. +That is +`t̂ = -M` if `t < -M`, `t̂ = t` if `t ∈ [-M, M]`, and `t = M` if `t > M`. """ isclipable(::SupervisedLoss) = false -@doc doc""" +""" isdistancebased(loss::SupervisedLoss) -> Bool -Return `true` ifthe given `loss` is a distance-based loss. +Return `true` if the given `loss` is a distance-based loss. -A supervised loss function ``L : Y \times \mathbb{R} \rightarrow -[0, \infty)`` is said to be **distance-based**, if there exists a -representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)`` -satisfying ``\psi (0) = 0`` and - -```math -L(y, \hat{y}) = \psi (\hat{y} - y), \qquad (y, \hat{y}) \in Y \times \mathbb{R} -``` +A supervised loss function `L : Y × ℝ → [0,∞)` is said to be +distance-based, if there exists a representing function `ψ : ℝ → [0,∞)` +satisfying `ψ(0) = 0` and `L(y, ŷ) = ψ (ŷ - y), (y, ŷ) ∈ Y × ℝ`. """ isdistancebased(::SupervisedLoss) = false -@doc doc""" +""" ismarginbased(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` is a margin-based loss. -A supervised loss function ``L : Y \times \mathbb{R} \rightarrow -[0, \infty)`` is said to be **margin-based**, if there exists a -representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)`` -satisfying - -```math -L(y, \hat{y}) = \psi (y \cdot \hat{y}), \qquad (y, \hat{y}) \in Y \times \mathbb{R} -``` +A supervised loss function `L : Y × ℝ → [0,∞)` is said to be +margin-based, if there exists a representing function `ψ : ℝ → [0,∞)` +satisfying `L(y, ŷ) = ψ(y⋅ŷ), (y, ŷ) ∈ Y × ℝ`. """ ismarginbased(::SupervisedLoss) = false -@doc doc""" +""" isclasscalibrated(loss::SupervisedLoss) -> Bool """ isclasscalibrated(::SupervisedLoss) = false -@doc doc""" +""" issymmetric(loss::SupervisedLoss) -> Bool Return `true` if the given loss is a symmetric loss. -A function ``f : \mathbb{R} \rightarrow [0,\infty)`` is said -to be symmetric about origin if we have +A function `f : ℝ → [0,∞)` is said to be symmetric +about origin if we have `f(x) = f(-x), ∀ x ∈ ℝ`. -```math -f(x) = f(-x), \qquad \forall x \in \mathbb{R} -``` - -A distance-based loss is said to be symmetric if its representing -function is symmetric. +A distance-based loss is said to be symmetric if its +representing function is symmetric. """ issymmetric(::SupervisedLoss) = false -@doc doc""" +""" isminimizable(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` is a minimizable loss. From abb7cd48b69f0e8898276c822762f1f103bfe232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 16:16:26 -0300 Subject: [PATCH 06/15] Add docstrings for deriv function in costs.jl --- src/costs.jl | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/src/costs.jl b/src/costs.jl index 8bf9f13..01b815f 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -153,7 +153,77 @@ value!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outp aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(value!, (buffer, loss, targets, outputs, aggmode, obsdim)) -function deriv end +""" + deriv(loss, target, output) -> Number + +Compute the derivative for the `loss` function +in respect to the `output`. 
Note that `target` and +`output` can be of different numeric type, in which +case promotion is performed in the manner appropriate +for the given loss. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `target::Number`: The ground truth `y ∈ Y` of the observation. +- `output::Number`: The predicted output `ŷ ∈ ℝ` for the observation. +""" +deriv(loss::SupervisedLoss, target::Number, output::Number) = + MethodError(deriv, (loss, target, output)) + +""" + deriv(loss, targets, outputs, aggmode) -> Number + +Compute the weighted or unweighted sum or mean (depending on `aggmode`) +of the individual derivatives of the `loss` function for each pair +in `targets` and `outputs`. This method will not allocate a +temporary array. + +In the case that the two parameters are arrays with a different +number of dimensions, broadcast will be performed. Note that the +given parameters are expected to have the same size in the +dimensions they share. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +""" +deriv(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = + MethodError(deriv, (loss, targets, outputs, aggmode)) + +""" + deriv(loss, targets, outputs, aggmode, obsdim) -> AbstractVector + +Compute the derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation (depending on +`aggmode`). This method will not allocate a temporary array, but +it will allocate the resulting vector. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. they must +not be vectors). + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::LearnBase.ObsDimension) = + MethodError(deriv, (loss, targets, outputs, aggmode, obsdim)) + function deriv2 end function value_deriv end From 70bd28a6765ec7ab523439826c490e54a0975224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 16:23:09 -0300 Subject: [PATCH 07/15] Add docstrings for deriv! function in costs.jl --- src/costs.jl | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 01b815f..91cdcb5 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -221,14 +221,41 @@ not be vectors). denotes the observations. See `?ObsDim` for more information. 
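As with `value`, a concrete loss would add methods against these stubs; a minimal sketch with a hypothetical squared-error loss (not defined by this package):

```julia
# Hypothetical squared-error loss: L(y, ŷ) = (ŷ - y)^2
struct MySquaredLoss <: LearnBase.DistanceLoss end

LearnBase.value(::MySquaredLoss, target::Number, output::Number) = abs2(output - target)
LearnBase.deriv(::MySquaredLoss, target::Number, output::Number) = 2 * (output - target)

LearnBase.deriv(MySquaredLoss(), 1.0, 0.7)   # ≈ -0.6
```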
""" deriv(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, - aggmode::AggregateMode, obsdim::LearnBase.ObsDimension) = + aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv, (loss, targets, outputs, aggmode, obsdim)) -function deriv2 end -function value_deriv end +""" + deriv!(buffer, loss, targets, outputs, aggmode, obsdim) -> buffer + +Compute the derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation, depending on +`aggmode`. The results are stored into the given vector `buffer`. +This method will not allocate a temporary array. -function deriv! end +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. so they +must not be vectors). + +# Arguments + +- `buffer::AbstractArray`: Array to store the computed values in. +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(deriv!, (buffer, loss, targets, outputs, aggmode, obsdim)) + +function deriv2 end function deriv2! end +function value_deriv end function value_deriv! end """ From d2ee665ddbffa11060a771c77c9fe4e306a41024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:08:45 -0300 Subject: [PATCH 08/15] Add docstrings for deriv2 function in costs.jl --- src/costs.jl | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/src/costs.jl b/src/costs.jl index 91cdcb5..d559fdb 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -253,7 +253,76 @@ deriv!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outp aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv!, (buffer, loss, targets, outputs, aggmode, obsdim)) -function deriv2 end +""" + deriv2(loss, target, output) -> Number + +Compute the second derivative for the `loss` function +in respect to the `output`. Note that `target` and `output` +can be of different numeric type, in which case promotion is +performed in the manner appropriate for the given loss. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `target::Number`: The ground truth `y ∈ Y` of the observation. +- `output::Number`: The predicted output `ŷ ∈ ℝ` for the observation. +""" +deriv2(loss::SupervisedLoss, target::Number, output::Number) = + MethodError(deriv2, (loss, target, output)) + +""" + deriv2(loss, targets, outputs, aggmode) -> Number + +Compute the weighted or unweighted sum or mean (depending on `aggmode`) +of the individual second derivatives of the `loss` function for +each pair in `targets` and `outputs`. This method will not +allocate a temporary array. + +In the case that the two parameters are arrays with a different +number of dimensions, broadcast will be performed. 
Note that the +given parameters are expected to have the same size in the +dimensions they share. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +""" +deriv2(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = + MethodError(deriv2, (loss, targets, outputs, aggmode)) + +""" + deriv2(loss, targets, outputs, aggmode, obsdim) -> AbstractVector + +Compute the second derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation (depending on +`aggmode`). This method will not allocate a temporary array, but +it will allocate the resulting vector. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. they must +not be vectors). + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv2(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(deriv2, (loss, targets, outputs, aggmode, obsdim)) + function deriv2! end function value_deriv end function value_deriv! end From 3d18408c317fd87b0dea1b4dc79e12be99f50d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:16:10 -0300 Subject: [PATCH 09/15] Move struct defs to outside testset --- test/costs.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/costs.jl b/test/costs.jl index cd94bad..03bd524 100644 --- a/test/costs.jl +++ b/test/costs.jl @@ -1,3 +1,8 @@ +# dummy types for testing +struct MyStronglyConvexType end +LearnBase.isstronglyconvex(::MyStronglyConvexType) = true +LearnBase.islipschitzcont(::MyStronglyConvexType) = true + @testset "Costs" begin @test LearnBase.Cost <: Any @test LearnBase.Penalty <: LearnBase.Cost @@ -33,9 +38,6 @@ @test typeof(LearnBase.issymmetric) <: Function # test fallback methods - struct MyStronglyConvexType end - LearnBase.isstronglyconvex(::MyStronglyConvexType) = true - LearnBase.islipschitzcont(::MyStronglyConvexType) = true @test LearnBase.isstronglyconvex(MyStronglyConvexType()) @test LearnBase.isstrictlyconvex(MyStronglyConvexType()) @test LearnBase.isconvex(MyStronglyConvexType()) From 0cf5da01f2606474b778fd5e7f619eb4198dc7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:20:56 -0300 Subject: [PATCH 10/15] Add docstrings for deriv2! 
function in costs.jl --- src/costs.jl | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index d559fdb..858895c 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -139,7 +139,6 @@ must not be vectors). # Arguments - `buffer::AbstractArray`: Array to store the computed values in. - Old values will be overwritten and lost. - `loss::SupervisedLoss`: The loss function `L` we want to compute. - `targets::AbstractArray`: The array of ground truths `𝐲`. - `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. @@ -323,7 +322,35 @@ deriv2(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv2, (loss, targets, outputs, aggmode, obsdim)) -function deriv2! end +""" + deriv2!(buffer, loss, target, output, aggmode, obsdim) -> buffer + +Compute the second derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation, depending on +`aggmode`. The results are stored into the given vector `buffer`. +This method will not allocate a temporary array. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. so they +must not be vectors). + +# Arguments + +- `buffer::AbstractArray`: Array to store the computed values in. +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv2!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(deriv2!, (buffer, loss, targets, outputs, aggmode, obsdim)) + function value_deriv end function value_deriv! end From 22f5acebd0fcf06b4b46bf355e270cea49129ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:40:01 -0300 Subject: [PATCH 11/15] Remove value_deriv and value_deriv! names --- src/costs.jl | 3 --- test/costs.jl | 2 -- 2 files changed, 5 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 858895c..10ec735 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -351,9 +351,6 @@ deriv2!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, out aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv2!, (buffer, loss, targets, outputs, aggmode, obsdim)) -function value_deriv end -function value_deriv! end - """ isconvex(loss::SupervisedLoss) -> Bool diff --git a/test/costs.jl b/test/costs.jl index 03bd524..02dacf2 100644 --- a/test/costs.jl +++ b/test/costs.jl @@ -19,8 +19,6 @@ LearnBase.islipschitzcont(::MyStronglyConvexType) = true @test typeof(LearnBase.deriv!) <: Function @test typeof(LearnBase.deriv2) <: Function @test typeof(LearnBase.deriv2!) <: Function - @test typeof(LearnBase.value_deriv) <: Function - @test typeof(LearnBase.value_deriv!) 
<: Function @test typeof(LearnBase.isminimizable) <: Function @test typeof(LearnBase.isdifferentiable) <: Function From 81ea0ea121bb41420a955efd6351a78606736ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 09:26:54 -0300 Subject: [PATCH 12/15] Tests for ObsDim module --- test/aggmode.jl | 0 test/obsdim.jl | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ test/other.jl | 70 ----------------------------------------------- test/runtests.jl | 2 ++ 4 files changed, 73 insertions(+), 70 deletions(-) create mode 100644 test/aggmode.jl create mode 100644 test/obsdim.jl diff --git a/test/aggmode.jl b/test/aggmode.jl new file mode 100644 index 0000000..e69de29 diff --git a/test/obsdim.jl b/test/obsdim.jl new file mode 100644 index 0000000..ea856b7 --- /dev/null +++ b/test/obsdim.jl @@ -0,0 +1,71 @@ +@testset "ObsDim" begin + @testset "Type tree" begin + @test_throws MethodError LearnBase.ObsDim.Constant(2.0) + + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.First + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.Constant{1} + + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDim.Last + + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} + end + + @testset "Constructors" begin + @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") + @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) + + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.First() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.Constant(1) + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Last())) === LearnBase.ObsDim.Last() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Constant(2))) === LearnBase.ObsDim.Constant(2) + + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) + @test convert(LearnBase.ObsDimension, 1) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, 2) === LearnBase.ObsDim.Constant(2) + @test convert(LearnBase.ObsDimension, 6) === LearnBase.ObsDim.Constant(6) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, :first) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") + @test convert(LearnBase.ObsDimension, (:first,:last)) === (LearnBase.ObsDim.First(),LearnBase.ObsDim.Last()) + @test convert(LearnBase.ObsDimension, :first) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, :begin) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "first") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "BEGIN") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, :end) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :last) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "End") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "LAST") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :nothing) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :none) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :na) === 
LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :null) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :undefined) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() + end + + struct SomeType end + @testset "Default values" begin + @testset "Arrays, SubArrays, and Sparse Arrays" begin + @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(rand(10,5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === LearnBase.ObsDim.Last() + end + + @testset "Types with no specified default" begin + @test @inferred(LearnBase.default_obsdim(SomeType())) === LearnBase.ObsDim.Undefined() + end + + @testset "Tuples" begin + @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Last()) + @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Last()) + end + end +end diff --git a/test/other.jl b/test/other.jl index 7f3ff21..bcae197 100644 --- a/test/other.jl +++ b/test/other.jl @@ -163,73 +163,3 @@ let s = [LearnBase.IntervalSet(0,1), LearnBase.DiscreteSet([0,1])] @test typeof(rand(s)) == Vector{Float64} @test typeof(rand(s, 2)) == Vector{Vector{Float64}} end - -@testset "obsdim typetree and Constructor" begin - @test_throws MethodError LearnBase.ObsDim.Constant(2.0) - - @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDimension - @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.First - @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.Constant{1} - - @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDimension - @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDim.Last - - @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDimension - @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} -end - -@testset "obs_dim helper constructor" begin - @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") - @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) - - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.First() - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.Constant(1) - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Last())) === LearnBase.ObsDim.Last() - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Constant(2))) === LearnBase.ObsDim.Constant(2) - - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) - @test convert(LearnBase.ObsDimension, 1) === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, 2) === LearnBase.ObsDim.Constant(2) - @test convert(LearnBase.ObsDimension, 6) === LearnBase.ObsDim.Constant(6) - @test_throws 
ErrorException @inferred convert(LearnBase.ObsDimension, :first) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") - @test convert(LearnBase.ObsDimension, (:first,:last)) === (LearnBase.ObsDim.First(),LearnBase.ObsDim.Last()) - @test convert(LearnBase.ObsDimension, :first) === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, :begin) === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, "first") === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, "BEGIN") === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, :end) === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, :last) === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, "End") === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, "LAST") === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, :nothing) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :none) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :na) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :null) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :undefined) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() -end - -struct SomeType end -@testset "obsdim default values" begin - @testset "Arrays, SubArrays, and Sparse Arrays" begin - @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(rand(10,5))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === LearnBase.ObsDim.Last() - end - - @testset "Types with no specified default" begin - @test @inferred(LearnBase.default_obsdim(SomeType())) === LearnBase.ObsDim.Undefined() - end - - @testset "Tuples" begin - @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Last()) - @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Last()) - end -end diff --git a/test/runtests.jl b/test/runtests.jl index b890291..7b1a1d3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,5 +2,7 @@ using LearnBase using SparseArrays using Test +include("aggmode.jl") +include("obsdim.jl") include("costs.jl") include("other.jl") From 4d762478e3360eac4abbad0caf8bfdcfdcf0706a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 09:30:57 -0300 Subject: [PATCH 13/15] Tests for AggMode module --- test/aggmode.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/aggmode.jl b/test/aggmode.jl index e69de29..c801413 100644 --- a/test/aggmode.jl +++ b/test/aggmode.jl @@ -0,0 +1,7 @@ +@testset "AggMode" begin + @test typeof(LearnBase.AggMode.None()) <: LearnBase.AggregateMode + @test typeof(LearnBase.AggMode.Sum()) <: LearnBase.AggregateMode + @test 
typeof(LearnBase.AggMode.Mean()) <: LearnBase.AggregateMode + @test typeof(LearnBase.AggMode.WeightedSum([1,2,3])) <: LearnBase.AggregateMode + @test typeof(LearnBase.AggMode.WeightedMean([1,2,3])) <: LearnBase.AggregateMode +end From def0159bd177afbe71829b7097396afdedda7895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 10:04:34 -0300 Subject: [PATCH 14/15] Update tests --- test/obsdim.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/obsdim.jl b/test/obsdim.jl index ea856b7..7be0952 100644 --- a/test/obsdim.jl +++ b/test/obsdim.jl @@ -1,3 +1,4 @@ +struct SomeType end @testset "ObsDim" begin @testset "Type tree" begin @test_throws MethodError LearnBase.ObsDim.Constant(2.0) @@ -46,7 +47,6 @@ @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() end - struct SomeType end @testset "Default values" begin @testset "Arrays, SubArrays, and Sparse Arrays" begin @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() From f8983e4b8cccf32f564e659cab246365eda900c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 10:25:55 -0300 Subject: [PATCH 15/15] Remove UnarySupervisedLoss --- src/costs.jl | 76 ++++++++++++++++++++++++--------------------------- test/costs.jl | 7 ++--- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 10ec735..6b9d76b 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -18,27 +18,18 @@ for the simplification `L(y, ŷ)`. """ abstract type SupervisedLoss <: Loss end -""" -A supervised loss is considered **unary** if it can be written as a composition -`L(β(y, ŷ))` for some binary function `β`. In this case the loss can be evaluated -with a single argument termed the **agreement** `β(y, ŷ)`. Notable -examples for unary supervised losses are distance-based (`L(y,ŷ) = ψ(ŷ - y)`) -and margin-based (`L(y,ŷ) = ψ(y⋅ŷ)`) losses. -""" -abstract type UnarySupervisedLoss <: SupervisedLoss end - """ A supervised loss that can be simplified to `L(y, ŷ) = L(ŷ - y)` is considered **distance-based**. """ -abstract type DistanceLoss <: UnarySupervisedLoss end +abstract type DistanceLoss <: SupervisedLoss end """ A supervised loss with targets `y ∈ {-1, 1}`, and which can be simplified to `L(y, ŷ) = L(y⋅ŷ)` is considered **margin-based**. """ -abstract type MarginLoss <: UnarySupervisedLoss end +abstract type MarginLoss <: SupervisedLoss end """ A loss is considered **unsupervised**, if all the information needed @@ -352,27 +343,27 @@ deriv2!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, out MethodError(deriv2!, (buffer, loss, targets, outputs, aggmode, obsdim)) """ - isconvex(loss::SupervisedLoss) -> Bool + isconvex(loss) -> Bool Return `true` if the given `loss` denotes a convex function. A function `f: ℝⁿ → ℝ` is convex if its domain is a convex set and if for all `x, y` in that domain, with `θ` such that for `0 ≦ θ ≦ 1`, we have `f(θ x + (1 - θ) y) ≦ θ f(x) + (1 - θ) f(y)`. """ -isconvex(l) = isstrictlyconvex(l) +isconvex(loss::SupervisedLoss) = isstrictlyconvex(loss) """ - isstrictlyconvex(loss::SupervisedLoss) -> Bool + isstrictlyconvex(loss) -> Bool Return `true` if the given `loss` denotes a strictly convex function. A function `f : ℝⁿ → ℝ` is strictly convex if its domain is a convex set and if for all `x, y` in that domain where `x ≠ y`, with `θ` such that for `0 < θ < 1`, we have `f(θ x + (1 - θ) y) < θ f(x) + (1 - θ) f(y)`. 
""" -isstrictlyconvex(l) = isstronglyconvex(l) +isstrictlyconvex(loss::SupervisedLoss) = isstronglyconvex(loss) """ - isstronglyconvex(loss::SupervisedLoss) -> Bool + isstronglyconvex(loss) -> Bool Return `true` if the given `loss` denotes a strongly convex function. A function `f : ℝⁿ → ℝ` is `m`-strongly convex if its domain is a convex @@ -383,10 +374,10 @@ for `0 ≤ θ ≤ 1`, we have In a more familiar setting, if the loss function is differentiable we have `(∇f(x) - ∇f(y))ᵀ (x - y) ≥ m | x - y |₂²` """ -isstronglyconvex(::SupervisedLoss) = false +isstronglyconvex(loss::SupervisedLoss) = false """ - isdifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + isdifferentiable(loss, [x]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). @@ -400,11 +391,11 @@ it satisfies: A function is differentiable if its domain is open and it is differentiable at every point `x`. """ -isdifferentiable(l::SupervisedLoss) = istwicedifferentiable(l) -isdifferentiable(l::SupervisedLoss, at) = isdifferentiable(l) +isdifferentiable(loss::SupervisedLoss) = istwicedifferentiable(loss) +isdifferentiable(loss::SupervisedLoss, at) = isdifferentiable(loss) """ - istwicedifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + istwicedifferentiable(loss, [x]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). @@ -416,11 +407,11 @@ derivative for `∇f` exists at `x`: `∇²f(x) = D∇f(x)`. A function is twice differentiable if its domain is open and it is twice differentiable at every point `x`. """ -istwicedifferentiable(::SupervisedLoss) = false -istwicedifferentiable(l::SupervisedLoss, at) = istwicedifferentiable(l) +istwicedifferentiable(loss::SupervisedLoss) = false +istwicedifferentiable(loss::SupervisedLoss, at) = istwicedifferentiable(loss) """ - islocallylipschitzcont(loss::SupervisedLoss) -> Bool + islocallylipschitzcont(loss) -> Bool Return `true` if the given `loss` function is locally-Lipschitz continous. @@ -433,10 +424,11 @@ such that Every convex function is locally lipschitz continuous. """ -islocallylipschitzcont(l) = isconvex(l) || islipschitzcont(l) +islocallylipschitzcont(loss::SupervisedLoss) = + isconvex(loss) || islipschitzcont(loss) """ - islipschitzcont(loss::SupervisedLoss) -> Bool + islipschitzcont(loss) -> Bool Return `true` if the given `loss` function is Lipschitz continuous. @@ -444,10 +436,10 @@ A supervised loss function `L : Y × ℝ → [0, ∞)` is Lipschitz continous, if there exists a finite constant `M < ∞` such that `|L(y, t) - L(y, t′)| ≤ M |t - t′|, ∀ (y, t) ∈ Y × ℝ` """ -islipschitzcont(::SupervisedLoss) = false +islipschitzcont(loss::SupervisedLoss) = false """ - isnemitski(loss::SupervisedLoss) -> Bool + isnemitski(loss) -> Bool Return `true` if the given `loss` denotes a Nemitski loss function. @@ -458,10 +450,10 @@ increasing function `h : [0, ∞) → [0, ∞)` such that If a loss if locally lipsschitz continuous then it is a Nemitski loss. """ -isnemitski(l::SupervisedLoss) = islocallylipschitzcont(l) +isnemitski(loss::SupervisedLoss) = islocallylipschitzcont(loss) """ - isclipable(loss::SupervisedLoss) -> Bool + isclipable(loss) -> Bool Return `true` if the given `loss` function is clipable. A supervised loss `L : Y × ℝ → [0,∞)` can be clipped at `M > 0` @@ -470,10 +462,10 @@ if, for all `(y,t) ∈ Y × ℝ`, `L(y, t̂) ≤ L(y, t)` where That is `t̂ = -M` if `t < -M`, `t̂ = t` if `t ∈ [-M, M]`, and `t = M` if `t > M`. 
""" -isclipable(::SupervisedLoss) = false +isclipable(loss::SupervisedLoss) = false """ - isdistancebased(loss::SupervisedLoss) -> Bool + isdistancebased(loss) -> Bool Return `true` if the given `loss` is a distance-based loss. @@ -481,10 +473,11 @@ A supervised loss function `L : Y × ℝ → [0,∞)` is said to be distance-based, if there exists a representing function `ψ : ℝ → [0,∞)` satisfying `ψ(0) = 0` and `L(y, ŷ) = ψ (ŷ - y), (y, ŷ) ∈ Y × ℝ`. """ -isdistancebased(::SupervisedLoss) = false +isdistancebased(loss::Loss) = false +isdistancebased(loss::DistanceLoss) = true """ - ismarginbased(loss::SupervisedLoss) -> Bool + ismarginbased(loss) -> Bool Return `true` if the given `loss` is a margin-based loss. @@ -492,15 +485,16 @@ A supervised loss function `L : Y × ℝ → [0,∞)` is said to be margin-based, if there exists a representing function `ψ : ℝ → [0,∞)` satisfying `L(y, ŷ) = ψ(y⋅ŷ), (y, ŷ) ∈ Y × ℝ`. """ -ismarginbased(::SupervisedLoss) = false +ismarginbased(loss::Loss) = false +ismarginbased(loss::MarginLoss) = true """ - isclasscalibrated(loss::SupervisedLoss) -> Bool + isclasscalibrated(loss) -> Bool """ -isclasscalibrated(::SupervisedLoss) = false +isclasscalibrated(loss::SupervisedLoss) = false """ - issymmetric(loss::SupervisedLoss) -> Bool + issymmetric(loss) -> Bool Return `true` if the given loss is a symmetric loss. @@ -510,11 +504,11 @@ about origin if we have `f(x) = f(-x), ∀ x ∈ ℝ`. A distance-based loss is said to be symmetric if its representing function is symmetric. """ -issymmetric(::SupervisedLoss) = false +issymmetric(loss::SupervisedLoss) = false """ - isminimizable(loss::SupervisedLoss) -> Bool + isminimizable(loss) -> Bool Return `true` if the given `loss` is a minimizable loss. """ -isminimizable(l) = isconvex(l) +isminimizable(loss::SupervisedLoss) = isconvex(loss) diff --git a/test/costs.jl b/test/costs.jl index 02dacf2..8b3261c 100644 --- a/test/costs.jl +++ b/test/costs.jl @@ -1,5 +1,5 @@ # dummy types for testing -struct MyStronglyConvexType end +struct MyStronglyConvexType <: LearnBase.SupervisedLoss end LearnBase.isstronglyconvex(::MyStronglyConvexType) = true LearnBase.islipschitzcont(::MyStronglyConvexType) = true @@ -9,9 +9,8 @@ LearnBase.islipschitzcont(::MyStronglyConvexType) = true @test LearnBase.Loss <: LearnBase.Cost @test LearnBase.UnsupervisedLoss <: LearnBase.Loss @test LearnBase.SupervisedLoss <: LearnBase.Loss - @test LearnBase.UnarySupervisedLoss <: LearnBase.SupervisedLoss - @test LearnBase.MarginLoss <: LearnBase.UnarySupervisedLoss - @test LearnBase.DistanceLoss <: LearnBase.UnarySupervisedLoss + @test LearnBase.MarginLoss <: LearnBase.SupervisedLoss + @test LearnBase.DistanceLoss <: LearnBase.SupervisedLoss @test typeof(LearnBase.value) <: Function @test typeof(LearnBase.value!) <: Function