From b4067eced05911620692300447d63255c21ae9e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 10:28:22 -0300 Subject: [PATCH 01/15] Refactor codebase --- Project.toml | 4 +- src/LearnBase.jl | 582 +---------------------------------------------- src/costs.jl | 296 ++++++++++++++++++++++++ src/other.jl | 384 +++++++++++++++++++++++++++++++ test/costs.jl | 44 ++++ test/other.jl | 235 +++++++++++++++++++ test/runtests.jl | 309 +------------------------ 7 files changed, 967 insertions(+), 887 deletions(-) create mode 100644 src/costs.jl create mode 100644 src/other.jl create mode 100644 test/costs.jl create mode 100644 test/other.jl diff --git a/Project.toml b/Project.toml index f59f20c..ab19398 100644 --- a/Project.toml +++ b/Project.toml @@ -3,11 +3,9 @@ uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" version = "0.3.0" [deps] -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" [compat] -StatsBase = "0.33" julia = "1" [extras] diff --git a/src/LearnBase.jl b/src/LearnBase.jl index e849aba..867db18 100644 --- a/src/LearnBase.jl +++ b/src/LearnBase.jl @@ -1,583 +1,11 @@ module LearnBase -# Only reexport required functions by default -import StatsBase: nobs, fit, fit!, predict, params, params! +using Markdown # for docstrings -import LinearAlgebra: issymmetric +# LEARNING COSTS (e.g. loss & penalty) +include("costs.jl") -""" -Baseclass for any kind of cost. Notable examples for -costs are `Loss` and `Penalty`. -""" -abstract type Cost end - -""" -Baseclass for all losses. A loss is some (possibly simplified) -function `L(features, targets, outputs)`, where `outputs` are the -result of some function `f(features)`. -""" -abstract type Loss <: Cost end - -""" -A loss is considered **supervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`targets` and `outputs`, and thus allows for the simplification -`L(targets, outputs)`. -""" -abstract type SupervisedLoss <: Loss end - -""" -A supervised loss, where the targets are in {-1, 1}, and which -can be simplified to `L(targets, outputs) = L(targets * outputs)` -is considered **margin-based**. -""" -abstract type MarginLoss <: SupervisedLoss end - - -""" -A supervised loss that can be simplified to -`L(targets, outputs) = L(targets - outputs)` is considered -distance-based. -""" -abstract type DistanceLoss <: SupervisedLoss end - -""" -A loss is considered **unsupervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`features` and `outputs`, and thus allows for the simplification -`L(features, outputs)`. -""" -abstract type UnsupervisedLoss <: Loss end - -abstract type Penalty <: Cost end - -function scaled end - -function value end -function value! end -function meanvalue end -function sumvalue end -function meanderiv end -function sumderiv end -function deriv end -function deriv2 end -function deriv! end -function value_deriv end -function addgrad! end -function value_grad end -function value_grad! end -function prox end -function prox! end - -"Return the gradient of the learnable parameters w.r.t. some objective" -function grad end -"Do a backward pass, updating the gradients of learnable parameters and/or inputs" -function grad! 
end - -function isminimizable end -function isdifferentiable end -function istwicedifferentiable end -function isconvex end -function isstrictlyconvex end -function isstronglyconvex end -function isnemitski end -function isunivfishercons end -function isfishercons end -function islipschitzcont end -function islocallylipschitzcont end -function islipschitzcont_deriv end # maybe overkill -function isclipable end -function ismarginbased end -function isclasscalibrated end -function isdistancebased end - -# fallback to supersets -isstrictlyconvex(any) = isstronglyconvex(any) -isconvex(any) = isstrictlyconvex(any) -islocallylipschitzcont(any) = islipschitzcont(any) - -""" -Anything that takes an input and performs some kind -of function to produce an output. For example a linear -prediction function. -""" -abstract type Transformation end -abstract type StochasticTransformation <: Transformation end -abstract type Learnable <: Transformation end - -function transform end -"Do a forward pass, and return the output" -function transform! end - -""" -Baseclass for any prediction model that can be minimized. -This means that an object of a subclass contains all the -information needed to compute its own current loss. -""" -abstract type Minimizable <: Learnable end - -function update end -function update! end -function learn end -function learn! end - -# getobs(data, [idx], [obsdim]) -# -# Return the observations corresponding to the observation-index -# `idx`. Note that `idx` can be of type `Int` or `AbstractVector`. -# Both options must be supported by a custom type. -# -# The returned observation(s) should be in the form intended to -# be passed as-is to some learning algorithm. There is no strict -# interface requirement on how this "actual data" must look like. -# Every author behind some custom data container can make this -# decision him-/herself. We do, however, expect it to be consistent -# for `idx` being an integer, as well as `idx` being an abstract -# vector, respectively. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function getobs end - -# getobs!(buffer, data, [idx], [obsdim]) -# -# Inplace version of `getobs(data, idx, obsdim)`. If this method -# is defined for the type of `data`, then `buffer` should be used -# to store the result, instead of allocating a dedicated object. -# -# Implementing this function is optional. In the case no such -# method is provided for the type of `data`, then `buffer` will be -# *ignored* and the result of `getobs` returned. This could be -# because the type of `data` may not lend itself to the concept -# of `copy!`. Thus supporting a custom `getobs!(::MyType, ...)` -# is optional and not required. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function getobs! end - -# -------------------------------------------------------------------- - -# gettarget([f], observation) -# -# Use `f` (if provided) to extract the target from the single -# `observation` and return it. It is used internally by -# `targets` (only if `f` is provided) and by -# `eachtarget` (always) on each individual observation. -# -# Even though this function is not exported, it is intended to be -# extended by users to support their custom data storage types. 
-# -# This function is implemented in MLDataPattern -function gettarget end # not exported - -# gettargets(data, [idx], [obsdim]) -# -# Return the targets corresponding to the observation-index `idx`. -# Note that `idx` can be of type `Int` or `AbstractVector`. -# -# Implementing this function for a custom type of `data` is -# optional. It is particularly useful if the targets in `data` can -# be provided without invoking `getobs`. For example if you have a -# remote data-source where the labels are part of some metadata -# that is locally available. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function gettargets end # not exported - -# targets([f], data, [obsdim]) -# -# This function is implemented in MLDataPattern -function targets end - -# -------------------------------------------------------------------- - -""" - abstract DataView{TElem, TData} <: AbstractVector{TElem} - -Baseclass for all vector-like views of some data structure. -This allow for example to see some design matrix as a vector of -individual observation-vectors instead of one matrix. - -see `MLDataPattern.ObsView` and `MLDataPattern.BatchView` for examples. -""" -abstract type DataView{TElem, TData} <: AbstractVector{TElem} end - -""" - abstract AbstractObsView{TElem, TData} <: DataView{TElem, TData} - -Baseclass for all vector-like views of some data structure, -that views it as some form or vector of observations. - -see `MLDataPattern.ObsView` for a concrete example. -""" -abstract type AbstractObsView{TElem, TData} <: DataView{TElem, TData} end - -""" - abstract AbstractBatchView{TElem, TData} <: DataView{TElem, TData} - -Baseclass for all vector-like views of some data structure, -that views it as some form or vector of equally sized batches. - -see `MLDataPattern.BatchView` for a concrete example. -""" -abstract type AbstractBatchView{TElem, TData} <: DataView{TElem, TData} end - -# -------------------------------------------------------------------- - -""" - abstract DataIterator{TElem,TData} - -Baseclass for all types that iterate over a `data` source -in some manner. The total number of observations may or may -not be known or defined and in general there is no contract that -`getobs` or `nobs` has to be supported by the type of `data`. -Furthermore, `length` should be used to query how many elements -the iterator can provide, while `nobs` may return the underlying -true amount of observations available (if known). - -see `MLDataPattern.RandomObs`, `MLDataPattern.RandomBatches` -""" -abstract type DataIterator{TElem,TData} end - -""" - abstract ObsIterator{TElem,TData} <: DataIterator{TElem,TData} - -Baseclass for all types that iterate over some data source -one observation at a time. - -```julia -using MLDataPattern -@assert typeof(RandomObs(X)) <: ObsIterator - -for x in RandomObs(X) - # ... -end -``` - -see `MLDataPattern.RandomObs` -""" -abstract type ObsIterator{TElem,TData} <: DataIterator{TElem,TData} end - -""" - abstract BatchIterator{TElem,TData} <: DataIterator{TElem,TData} - -Baseclass for all types that iterate over of some data source one -batch at a time. - -```julia -@assert typeof(RandomBatches(X, size=10)) <: BatchIterator - -for x in RandomBatches(X, size=10) - @assert nobs(x) == 10 - # ... 
-end -``` - -see `MLDataPattern.RandomBatches` -""" -abstract type BatchIterator{TElem,TData} <: DataIterator{TElem,TData} end - -# -------------------------------------------------------------------- - -# just for dispatch for those who care to -const AbstractDataIterator{E,T} = Union{DataIterator{E,T}, DataView{E,T}} -const AbstractObsIterator{E,T} = Union{ObsIterator{E,T}, AbstractObsView{E,T}} -const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E,T}} - -# -------------------------------------------------------------------- - -# datasubset(data, [idx], [obsdim]) -# -# Return a lazy subset of the observations in `data` that correspond -# to the given `idx`. No data should be copied except of the -# indices. Note that `idx` can be of type `Int` or `AbstractVector`. -# Both options must be supported by a custom type. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function datasubset end - -""" - default_obsdim(data) - -The specify the default obsdim for a specific type of data. -Defaults to `ObsDim.Undefined()` -""" -function default_obsdim end - -# just for dispatch for those who care to -"see `?ObsDim`" -abstract type ObsDimension end - -""" - module ObsDim - -Singleton types to define which dimension of some data structure -(e.g. some `Array`) denotes the observations. - -- `ObsDim.First()` -- `ObsDim.Last()` -- `ObsDim.Constant(dim)` - -Used for efficient dispatching -""" -module ObsDim - using ..LearnBase: ObsDimension - - """ - Default value for most functions. Denotes that the concept of - an observation dimension is not defined for the given data. - """ - struct Undefined <: ObsDimension end - - """ - ObsDim.Last <: ObsDimension - - Defines that the last dimension denotes the observations - """ - struct Last <: ObsDimension end - - """ - ObsDim.Constant{DIM} <: ObsDimension - - Defines that the dimension `DIM` denotes the observations - """ - struct Constant{DIM} <: ObsDimension end - Constant(dim::Int) = Constant{dim}() - - """ - ObsDim.First <: ObsDimension - - Defines that the first dimension denotes the observations - """ - const First = Constant{1} -end - -Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) -Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim -Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() -Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) -Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) -Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) -function Base.convert(::Type{ObsDimension}, dim::Symbol) - if dim == :first || dim == :begin - ObsDim.First() - elseif dim == Symbol("end") || dim == :last - ObsDim.Last() - elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined - ObsDim.Undefined() - else - throw(ArgumentError("Unknown way to specify a obsdim: $dim")) - end -end - -@deprecate obs_dim(dim) convert(ObsDimension, dim) - -default_obsdim(data) = ObsDim.Undefined() -default_obsdim(A::AbstractArray) = ObsDim.Last() -default_obsdim(tup::Tuple) = map(default_obsdim, tup) - - -import Base: AbstractSet - -"A continuous range (inclusive) between a lo and a hi" -struct IntervalSet{T} <: AbstractSet{T} - lo::T - hi::T -end -function IntervalSet(lo::A, hi::B) 
where {A,B} - T = promote_type(A,B) - IntervalSet{T}(convert(T,lo), convert(T,hi)) -end - -# numeric interval -randtype(s::IntervalSet{T}) where T <: Number = Float64 -Base.rand(s::IntervalSet{T}, dims::Integer...) where T <: Number = rand(dims...) .* (s.hi - s.lo) .+ s.lo -Base.in(x::Number, s::IntervalSet{T}) where T <: Number = s.lo <= x <= s.hi -Base.length(s::IntervalSet{T}) where T <: Number = 1 -Base.:(==)(s1::IntervalSet{T}, s2::IntervalSet{T}) where T = s1.lo == s2.lo && s1.hi == s2.hi - - -# vector of intervals -randtype(s::IntervalSet{T}) where T <: AbstractVector = Vector{Float64} -Base.rand(s::IntervalSet{T}) where T <: AbstractVector = Float64[rand() * (s.hi[i] - s.lo[i]) + s.lo[i] for i=1:length(s)] -Base.in(x::AbstractVector, s::IntervalSet{T}) where T <: AbstractVector = all(i -> s.lo[i] <= x[i] <= s.hi[i], 1:length(s)) -Base.length(s::IntervalSet{T}) where T <: AbstractVector = length(s.lo) - - -"Set of discrete items" -struct DiscreteSet{T<:AbstractArray} <: AbstractSet{T} - items::T -end -randtype(s::DiscreteSet) = eltype(s.items) -Base.rand(s::DiscreteSet, dims::Integer...) = rand(s.items, dims...) -Base.in(x, s::DiscreteSet) = x in s.items -Base.length(s::DiscreteSet) = length(s.items) -Base.getindex(s::DiscreteSet, i::Int) = s.items[i] -Base.:(==)(s1::DiscreteSet, s2::DiscreteSet) = s1.items == s2.items - - -# operations on arrays of sets -randtype(sets::AbstractArray{S,N}) where {S <: AbstractSet, N} = Array{promote_type(map(randtype, sets)...), N} -Base.rand(sets::AbstractArray{S}) where S <: AbstractSet = eltype(randtype(sets))[rand(s) for s in sets] -function Base.rand(sets::AbstractArray{S}, dim1::Integer, dims::Integer...) where S <: AbstractSet - A = Array{randtype(sets)}(undef, dim1, dims...) - for i in eachindex(A) - A[i] = rand(sets) - end - A -end -function Base.in(xs::AbstractArray, sets::AbstractArray{S}) where S <: AbstractSet - size(xs) == size(sets) && all(map(in, xs, sets)) -end - - -"Groups several heterogenous sets. Used mainly for proper dispatch." -struct TupleSet{T<:Tuple} <: AbstractSet{T} - sets::T -end -TupleSet(sets::AbstractSet...) = TupleSet(sets) - -# rand can return arrays or tuples, but defaults to arrays -randtype(sets::TupleSet, ::Type{Vector}) = Vector{promote_type(map(randtype, sets.sets)...)} -Base.rand(sets::TupleSet, ::Type{Vector}) = eltype(randtype(sets, Vector))[rand(s) for s in sets.sets] -randtype(sets::TupleSet, ::Type{Tuple}) = Tuple{map(randtype, sets.sets)...} -Base.rand(sets::TupleSet, ::Type{Tuple}) = map(rand, sets.sets) -function Base.rand(sets::TupleSet, ::Type{OT}, dim1::Integer, dims::Integer...) where OT - A = Array{randtype(sets, OT)}(undef, dim1, dims...) - for i in eachindex(A) - A[i] = rand(sets, OT) - end - A -end -Base.length(sets::TupleSet) = sum(length(s) for s in sets.sets) -Base.iterate(sets::TupleSet) = iterate(sets.sets) -Base.iterate(sets::TupleSet, i) = iterate(sets.sets, i) - -randtype(sets::TupleSet) = randtype(sets, Vector) -Base.rand(sets::TupleSet, dims::Integer...) = rand(sets, Vector, dims...) 
-Base.in(x, sets::TupleSet) = all(map(in, x, sets.sets)) - -"Returns an AbstractSet representing valid input values" -function inputdomain end - -"Returns an AbstractSet representing valid output/target values" -function targetdomain end - - -export - - # Types - Cost, - Loss, - SupervisedLoss, - MarginLoss, - DistanceLoss, - UnsupervisedLoss, - Penalty, - - Transformation, - Learnable, - StochasticTransformation, - - Minimizable, - - AbstractSet, - IntervalSet, - DiscreteSet, - TupleSet, - - # Functions - getobs, - getobs!, - - scaled, - - learn, - learn!, - update, - update!, - transform, - transform!, - value, - value!, - meanvalue, - sumvalue, - meanderiv, - sumderiv, - deriv, - deriv!, - params, - params!, - grad, - grad!, - addgrad!, - deriv2, - value_deriv, - value_deriv!, - value_grad, - value_grad!, - prox, - prox!, - inputdomain, - targetdomain, - - isminimizable, - isdifferentiable, - istwicedifferentiable, - isconvex, - isstrictlyconvex, - isstronglyconvex, - isnemitski, - isunivfishercons, - isfishercons, - islipschitzcont, - islocallylipschitzcont, - islipschitzcont_deriv, - isclipable, - ismarginbased, - isclasscalibrated, - isdistancebased, - - # LinearAlgebra - issymmetric, - - getobs, - getobs!, - datasubset, - ObsDim, - - targets, - - AbstractDataIterator, - AbstractObsIterator, - AbstractBatchIterator, - - DataView, - AbstractObsView, - AbstractBatchView, - - DataIterator, - ObsIterator, - BatchIterator, - - # StatsBase - fit, - fit!, - nobs +# OTHER CONCEPTS +include("other.jl") end # module diff --git a/src/costs.jl b/src/costs.jl new file mode 100644 index 0000000..525b4e3 --- /dev/null +++ b/src/costs.jl @@ -0,0 +1,296 @@ +@doc doc""" +Baseclass for any kind of cost. Notable examples for +costs are `Loss` and `Penalty`. +""" +abstract type Cost end + +@doc doc""" +Baseclass for all losses. A loss is some (possibly simplified) +function `L(features, targets, outputs)`, where `outputs` are the +result of some function `f(features)`. +""" +abstract type Loss <: Cost end + +@doc doc""" +A loss is considered **supervised**, if all the information needed +to compute `L(features, targets, outputs)` are contained in +`targets` and `outputs`, and thus allows for the simplification +`L(targets, outputs)`. +""" +abstract type SupervisedLoss <: Loss end + +@doc doc""" +A supervised loss is considered **unary** if it can be written as a composition +`L(Ψ(output, target))` for some binary function `Ψ`. In this case the loss can +be evaluated with a single argument termed the **agreement** `Ψ(output, target)`. +Notable examples for unary supervised losses are distance-based (`Ψ(ŷ,y) = ŷ - y`) +and margin-based (`Ψ(ŷ,y) = ŷ*y`) losses. +""" +abstract type UnarySupervisedLoss <: SupervisedLoss end + +@doc doc""" +A supervised loss that can be simplified to +`L(targets, outputs) = L(targets - outputs)` is considered +**distance-based**. +""" +abstract type DistanceLoss <: UnarySupervisedLoss end + +@doc doc""" +A supervised loss, where the targets are in {-1, 1}, and which +can be simplified to `L(targets, outputs) = L(targets * outputs)` +is considered **margin-based**. +""" +abstract type MarginLoss <: UnarySupervisedLoss end + +@doc doc""" +A loss is considered **unsupervised**, if all the information needed +to compute `L(features, targets, outputs)` are contained in +`features` and `outputs`, and thus allows for the simplification +`L(features, outputs)`. +""" +abstract type UnsupervisedLoss <: Loss end + +@doc doc""" +Baseclass for all penalties. 
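+
+A notable example is a regularization term, i.e. a function of the
+model coefficients alone, such as the ridge penalty
+``P(\theta) = \lambda {\|\theta\|}_2^2``, which is added to a loss
+to discourage large coefficient values.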
+""" +abstract type Penalty <: Cost end + +function value end +function value! end + +function deriv end +function deriv! end + +function deriv2 end +function deriv2! end + +function value_deriv end +function value_deriv! end + +@doc doc""" + isconvex(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` denotes a convex function. +A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is convex if +its domain is a convex set and if for all ``x, y`` in that +domain, with ``\theta`` such that for ``0 \leq \theta \leq 1``, +we have + +```math +f(\theta x + (1 - \theta) y) \leq \theta f(x) + (1 - \theta) f(y) +``` +""" +isconvex(l) = isstrictlyconvex(l) + +@doc doc""" + isstrictlyconvex(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` denotes a strictly convex function. +A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is +strictly convex if its domain is a convex set and if for all +``x, y`` in that domain where ``x \neq y``, with +``\theta`` such that for ``0 < \theta < 1``, we have + +```math +f(\theta x + (1 - \theta) y) < \theta f(x) + (1 - \theta) f(y) +``` +""" +isstrictlyconvex(l) = isstronglyconvex(l) + +@doc doc""" + isstronglyconvex(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` denotes a strongly convex function. +A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is +``m``-strongly convex if its domain is a convex set and if +``\forall x,y \in`` **dom** ``f`` where ``x \neq y``, +and ``\theta`` such that for ``0 \le \theta \le 1`` , we have + +```math +f(\theta x + (1 - \theta)y) < \theta f(x) + (1 - \theta) f(y) - 0.5 m \cdot \theta (1 - \theta) {\| x - y \|}_2^2 +``` + +In a more familiar setting, if the loss function is +differentiable we have + +```math +\left( \nabla f(x) - \nabla f(y) \right)^\top (x - y) \ge m {\| x - y\|}_2^2 +``` +""" +isstronglyconvex(::SupervisedLoss) = false + +@doc doc""" + isdifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + +Return `true` if the given `loss` is differentiable +(optionally limited to the given point `x` if specified). + +A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}^{m}`` is +differentiable at a point ``x \in`` **int dom** ``f``, if there +exists a matrix ``Df(x) \in \mathbb{R}^{m \times n}`` such that +it satisfies: + +```math +\lim_{z \neq x, z \to x} \frac{{\|f(z) - f(x) - Df(x)(z-x)\|}_2}{{\|z - x\|}_2} = 0 +``` + +A function is differentiable if its domain is open and it is +differentiable at every point ``x``. +""" +isdifferentiable(l::SupervisedLoss) = istwicedifferentiable(l) +isdifferentiable(l::SupervisedLoss, at) = isdifferentiable(l) + +@doc doc""" + istwicedifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + +Return `true` if the given `loss` is differentiable +(optionally limited to the given point `x` if specified). + +A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}`` is +said to be twice differentiable at a point ``x \in`` **int +dom** ``f``, if the function derivative for ``\nabla f`` +exists at ``x``. + +```math +\nabla^2 f(x) = D \nabla f(x) +``` + +A function is twice differentiable if its domain is open and it +is twice differentiable at every point ``x``. +""" +istwicedifferentiable(::SupervisedLoss) = false +istwicedifferentiable(l::SupervisedLoss, at) = istwicedifferentiable(l) + +@doc doc""" + islocallylipschitzcont(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` function is locally-Lipschitz +continous. 
+
+A supervised loss ``L : Y \times \mathbb{R} \rightarrow [0, \infty)``
+is called locally Lipschitz continuous if ``\forall a \ge 0``
+there exists a constant ``c_a \ge 0``, such that
+
+```math
+\sup_{y \in Y} \left| L(y,t) − L(y,t′) \right| \le c_a |t − t′|, \qquad t,t′ \in [−a,a]
+```
+
+Every convex function is locally Lipschitz continuous.
+"""
+islocallylipschitzcont(l) = isconvex(l) || islipschitzcont(l)
+
+@doc doc"""
+    islipschitzcont(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` function is Lipschitz continuous.
+
+A supervised loss function ``L : Y \times \mathbb{R} \rightarrow
+[0, \infty)`` is Lipschitz continuous if there exists a finite
+constant ``M < \infty`` such that
+
+```math
+|L(y, t) - L(y, t′)| \le M |t - t′|, \qquad \forall (y, t) \in Y \times \mathbb{R}
+```
+"""
+islipschitzcont(::SupervisedLoss) = false
+
+@doc doc"""
+    isnemitski(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` denotes a Nemitski loss function.
+
+We call a supervised loss function ``L : Y \times \mathbb{R}
+\rightarrow [0,\infty)`` a Nemitski loss if there exist a
+measurable function ``b : Y \rightarrow [0, \infty)`` and an
+increasing function ``h : [0, \infty) \rightarrow [0, \infty)``
+such that
+
+```math
+L(y,\hat{y}) \le b(y) + h(|\hat{y}|), \qquad (y, \hat{y}) \in Y \times \mathbb{R}.
+```
+
+If a loss is locally Lipschitz continuous, then it is a Nemitski loss.
+"""
+isnemitski(l::SupervisedLoss) = islocallylipschitzcont(l)
+
+@doc doc"""
+    isclipable(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` function is clipable. A
+supervised loss ``L : Y \times \mathbb{R} \rightarrow [0,
+\infty)`` can be clipped at ``M > 0`` if, for all ``(y,t)
+\in Y \times \mathbb{R}``,
+
+```math
+L(y, \hat{t}) \le L(y, t)
+```
+
+where ``\hat{t}`` denotes the clipped value of ``t`` at
+``\pm M``. That is
+
+```math
+\hat{t} = \begin{cases} -M & \quad \text{if } t < -M \\ t & \quad \text{if } t \in [-M, M] \\ M & \quad \text{if } t > M \end{cases}
+```
+"""
+isclipable(::SupervisedLoss) = false
+
+@doc doc"""
+    isdistancebased(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is a distance-based loss.
+
+A supervised loss function ``L : Y \times \mathbb{R} \rightarrow
+[0, \infty)`` is said to be **distance-based**, if there exists a
+representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)``
+satisfying ``\psi (0) = 0`` and
+
+```math
+L(y, \hat{y}) = \psi (\hat{y} - y), \qquad (y, \hat{y}) \in Y \times \mathbb{R}
+```
+"""
+isdistancebased(::SupervisedLoss) = false
+
+@doc doc"""
+    ismarginbased(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is a margin-based loss.
+
+A supervised loss function ``L : Y \times \mathbb{R} \rightarrow
+[0, \infty)`` is said to be **margin-based**, if there exists a
+representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)``
+satisfying
+
+```math
+L(y, \hat{y}) = \psi (y \cdot \hat{y}), \qquad (y, \hat{y}) \in Y \times \mathbb{R}
+```
+"""
+ismarginbased(::SupervisedLoss) = false
+
+@doc doc"""
+    isclasscalibrated(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is classification calibrated.
+"""
+isclasscalibrated(::SupervisedLoss) = false
+
+@doc doc"""
+    issymmetric(loss::SupervisedLoss) -> Bool
+
+Return `true` if the given `loss` is a symmetric loss.
+ +A function ``f : \mathbb{R} \rightarrow [0,\infty)`` is said +to be symmetric about origin if we have + +```math +f(x) = f(-x), \qquad \forall x \in \mathbb{R} +``` + +A distance-based loss is said to be symmetric if its representing +function is symmetric. +""" +issymmetric(::SupervisedLoss) = false + +@doc doc""" + isminimizable(loss::SupervisedLoss) -> Bool + +Return `true` if the given `loss` is a minimizable loss. +""" +isminimizable(l) = isconvex(l) diff --git a/src/other.jl b/src/other.jl new file mode 100644 index 0000000..ec7e87a --- /dev/null +++ b/src/other.jl @@ -0,0 +1,384 @@ +"Return the gradient of the learnable parameters w.r.t. some objective" +function grad end +"Do a backward pass, updating the gradients of learnable parameters and/or inputs" +function grad! end + +""" +Anything that takes an input and performs some kind +of function to produce an output. For example a linear +prediction function. +""" +abstract type Transformation end +abstract type StochasticTransformation <: Transformation end +abstract type Learnable <: Transformation end + +function transform end +"Do a forward pass, and return the output" +function transform! end + +""" +Baseclass for any prediction model that can be minimized. +This means that an object of a subclass contains all the +information needed to compute its own current loss. +""" +abstract type Minimizable <: Learnable end + +function update end +function update! end +function learn end +function learn! end + +# getobs(data, [idx], [obsdim]) +# +# Return the observations corresponding to the observation-index +# `idx`. Note that `idx` can be of type `Int` or `AbstractVector`. +# Both options must be supported by a custom type. +# +# The returned observation(s) should be in the form intended to +# be passed as-is to some learning algorithm. There is no strict +# interface requirement on how this "actual data" must look like. +# Every author behind some custom data container can make this +# decision him-/herself. We do, however, expect it to be consistent +# for `idx` being an integer, as well as `idx` being an abstract +# vector, respectively. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function getobs end + +# getobs!(buffer, data, [idx], [obsdim]) +# +# Inplace version of `getobs(data, idx, obsdim)`. If this method +# is defined for the type of `data`, then `buffer` should be used +# to store the result, instead of allocating a dedicated object. +# +# Implementing this function is optional. In the case no such +# method is provided for the type of `data`, then `buffer` will be +# *ignored* and the result of `getobs` returned. This could be +# because the type of `data` may not lend itself to the concept +# of `copy!`. Thus supporting a custom `getobs!(::MyType, ...)` +# is optional and not required. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function getobs! end + +# -------------------------------------------------------------------- + +# gettarget([f], observation) +# +# Use `f` (if provided) to extract the target from the single +# `observation` and return it. It is used internally by +# `targets` (only if `f` is provided) and by +# `eachtarget` (always) on each individual observation. 
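+#
+# For example (a hypothetical sketch, not an actual method of this
+# package): a container whose observations are `(features, target)`
+# tuples could support it via
+#
+#     LearnBase.gettarget(f, obs::Tuple) = f(last(obs))
+#
+# so that `targets(f, data)` applies `f` to the target of every
+# observation.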
+# +# Even though this function is not exported, it is intended to be +# extended by users to support their custom data storage types. +# +# This function is implemented in MLDataPattern +function gettarget end # not exported + +# gettargets(data, [idx], [obsdim]) +# +# Return the targets corresponding to the observation-index `idx`. +# Note that `idx` can be of type `Int` or `AbstractVector`. +# +# Implementing this function for a custom type of `data` is +# optional. It is particularly useful if the targets in `data` can +# be provided without invoking `getobs`. For example if you have a +# remote data-source where the labels are part of some metadata +# that is locally available. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function gettargets end # not exported + +# targets([f], data, [obsdim]) +# +# This function is implemented in MLDataPattern +function targets end + +# -------------------------------------------------------------------- + +""" + abstract DataView{TElem, TData} <: AbstractVector{TElem} + +Baseclass for all vector-like views of some data structure. +This allow for example to see some design matrix as a vector of +individual observation-vectors instead of one matrix. + +see `MLDataPattern.ObsView` and `MLDataPattern.BatchView` for examples. +""" +abstract type DataView{TElem, TData} <: AbstractVector{TElem} end + +""" + abstract AbstractObsView{TElem, TData} <: DataView{TElem, TData} + +Baseclass for all vector-like views of some data structure, +that views it as some form or vector of observations. + +see `MLDataPattern.ObsView` for a concrete example. +""" +abstract type AbstractObsView{TElem, TData} <: DataView{TElem, TData} end + +""" + abstract AbstractBatchView{TElem, TData} <: DataView{TElem, TData} + +Baseclass for all vector-like views of some data structure, +that views it as some form or vector of equally sized batches. + +see `MLDataPattern.BatchView` for a concrete example. +""" +abstract type AbstractBatchView{TElem, TData} <: DataView{TElem, TData} end + +# -------------------------------------------------------------------- + +""" + abstract DataIterator{TElem,TData} + +Baseclass for all types that iterate over a `data` source +in some manner. The total number of observations may or may +not be known or defined and in general there is no contract that +`getobs` or `nobs` has to be supported by the type of `data`. +Furthermore, `length` should be used to query how many elements +the iterator can provide, while `nobs` may return the underlying +true amount of observations available (if known). + +see `MLDataPattern.RandomObs`, `MLDataPattern.RandomBatches` +""" +abstract type DataIterator{TElem,TData} end + +""" + abstract ObsIterator{TElem,TData} <: DataIterator{TElem,TData} + +Baseclass for all types that iterate over some data source +one observation at a time. + +```julia +using MLDataPattern +@assert typeof(RandomObs(X)) <: ObsIterator + +for x in RandomObs(X) + # ... +end +``` + +see `MLDataPattern.RandomObs` +""" +abstract type ObsIterator{TElem,TData} <: DataIterator{TElem,TData} end + +""" + abstract BatchIterator{TElem,TData} <: DataIterator{TElem,TData} + +Baseclass for all types that iterate over of some data source one +batch at a time. + +```julia +@assert typeof(RandomBatches(X, size=10)) <: BatchIterator + +for x in RandomBatches(X, size=10) + @assert nobs(x) == 10 + # ... 
+end +``` + +see `MLDataPattern.RandomBatches` +""" +abstract type BatchIterator{TElem,TData} <: DataIterator{TElem,TData} end + +# -------------------------------------------------------------------- + +# just for dispatch for those who care to +const AbstractDataIterator{E,T} = Union{DataIterator{E,T}, DataView{E,T}} +const AbstractObsIterator{E,T} = Union{ObsIterator{E,T}, AbstractObsView{E,T}} +const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E,T}} + +# -------------------------------------------------------------------- + +# datasubset(data, [idx], [obsdim]) +# +# Return a lazy subset of the observations in `data` that correspond +# to the given `idx`. No data should be copied except of the +# indices. Note that `idx` can be of type `Int` or `AbstractVector`. +# Both options must be supported by a custom type. +# +# If it makes sense for the type of `data`, `obsdim` can be used +# to disptach on which dimension of `data` denotes the observations. +# See `?ObsDim` +# +# This function is implemented in MLDataPattern +function datasubset end + +""" + default_obsdim(data) + +The specify the default obsdim for a specific type of data. +Defaults to `ObsDim.Undefined()` +""" +function default_obsdim end + +# just for dispatch for those who care to +"see `?ObsDim`" +abstract type ObsDimension end + +""" + module ObsDim + +Singleton types to define which dimension of some data structure +(e.g. some `Array`) denotes the observations. + +- `ObsDim.First()` +- `ObsDim.Last()` +- `ObsDim.Constant(dim)` + +Used for efficient dispatching +""" +module ObsDim + using ..LearnBase: ObsDimension + + """ + Default value for most functions. Denotes that the concept of + an observation dimension is not defined for the given data. + """ + struct Undefined <: ObsDimension end + + """ + ObsDim.Last <: ObsDimension + + Defines that the last dimension denotes the observations + """ + struct Last <: ObsDimension end + + """ + ObsDim.Constant{DIM} <: ObsDimension + + Defines that the dimension `DIM` denotes the observations + """ + struct Constant{DIM} <: ObsDimension end + Constant(dim::Int) = Constant{dim}() + + """ + ObsDim.First <: ObsDimension + + Defines that the first dimension denotes the observations + """ + const First = Constant{1} +end + +Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) +Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim +Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() +Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) +Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) +Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) +function Base.convert(::Type{ObsDimension}, dim::Symbol) + if dim == :first || dim == :begin + ObsDim.First() + elseif dim == Symbol("end") || dim == :last + ObsDim.Last() + elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined + ObsDim.Undefined() + else + throw(ArgumentError("Unknown way to specify a obsdim: $dim")) + end +end + +default_obsdim(data) = ObsDim.Undefined() +default_obsdim(A::AbstractArray) = ObsDim.Last() +default_obsdim(tup::Tuple) = map(default_obsdim, tup) + + +import Base: AbstractSet + +"A continuous range (inclusive) between a lo and a hi" +struct IntervalSet{T} <: AbstractSet{T} + lo::T + hi::T +end +function IntervalSet(lo::A, hi::B) where {A,B} + T = promote_type(A,B) + 
IntervalSet{T}(convert(T,lo), convert(T,hi)) +end + +# numeric interval +randtype(s::IntervalSet{T}) where T <: Number = Float64 +Base.rand(s::IntervalSet{T}, dims::Integer...) where T <: Number = rand(dims...) .* (s.hi - s.lo) .+ s.lo +Base.in(x::Number, s::IntervalSet{T}) where T <: Number = s.lo <= x <= s.hi +Base.length(s::IntervalSet{T}) where T <: Number = 1 +Base.:(==)(s1::IntervalSet{T}, s2::IntervalSet{T}) where T = s1.lo == s2.lo && s1.hi == s2.hi + + +# vector of intervals +randtype(s::IntervalSet{T}) where T <: AbstractVector = Vector{Float64} +Base.rand(s::IntervalSet{T}) where T <: AbstractVector = Float64[rand() * (s.hi[i] - s.lo[i]) + s.lo[i] for i=1:length(s)] +Base.in(x::AbstractVector, s::IntervalSet{T}) where T <: AbstractVector = all(i -> s.lo[i] <= x[i] <= s.hi[i], 1:length(s)) +Base.length(s::IntervalSet{T}) where T <: AbstractVector = length(s.lo) + + +"Set of discrete items" +struct DiscreteSet{T<:AbstractArray} <: AbstractSet{T} + items::T +end +randtype(s::DiscreteSet) = eltype(s.items) +Base.rand(s::DiscreteSet, dims::Integer...) = rand(s.items, dims...) +Base.in(x, s::DiscreteSet) = x in s.items +Base.length(s::DiscreteSet) = length(s.items) +Base.getindex(s::DiscreteSet, i::Int) = s.items[i] +Base.:(==)(s1::DiscreteSet, s2::DiscreteSet) = s1.items == s2.items + + +# operations on arrays of sets +randtype(sets::AbstractArray{S,N}) where {S <: AbstractSet, N} = Array{promote_type(map(randtype, sets)...), N} +Base.rand(sets::AbstractArray{S}) where S <: AbstractSet = eltype(randtype(sets))[rand(s) for s in sets] +function Base.rand(sets::AbstractArray{S}, dim1::Integer, dims::Integer...) where S <: AbstractSet + A = Array{randtype(sets)}(undef, dim1, dims...) + for i in eachindex(A) + A[i] = rand(sets) + end + A +end +function Base.in(xs::AbstractArray, sets::AbstractArray{S}) where S <: AbstractSet + size(xs) == size(sets) && all(map(in, xs, sets)) +end + + +"Groups several heterogenous sets. Used mainly for proper dispatch." +struct TupleSet{T<:Tuple} <: AbstractSet{T} + sets::T +end +TupleSet(sets::AbstractSet...) = TupleSet(sets) + +# rand can return arrays or tuples, but defaults to arrays +randtype(sets::TupleSet, ::Type{Vector}) = Vector{promote_type(map(randtype, sets.sets)...)} +Base.rand(sets::TupleSet, ::Type{Vector}) = eltype(randtype(sets, Vector))[rand(s) for s in sets.sets] +randtype(sets::TupleSet, ::Type{Tuple}) = Tuple{map(randtype, sets.sets)...} +Base.rand(sets::TupleSet, ::Type{Tuple}) = map(rand, sets.sets) +function Base.rand(sets::TupleSet, ::Type{OT}, dim1::Integer, dims::Integer...) where OT + A = Array{randtype(sets, OT)}(undef, dim1, dims...) + for i in eachindex(A) + A[i] = rand(sets, OT) + end + A +end +Base.length(sets::TupleSet) = sum(length(s) for s in sets.sets) +Base.iterate(sets::TupleSet) = iterate(sets.sets) +Base.iterate(sets::TupleSet, i) = iterate(sets.sets, i) + +randtype(sets::TupleSet) = randtype(sets, Vector) +Base.rand(sets::TupleSet, dims::Integer...) = rand(sets, Vector, dims...) 
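+
+# Usage sketch (mirrors the behaviour exercised in test/other.jl): a
+# TupleSet groups heterogeneous component sets and samples them either
+# as a Vector (the default) or as a Tuple:
+#
+#     s = TupleSet(IntervalSet(0, 1), DiscreteSet([0, 1]))
+#     rand(s)         # Vector{Float64}, one draw per component set
+#     rand(s, Tuple)  # (Float64, Int), drawn component-wise
+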
+Base.in(x, sets::TupleSet) = all(map(in, x, sets.sets)) + +"Returns an AbstractSet representing valid input values" +function inputdomain end + +"Returns an AbstractSet representing valid output/target values" +function targetdomain end diff --git a/test/costs.jl b/test/costs.jl new file mode 100644 index 0000000..cd94bad --- /dev/null +++ b/test/costs.jl @@ -0,0 +1,44 @@ +@testset "Costs" begin + @test LearnBase.Cost <: Any + @test LearnBase.Penalty <: LearnBase.Cost + @test LearnBase.Loss <: LearnBase.Cost + @test LearnBase.UnsupervisedLoss <: LearnBase.Loss + @test LearnBase.SupervisedLoss <: LearnBase.Loss + @test LearnBase.UnarySupervisedLoss <: LearnBase.SupervisedLoss + @test LearnBase.MarginLoss <: LearnBase.UnarySupervisedLoss + @test LearnBase.DistanceLoss <: LearnBase.UnarySupervisedLoss + + @test typeof(LearnBase.value) <: Function + @test typeof(LearnBase.value!) <: Function + @test typeof(LearnBase.deriv) <: Function + @test typeof(LearnBase.deriv!) <: Function + @test typeof(LearnBase.deriv2) <: Function + @test typeof(LearnBase.deriv2!) <: Function + @test typeof(LearnBase.value_deriv) <: Function + @test typeof(LearnBase.value_deriv!) <: Function + + @test typeof(LearnBase.isminimizable) <: Function + @test typeof(LearnBase.isdifferentiable) <: Function + @test typeof(LearnBase.istwicedifferentiable) <: Function + @test typeof(LearnBase.isconvex) <: Function + @test typeof(LearnBase.isstrictlyconvex) <: Function + @test typeof(LearnBase.isstronglyconvex) <: Function + @test typeof(LearnBase.isnemitski) <: Function + @test typeof(LearnBase.islipschitzcont) <: Function + @test typeof(LearnBase.islocallylipschitzcont) <: Function + @test typeof(LearnBase.isclipable) <: Function + @test typeof(LearnBase.ismarginbased) <: Function + @test typeof(LearnBase.isclasscalibrated) <: Function + @test typeof(LearnBase.isdistancebased) <: Function + @test typeof(LearnBase.issymmetric) <: Function + + # test fallback methods + struct MyStronglyConvexType end + LearnBase.isstronglyconvex(::MyStronglyConvexType) = true + LearnBase.islipschitzcont(::MyStronglyConvexType) = true + @test LearnBase.isstronglyconvex(MyStronglyConvexType()) + @test LearnBase.isstrictlyconvex(MyStronglyConvexType()) + @test LearnBase.isconvex(MyStronglyConvexType()) + @test LearnBase.islipschitzcont(MyStronglyConvexType()) + @test LearnBase.islocallylipschitzcont(MyStronglyConvexType()) +end diff --git a/test/other.jl b/test/other.jl new file mode 100644 index 0000000..7f3ff21 --- /dev/null +++ b/test/other.jl @@ -0,0 +1,235 @@ +@test LearnBase.Minimizable <: Any +@test LearnBase.Transformation <: Any +@test LearnBase.StochasticTransformation <: LearnBase.Transformation + +@test typeof(LearnBase.transform) <: Function +@test typeof(LearnBase.transform!) <: Function +@test typeof(LearnBase.getobs) <: Function +@test typeof(LearnBase.getobs!) <: Function +@test typeof(LearnBase.learn) <: Function +@test typeof(LearnBase.learn!) <: Function +@test typeof(LearnBase.update) <: Function +@test typeof(LearnBase.update!) <: Function + +@test typeof(LearnBase.grad) <: Function +@test typeof(LearnBase.grad!) 
<: Function + +@test typeof(LearnBase.datasubset) <: Function + +@test typeof(LearnBase.targets) <: Function +@test typeof(LearnBase.gettarget) <: Function +@test typeof(LearnBase.gettargets) <: Function + +@test LearnBase.DataView <: AbstractVector +@test LearnBase.DataView <: LearnBase.AbstractDataIterator +@test LearnBase.DataView{Int} <: AbstractVector{Int} +@test LearnBase.DataView{Int,Vector{Int}} <: LearnBase.AbstractDataIterator{Int,Vector{Int}} +@test LearnBase.AbstractObsView <: LearnBase.DataView +@test LearnBase.AbstractObsView <: LearnBase.AbstractObsIterator +@test LearnBase.AbstractObsView{Int,Vector{Int}} <: LearnBase.DataView{Int,Vector{Int}} +@test LearnBase.AbstractObsView{Int,Vector{Int}} <: LearnBase.AbstractObsIterator{Int,Vector{Int}} +@test LearnBase.AbstractBatchView <: LearnBase.DataView +@test LearnBase.AbstractBatchView <: LearnBase.AbstractBatchIterator +@test LearnBase.AbstractBatchView{Int,Vector{Int}} <: LearnBase.DataView{Int,Vector{Int}} +@test LearnBase.AbstractBatchView{Int,Vector{Int}} <: LearnBase.AbstractBatchIterator{Int,Vector{Int}} + +@test LearnBase.DataIterator <: LearnBase.AbstractDataIterator +@test LearnBase.DataIterator{Int,Vector{Int}} <: LearnBase.AbstractDataIterator{Int,Vector{Int}} +@test LearnBase.ObsIterator <: LearnBase.DataIterator +@test LearnBase.ObsIterator <: LearnBase.AbstractObsIterator +@test LearnBase.ObsIterator{Int,Vector{Int}} <: LearnBase.DataIterator{Int,Vector{Int}} +@test LearnBase.ObsIterator{Int,Vector{Int}} <: LearnBase.AbstractObsIterator{Int,Vector{Int}} +@test LearnBase.BatchIterator <: LearnBase.DataIterator +@test LearnBase.BatchIterator <: LearnBase.AbstractBatchIterator +@test LearnBase.BatchIterator{Int,Vector{Int}} <: LearnBase.DataIterator{Int,Vector{Int}} +@test LearnBase.BatchIterator{Int,Vector{Int}} <: LearnBase.AbstractBatchIterator{Int,Vector{Int}} + +@test LearnBase.ObsDim.Constant <: LearnBase.ObsDimension +@test LearnBase.ObsDim.First <: LearnBase.ObsDimension +@test LearnBase.ObsDim.Last <: LearnBase.ObsDimension +@test LearnBase.ObsDim.Undefined <: LearnBase.ObsDimension +@test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} + +# IntervalSet +let s = LearnBase.IntervalSet(-1,1) + @test typeof(s) == LearnBase.IntervalSet{Int} + @test typeof(s) <: AbstractSet + for x in (-1,0,0.5,1,1.0) + @test x in s + end + for x in (-1-1e-10, 1+1e-10, -Inf, Inf, 2, NaN) + @test !(x in s) + end + for i=1:10 + x = rand(s) + @test typeof(x) == Float64 + @test x in s + end + xs = rand(s, 10) + @test typeof(xs) == Vector{Float64} + for x in xs + @test typeof(x) == Float64 + @test x in s + end + @test LearnBase.randtype(s) == Float64 + # @show s LearnBase.randtype(s) +end +let s = LearnBase.IntervalSet(-1,1.0) + @test typeof(s) == LearnBase.IntervalSet{Float64} + @test typeof(s) <: AbstractSet + @test 1 in s + # @show s LearnBase.randtype(s) + @test length(s) == 1 +end + +# IntervalSet{Vector} +let s = LearnBase.IntervalSet([-1.,0.], [1.,1.]) + @test typeof(s) == LearnBase.IntervalSet{Vector{Float64}} + @test typeof(s) <: AbstractSet + @test LearnBase.randtype(s) == Vector{Float64} + @test typeof(rand(s)) == Vector{Float64} + @test rand(s) in s + @test [-1, 0] in s + @test !([-1.5,0] in s) + @test !([0,2] in s) + @test length(s) == 2 +end + +# DiscreteSet +let s = LearnBase.DiscreteSet([-1,1]) + @test typeof(s) == LearnBase.DiscreteSet{Vector{Int}} + @test typeof(s) <: AbstractSet + for x in (-1, 1, -1.0, 1.0) + @test x in s + end + for x in (0, Inf, -Inf, NaN) + @test !(x in s) + end + for 
i=1:10 + x = rand(s) + @test typeof(x) == Int + @test x in s + end + xs = rand(s, 10) + @test typeof(xs) == Vector{Int} + for x in xs + @test typeof(x) == Int + @test x in s + end + @test LearnBase.randtype(s) == Int + @test length(s) == 2 + @test s[1] == -1 +end +let s = LearnBase.DiscreteSet([-1,1.0]) + @test typeof(s) == LearnBase.DiscreteSet{Vector{Float64}} + @test typeof(s) <: AbstractSet + @test typeof(rand(s)) == Float64 + @test typeof(rand(s, 2)) == Vector{Float64} +end + +# TupleSet +let s = LearnBase.TupleSet(LearnBase.IntervalSet(0,1), LearnBase.DiscreteSet([0,1])) + @test typeof(s) == LearnBase.TupleSet{Tuple{LearnBase.IntervalSet{Int}, LearnBase.DiscreteSet{Vector{Int}}}} + @test typeof(s) <: AbstractSet + for x in ([0,0], [0.0,0.0], [0.5,1.0]) + @test x in s + end + for x in ([0,0.5], [-1,0]) + @test !(x in s) + end + @test typeof(rand(s)) == Vector{Float64} + @test typeof(rand(s, 2)) == Vector{Vector{Float64}} + @test typeof(rand(s, Tuple)) == Tuple{Float64,Int} + @test typeof(rand(s, Tuple, 2)) == Vector{Tuple{Float64,Int}} + @test LearnBase.randtype(s) == Vector{Float64} + + tot = 0 + for (i,x) in enumerate(s) + @test x == s.sets[i] + tot += length(x) + end + @test length(s) == tot +end + +# arrays of sets +let s = [LearnBase.IntervalSet(0,1), LearnBase.DiscreteSet([0,1])] + @test typeof(s) == Vector{AbstractSet} + for x in ([0,0], [0.0,0.0], [0.5,1.0]) + @test x in s + end + for x in ([0,0.5], [-1,0]) + @test !(x in s) + end + @test typeof(rand(s)) == Vector{Float64} + @test typeof(rand(s, 2)) == Vector{Vector{Float64}} +end + +@testset "obsdim typetree and Constructor" begin + @test_throws MethodError LearnBase.ObsDim.Constant(2.0) + + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.First + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.Constant{1} + + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDim.Last + + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} +end + +@testset "obs_dim helper constructor" begin + @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") + @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) + + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.First() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.Constant(1) + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Last())) === LearnBase.ObsDim.Last() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Constant(2))) === LearnBase.ObsDim.Constant(2) + + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) + @test convert(LearnBase.ObsDimension, 1) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, 2) === LearnBase.ObsDim.Constant(2) + @test convert(LearnBase.ObsDimension, 6) === LearnBase.ObsDim.Constant(6) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, :first) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") + @test convert(LearnBase.ObsDimension, (:first,:last)) === (LearnBase.ObsDim.First(),LearnBase.ObsDim.Last()) + @test convert(LearnBase.ObsDimension, :first) === LearnBase.ObsDim.First() + @test 
convert(LearnBase.ObsDimension, :begin) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "first") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "BEGIN") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, :end) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :last) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "End") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "LAST") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :nothing) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :none) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :na) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :null) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :undefined) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() +end + +struct SomeType end +@testset "obsdim default values" begin + @testset "Arrays, SubArrays, and Sparse Arrays" begin + @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(rand(10,5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === LearnBase.ObsDim.Last() + end + + @testset "Types with no specified default" begin + @test @inferred(LearnBase.default_obsdim(SomeType())) === LearnBase.ObsDim.Undefined() + end + + @testset "Tuples" begin + @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Last()) + @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Last()) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 2604655..b890291 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,310 +2,5 @@ using LearnBase using SparseArrays using Test -# Test if types are exported properly -@test Cost <: Any -@test Penalty <: Cost -@test Loss <: Cost -@test UnsupervisedLoss <: Loss -@test SupervisedLoss <: Loss -@test MarginLoss <: SupervisedLoss -@test DistanceLoss <: SupervisedLoss - -@test Minimizable <: Any -@test Transformation <: Any -@test StochasticTransformation <: Transformation - -# Test if functions are exported properly -@test typeof(transform) <: Function -@test typeof(transform!) <: Function -@test typeof(getobs) <: Function -@test typeof(getobs!) <: Function -@test typeof(learn) <: Function -@test typeof(learn!) <: Function -@test typeof(update) <: Function -@test typeof(update!) <: Function - -@test typeof(scaled) <: Function - -@test typeof(value) <: Function -@test typeof(value!) 
<: Function -@test typeof(meanvalue) <: Function -@test typeof(sumvalue) <: Function -@test typeof(meanderiv) <: Function -@test typeof(sumderiv) <: Function -@test typeof(deriv) <: Function -@test typeof(deriv2) <: Function -@test typeof(deriv!) <: Function -@test typeof(value_deriv) <: Function -@test typeof(grad) <: Function -@test typeof(grad!) <: Function -@test typeof(addgrad!) <: Function -@test typeof(value_grad) <: Function -@test typeof(value_grad!) <: Function -@test typeof(prox) <: Function -@test typeof(prox!) <: Function - -@test typeof(isminimizable) <: Function -@test typeof(isdifferentiable) <: Function -@test typeof(istwicedifferentiable) <: Function -@test typeof(isconvex) <: Function -@test typeof(isstrictlyconvex) <: Function -@test typeof(isstronglyconvex) <: Function -@test typeof(isnemitski) <: Function -@test typeof(isunivfishercons) <: Function -@test typeof(isfishercons) <: Function -@test typeof(islipschitzcont) <: Function -@test typeof(islocallylipschitzcont) <: Function -@test typeof(islipschitzcont_deriv) <: Function -@test typeof(isclipable) <: Function -@test typeof(ismarginbased) <: Function -@test typeof(isclasscalibrated) <: Function -@test typeof(isdistancebased) <: Function - -@test typeof(issymmetric) <: Function - -@test typeof(getobs) <: Function -@test typeof(getobs!) <: Function -@test typeof(datasubset) <: Function - -@test typeof(targets) <: Function -@test typeof(LearnBase.gettarget) <: Function -@test typeof(LearnBase.gettargets) <: Function - -@test DataView <: AbstractVector -@test DataView <: AbstractDataIterator -@test DataView{Int} <: AbstractVector{Int} -@test DataView{Int,Vector{Int}} <: AbstractDataIterator{Int,Vector{Int}} -@test AbstractObsView <: DataView -@test AbstractObsView <: AbstractObsIterator -@test AbstractObsView{Int,Vector{Int}} <: DataView{Int,Vector{Int}} -@test AbstractObsView{Int,Vector{Int}} <: AbstractObsIterator{Int,Vector{Int}} -@test AbstractBatchView <: DataView -@test AbstractBatchView <: AbstractBatchIterator -@test AbstractBatchView{Int,Vector{Int}} <: DataView{Int,Vector{Int}} -@test AbstractBatchView{Int,Vector{Int}} <: AbstractBatchIterator{Int,Vector{Int}} - -@test DataIterator <: AbstractDataIterator -@test DataIterator{Int,Vector{Int}} <: AbstractDataIterator{Int,Vector{Int}} -@test ObsIterator <: DataIterator -@test ObsIterator <: AbstractObsIterator -@test ObsIterator{Int,Vector{Int}} <: DataIterator{Int,Vector{Int}} -@test ObsIterator{Int,Vector{Int}} <: AbstractObsIterator{Int,Vector{Int}} -@test BatchIterator <: DataIterator -@test BatchIterator <: AbstractBatchIterator -@test BatchIterator{Int,Vector{Int}} <: DataIterator{Int,Vector{Int}} -@test BatchIterator{Int,Vector{Int}} <: AbstractBatchIterator{Int,Vector{Int}} - -@test typeof(fit) <: Function -@test typeof(fit!) <: Function -@test typeof(nobs) <: Function - -@test ObsDim.Constant <: LearnBase.ObsDimension -@test ObsDim.First <: LearnBase.ObsDimension -@test ObsDim.Last <: LearnBase.ObsDimension -@test ObsDim.Undefined <: LearnBase.ObsDimension -@test typeof(ObsDim.Constant(2)) <: ObsDim.Constant{2} - -# Test that LearnBase reuses StatsBase functions -using StatsBase -@test StatsBase.fit == LearnBase.fit -@test StatsBase.fit! == LearnBase.fit! 
-@test StatsBase.nobs == LearnBase.nobs - -# Test superset fallbacks -struct MyStronglyConvexType end -LearnBase.isstronglyconvex(::MyStronglyConvexType) = true -LearnBase.islipschitzcont(::MyStronglyConvexType) = true -@test isstronglyconvex(MyStronglyConvexType()) -@test isstrictlyconvex(MyStronglyConvexType()) -@test isconvex(MyStronglyConvexType()) -@test islipschitzcont(MyStronglyConvexType()) -@test islocallylipschitzcont(MyStronglyConvexType()) - -# IntervalSet -let s = IntervalSet(-1,1) - @test typeof(s) == IntervalSet{Int} - @test typeof(s) <: AbstractSet - for x in (-1,0,0.5,1,1.0) - @test x in s - end - for x in (-1-1e-10, 1+1e-10, -Inf, Inf, 2, NaN) - @test !(x in s) - end - for i=1:10 - x = rand(s) - @test typeof(x) == Float64 - @test x in s - end - xs = rand(s, 10) - @test typeof(xs) == Vector{Float64} - for x in xs - @test typeof(x) == Float64 - @test x in s - end - @test LearnBase.randtype(s) == Float64 - # @show s LearnBase.randtype(s) -end -let s = IntervalSet(-1,1.0) - @test typeof(s) == IntervalSet{Float64} - @test typeof(s) <: AbstractSet - @test 1 in s - # @show s LearnBase.randtype(s) - @test length(s) == 1 -end - -# IntervalSet{Vector} -let s = IntervalSet([-1.,0.], [1.,1.]) - @test typeof(s) == IntervalSet{Vector{Float64}} - @test typeof(s) <: AbstractSet - @test LearnBase.randtype(s) == Vector{Float64} - @test typeof(rand(s)) == Vector{Float64} - @test rand(s) in s - @test [-1, 0] in s - @test !([-1.5,0] in s) - @test !([0,2] in s) - @test length(s) == 2 -end - -# DiscreteSet -let s = DiscreteSet([-1,1]) - @test typeof(s) == DiscreteSet{Vector{Int}} - @test typeof(s) <: AbstractSet - for x in (-1, 1, -1.0, 1.0) - @test x in s - end - for x in (0, Inf, -Inf, NaN) - @test !(x in s) - end - for i=1:10 - x = rand(s) - @test typeof(x) == Int - @test x in s - end - xs = rand(s, 10) - @test typeof(xs) == Vector{Int} - for x in xs - @test typeof(x) == Int - @test x in s - end - @test LearnBase.randtype(s) == Int - @test length(s) == 2 - @test s[1] == -1 - # @show s LearnBase.randtype(s) -end -let s = DiscreteSet([-1,1.0]) - @test typeof(s) == DiscreteSet{Vector{Float64}} - @test typeof(s) <: AbstractSet - @test typeof(rand(s)) == Float64 - @test typeof(rand(s, 2)) == Vector{Float64} - # @show s LearnBase.randtype(s) -end - -# TupleSet -let s = TupleSet(IntervalSet(0,1), DiscreteSet([0,1])) - @test typeof(s) == TupleSet{Tuple{IntervalSet{Int}, DiscreteSet{Vector{Int}}}} - @test typeof(s) <: AbstractSet - for x in ([0,0], [0.0,0.0], [0.5,1.0]) - @test x in s - end - for x in ([0,0.5], [-1,0]) - @test !(x in s) - end - @test typeof(rand(s)) == Vector{Float64} - @test typeof(rand(s, 2)) == Vector{Vector{Float64}} - @test typeof(rand(s, Tuple)) == Tuple{Float64,Int} - @test typeof(rand(s, Tuple, 2)) == Vector{Tuple{Float64,Int}} - @test LearnBase.randtype(s) == Vector{Float64} - # @show s LearnBase.randtype(s) - - tot = 0 - for (i,x) in enumerate(s) - @test x == s.sets[i] - tot += length(x) - end - @test length(s) == tot -end - -# arrays of sets -let s = [IntervalSet(0,1), DiscreteSet([0,1])] - @test typeof(s) == Vector{AbstractSet} - for x in ([0,0], [0.0,0.0], [0.5,1.0]) - @test x in s - end - for x in ([0,0.5], [-1,0]) - @test !(x in s) - end - @test typeof(rand(s)) == Vector{Float64} - @test typeof(rand(s, 2)) == Vector{Vector{Float64}} - # @show s LearnBase.randtype(s) -end - -@testset "obsdim typetree and Constructor" begin - @test_throws MethodError ObsDim.Constant(2.0) - - @test typeof(ObsDim.First()) <: LearnBase.ObsDimension - @test typeof(ObsDim.First()) <: 
ObsDim.First - @test typeof(ObsDim.First()) <: ObsDim.Constant{1} - - @test typeof(ObsDim.Last()) <: LearnBase.ObsDimension - @test typeof(ObsDim.Last()) <: ObsDim.Last - - @test typeof(ObsDim.Constant(2)) <: LearnBase.ObsDimension - @test typeof(ObsDim.Constant(2)) <: ObsDim.Constant{2} -end - -@testset "obs_dim helper constructor" begin - @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") - @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) - - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.First())) === ObsDim.First() - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.First())) === ObsDim.Constant(1) - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.Last())) === ObsDim.Last() - @test @inferred(convert(LearnBase.ObsDimension, ObsDim.Constant(2))) === ObsDim.Constant(2) - - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) - @test convert(LearnBase.ObsDimension, 1) === ObsDim.First() - @test convert(LearnBase.ObsDimension, 2) === ObsDim.Constant(2) - @test convert(LearnBase.ObsDimension, 6) === ObsDim.Constant(6) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, :first) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") - @test convert(LearnBase.ObsDimension, (:first,:last)) === (ObsDim.First(),ObsDim.Last()) - @test convert(LearnBase.ObsDimension, :first) === ObsDim.First() - @test convert(LearnBase.ObsDimension, :begin) === ObsDim.First() - @test convert(LearnBase.ObsDimension, "first") === ObsDim.First() - @test convert(LearnBase.ObsDimension, "BEGIN") === ObsDim.First() - @test convert(LearnBase.ObsDimension, :end) === ObsDim.Last() - @test convert(LearnBase.ObsDimension, :last) === ObsDim.Last() - @test convert(LearnBase.ObsDimension, "End") === ObsDim.Last() - @test convert(LearnBase.ObsDimension, "LAST") === ObsDim.Last() - @test convert(LearnBase.ObsDimension, :nothing) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :none) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :na) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :null) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :undefined) === ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, nothing) === ObsDim.Undefined() -end - -struct SomeType end -@testset "obsdim default values" begin - @testset "Arrays, SubArrays, and Sparse Arrays" begin - @test @inferred(LearnBase.default_obsdim(rand(10))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(rand(10,5))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === ObsDim.Last() - end - - @testset "Types with no specified default" begin - @test @inferred(LearnBase.default_obsdim(SomeType())) === ObsDim.Undefined() - end - - @testset "Tuples" begin - @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (ObsDim.Undefined(), ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (ObsDim.Undefined(), ObsDim.Last()) - @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (ObsDim.Last(), ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === 
(ObsDim.Last(), ObsDim.Last()) - end -end +include("costs.jl") +include("other.jl") From dece2909a5ef04c71351d0f57b14698eb981a465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 13:06:31 -0300 Subject: [PATCH 02/15] Add docstrings for value function in costs.jl --- src/costs.jl | 95 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 11 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 525b4e3..37bf311 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -21,16 +21,16 @@ abstract type SupervisedLoss <: Loss end @doc doc""" A supervised loss is considered **unary** if it can be written as a composition -`L(Ψ(output, target))` for some binary function `Ψ`. In this case the loss can -be evaluated with a single argument termed the **agreement** `Ψ(output, target)`. -Notable examples for unary supervised losses are distance-based (`Ψ(ŷ,y) = ŷ - y`) -and margin-based (`Ψ(ŷ,y) = ŷ*y`) losses. +`L(Ψ(target, output))` for some binary function `Ψ`. In this case the loss can +be evaluated with a single argument termed the **agreement** `Ψ(target, output)`. +Notable examples for unary supervised losses are distance-based (`Ψ(y,ŷ) = ŷ - y`) +and margin-based (`Ψ(y,ŷ) = ŷ*y`) losses. """ abstract type UnarySupervisedLoss <: SupervisedLoss end @doc doc""" A supervised loss that can be simplified to -`L(targets, outputs) = L(targets - outputs)` is considered +`L(targets, outputs) = L(outputs - targets)` is considered **distance-based**. """ abstract type DistanceLoss <: UnarySupervisedLoss end @@ -55,16 +55,89 @@ Baseclass for all penalties. """ abstract type Penalty <: Cost end -function value end -function value! end +@doc doc""" + value(loss, target, output) -function deriv end -function deriv! end +Compute the (non-negative) numeric result for the `loss` function +return it. Note that `target` and `output` can be of different +numeric type, in which case promotion is performed in the manner +appropriate for the given loss. -function deriv2 end -function deriv2! end +```math +L : Y \times \mathbb{R} \rightarrow [0,\infty) +``` + +# Arguments + +- `loss::SupervisedLoss`: The loss-function ``L`` we want to + compute the value with. +- `target::Number`: The ground truth ``y \in Y`` of the observation. +- `output::Number`: The predicted output ``\hat{y} \in \mathbb{R}`` + for the observation. +""" +value(loss::SupervisedLoss, target::Number, output::Number) = + MethodError(value, (loss, target, output)) +""" + value(loss, targets, outputs, aggmode) -> Number + +Compute the weighted or unweighted sum or mean (depending on +aggregation mode `aggmode`) of the individual values of the `loss` +function for each pair in `targets` and `outputs`. This method +will not allocate a temporary array. + +In the case that the two parameters are arrays with a different +number of dimensions, broadcast will be performed. Note that the +given parameters are expected to have the same size in the +dimensions they share. + +# Arguments + +- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. +- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. +- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. +- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), + [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or + [`AggMode.WeightedMean`](@ref). 
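For illustration only, a minimal sketch of how a concrete loss could hook into the scalar method above; `MyAbsLoss` is a hypothetical type, not something this patch defines:

```julia
using LearnBase

# Hypothetical loss type, not part of this patch: L(y, ŷ) = |ŷ - y|
struct MyAbsLoss <: LearnBase.DistanceLoss end

# Hook into the interface by adding a method for the scalar form
LearnBase.value(::MyAbsLoss, target::Number, output::Number) = abs(output - target)

LearnBase.value(MyAbsLoss(), 1.0, 0.7)   # ≈ 0.3
```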
+""" +value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = + MethodError(value, (loss, targets, outputs, aggmode)) + +""" + value(loss, targets, outputs, aggmode, obsdim) -> AbstractVector + +Compute the values of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation (depending on +`aggmode`). This method will not allocate a temporary array, but +it will allocate the resulting vector. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. they must +not be vectors). + +# Arguments + +- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. +- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. +- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. +- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), + [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or + [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. see `?ObsDim` for more information. +""" +value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(value, (loss, targets, outputs, aggmode, obsdim)) + +function deriv end +function deriv2 end function value_deriv end + +function value! end +function deriv! end +function deriv2! end function value_deriv! end @doc doc""" From 5caa914f9ce5cd72f6d879aa5b14375d52b3cc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 13:15:52 -0300 Subject: [PATCH 03/15] Add docstring for value! function in costs.jl --- src/costs.jl | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 37bf311..a46945e 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -78,7 +78,7 @@ L : Y \times \mathbb{R} \rightarrow [0,\infty) value(loss::SupervisedLoss, target::Number, output::Number) = MethodError(value, (loss, target, output)) -""" +@doc doc""" value(loss, targets, outputs, aggmode) -> Number Compute the weighted or unweighted sum or mean (depending on @@ -103,7 +103,7 @@ dimensions they share. value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = MethodError(value, (loss, targets, outputs, aggmode)) -""" +@doc doc""" value(loss, targets, outputs, aggmode, obsdim) -> AbstractVector Compute the values of the `loss` function for each pair in @@ -130,7 +130,37 @@ not be vectors). value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(value, (loss, targets, outputs, aggmode, obsdim)) - + +@doc doc""" + value!(buffer, loss, targets, outputs, aggmode, obsdim) -> buffer + +Compute the values of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation, +depending on `aggmode`. The results are stored into the given +vector `buffer`. This method will not allocate a temporary array. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. so they +must not be vectors). + +# Arguments + +- `buffer::AbstractArray`: Array to store the computed values in. 
+ Old values will be overwritten and lost. +- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. +- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. +- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. +- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), + [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or + [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. see `?ObsDim` for more information. +""" +value!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(value!, (buffer, loss, targets, outputs, aggmode, obsdim)) + function deriv end function deriv2 end function value_deriv end From 9655883547691b9468e093cdff1c4b49b4f35988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 13:51:51 -0300 Subject: [PATCH 04/15] Add aggmode.jl and obsdim.jl source files --- src/LearnBase.jl | 6 +++ src/aggmode.jl | 111 +++++++++++++++++++++++++++++++++++++++++++++++ src/costs.jl | 1 - src/obsdim.jl | 90 ++++++++++++++++++++++++++++++++++++++ src/other.jl | 93 --------------------------------------- 5 files changed, 207 insertions(+), 94 deletions(-) create mode 100644 src/aggmode.jl create mode 100644 src/obsdim.jl diff --git a/src/LearnBase.jl b/src/LearnBase.jl index 867db18..1113049 100644 --- a/src/LearnBase.jl +++ b/src/LearnBase.jl @@ -2,6 +2,12 @@ module LearnBase using Markdown # for docstrings +# AGGREGATION MODES +include("aggmode.jl") + +# OBSERVATION DIMENSIONS +include("obsdim.jl") + # LEARNING COSTS (e.g. loss & penalty) include("costs.jl") diff --git a/src/aggmode.jl b/src/aggmode.jl new file mode 100644 index 0000000..1e78b43 --- /dev/null +++ b/src/aggmode.jl @@ -0,0 +1,111 @@ +""" +Baseclass for all aggregation modes. +""" +abstract type AggregateMode end + +""" + module AggMode + +Types for aggregation of multiple observations. + +- `AggMode.None()` +- `AggMode.Sum()` +- `AggMode.Mean()` +- `AggMode.WeightedSum(weights)` +- `AggMode.WeightedMean(weights)` +""" +module AggMode + using ..LearnBase: AggregateMode + + """ + AggMode.None() + + Opt-out of aggregation. This is usually the default value. + Using `None` will cause the element-wise results to be returned. + """ + struct None <: AggregateMode end + + """ + AggMode.Sum() + + Causes the method to return the unweighted sum of the + elements instead of the individual elements. Can be used in + combination with `ObsDim`, in which case a vector will be + returned containing the sum for each observation (useful + mainly for multivariable regression). + """ + struct Sum <: AggregateMode end + + """ + AggMode.Mean() + + Causes the method to return the unweighted mean of the + elements instead of the individual elements. Can be used in + combination with `ObsDim`, in which case a vector will be + returned containing the mean for each observation (useful + mainly for multivariable regression). + """ + struct Mean <: AggregateMode end + + """ + AggMode.WeightedSum(weights; [normalize = false]) + + Causes the method to return the weighted sum of all + observations. The variable `weights` has to be a vector of + the same length as the number of observations. + If `normalize = true`, the values of the weight vector will + be normalized in such as way that they sum to one. 
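For illustration, a small sketch constructing the aggregation modes defined in this file; the weight vector is made up for the example:

```julia
using LearnBase: AggMode

w = [0.1, 0.4, 0.5]                       # one weight per observation (made up)

AggMode.Sum()                             # unweighted sum of the element-wise results
AggMode.Mean()                            # unweighted mean
AggMode.WeightedSum(w)                    # weights used as given (normalize = false)
AggMode.WeightedMean(w, normalize=false)  # opt out of the default normalization
```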
+ + # Arguments + + - `weights::AbstractVector`: Vector of weight values that + can be used to give certain observations a stronger + influence on the sum. + + - `normalize::Bool`: Boolean that specifies if the weight + vector should be transformed in such a way that it sums to + one (i.e. normalized). This will not mutate the weight + vector but instead happen on the fly during the + accumulation. + + Defaults to `false`. Setting it to `true` only really + makes sense in multivalue-regression, otherwise the result + will be the same as for [`WeightedMean`](@ref). + """ + struct WeightedSum{W<:AbstractVector} <: AggregateMode + weights::W + normalize::Bool + end + WeightedSum(weights::AbstractVector; normalize::Bool = false) = WeightedSum(weights, normalize) + + """ + AggMode.WeightedMean(weights; [normalize = true]) + + Causes the method to return the weighted mean of all + observations. The variable `weights` has to be a vector of + the same length as the number of observations. + If `normalize = true`, the values of the weight vector will + be normalized in such as way that they sum to one. + + # Arguments + + - `weights::AbstractVector`: Vector of weight values that can + be used to give certain observations a stronger influence + on the mean. + + - `normalize::Bool`: Boolean that specifies if the weight + vector should be transformed in such a way that it sums to + one (i.e. normalized). This will not mutate the weight + vector but instead happen on the fly during the + accumulation. + + Defaults to `true`. Setting it to `false` only really makes + sense in multivalue-regression, otherwise the result will + be the same as for [`WeightedSum`](@ref). + """ + struct WeightedMean{W<:AbstractVector} <: AggregateMode + weights::W + normalize::Bool + end + WeightedMean(weights::AbstractVector; normalize::Bool = true) = WeightedMean(weights, normalize) +end diff --git a/src/costs.jl b/src/costs.jl index a46945e..22fbeaf 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -165,7 +165,6 @@ function deriv end function deriv2 end function value_deriv end -function value! end function deriv! end function deriv2! end function value_deriv! end diff --git a/src/obsdim.jl b/src/obsdim.jl new file mode 100644 index 0000000..c1a580d --- /dev/null +++ b/src/obsdim.jl @@ -0,0 +1,90 @@ +""" +Baseclass for all observation dimensions. +""" +abstract type ObsDimension end + +""" + module ObsDim + +Singleton types to define which dimension of some data structure +(e.g. some `Array`) denotes the observations. + +- `ObsDim.First()` +- `ObsDim.Last()` +- `ObsDim.Constant(dim)` + +Used for efficient dispatching +""" +module ObsDim + using ..LearnBase: ObsDimension + + """ + Default value for most functions. Denotes that the concept of + an observation dimension is not defined for the given data. 
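A small usage sketch of the conversions and defaults introduced in this file; the calls mirror the tests added later in this series:

```julia
using LearnBase
using LearnBase: ObsDim, ObsDimension

convert(ObsDimension, :first)       # ObsDim.First()
convert(ObsDimension, 2)            # ObsDim.Constant(2)
convert(ObsDimension, nothing)      # ObsDim.Undefined()

LearnBase.default_obsdim(rand(10, 5))     # ObsDim.Last()
LearnBase.default_obsdim((rand(10), "x")) # (ObsDim.Last(), ObsDim.Undefined())
```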
+ """ + struct Undefined <: ObsDimension end + + """ + ObsDim.Last <: ObsDimension + + Defines that the last dimension denotes the observations + """ + struct Last <: ObsDimension end + + """ + ObsDim.Constant{DIM} <: ObsDimension + + Defines that the dimension `DIM` denotes the observations + """ + struct Constant{DIM} <: ObsDimension end + Constant(dim::Int) = Constant{dim}() + + """ + ObsDim.First <: ObsDimension + + Defines that the first dimension denotes the observations + """ + const First = Constant{1} +end + +Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) +Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim +Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() +Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) +Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) +Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) +function Base.convert(::Type{ObsDimension}, dim::Symbol) + if dim == :first || dim == :begin + ObsDim.First() + elseif dim == Symbol("end") || dim == :last + ObsDim.Last() + elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined + ObsDim.Undefined() + else + throw(ArgumentError("Unknown way to specify a obsdim: $dim")) + end +end + +""" + default_obsdim(data) + +The specify the default obsdim for a specific type of data. +Defaults to `ObsDim.Undefined()` +""" +default_obsdim(data) = ObsDim.Undefined() +default_obsdim(A::AbstractArray) = ObsDim.Last() +default_obsdim(tup::Tuple) = map(default_obsdim, tup) + +""" + datasubset(data, [idx], [obsdim]) + +Return a lazy subset of the observations in `data` that correspond +to the given `idx`. No data should be copied except of the +indices. Note that `idx` can be of type `Int` or `AbstractVector`. +Both options must be supported by a custom type. + +If it makes sense for the type of `data`, `obsdim` can be used +to disptach on which dimension of `data` denotes the observations. +See `?ObsDim`. +""" +function datasubset end diff --git a/src/other.jl b/src/other.jl index ec7e87a..cd7b914 100644 --- a/src/other.jl +++ b/src/other.jl @@ -204,99 +204,6 @@ const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E, # -------------------------------------------------------------------- -# datasubset(data, [idx], [obsdim]) -# -# Return a lazy subset of the observations in `data` that correspond -# to the given `idx`. No data should be copied except of the -# indices. Note that `idx` can be of type `Int` or `AbstractVector`. -# Both options must be supported by a custom type. -# -# If it makes sense for the type of `data`, `obsdim` can be used -# to disptach on which dimension of `data` denotes the observations. -# See `?ObsDim` -# -# This function is implemented in MLDataPattern -function datasubset end - -""" - default_obsdim(data) - -The specify the default obsdim for a specific type of data. -Defaults to `ObsDim.Undefined()` -""" -function default_obsdim end - -# just for dispatch for those who care to -"see `?ObsDim`" -abstract type ObsDimension end - -""" - module ObsDim - -Singleton types to define which dimension of some data structure -(e.g. some `Array`) denotes the observations. - -- `ObsDim.First()` -- `ObsDim.Last()` -- `ObsDim.Constant(dim)` - -Used for efficient dispatching -""" -module ObsDim - using ..LearnBase: ObsDimension - - """ - Default value for most functions. 
Denotes that the concept of - an observation dimension is not defined for the given data. - """ - struct Undefined <: ObsDimension end - - """ - ObsDim.Last <: ObsDimension - - Defines that the last dimension denotes the observations - """ - struct Last <: ObsDimension end - - """ - ObsDim.Constant{DIM} <: ObsDimension - - Defines that the dimension `DIM` denotes the observations - """ - struct Constant{DIM} <: ObsDimension end - Constant(dim::Int) = Constant{dim}() - - """ - ObsDim.First <: ObsDimension - - Defines that the first dimension denotes the observations - """ - const First = Constant{1} -end - -Base.convert(::Type{ObsDimension}, dim) = throw(ArgumentError("Unknown way to specify a obsdim: $dim")) -Base.convert(::Type{ObsDimension}, dim::ObsDimension) = dim -Base.convert(::Type{ObsDimension}, ::Nothing) = ObsDim.Undefined() -Base.convert(::Type{ObsDimension}, dim::Int) = ObsDim.Constant(dim) -Base.convert(::Type{ObsDimension}, dim::String) = convert(ObsDimension, Symbol(lowercase(dim))) -Base.convert(::Type{ObsDimension}, dims::Tuple) = map(d->convert(ObsDimension, d), dims) -function Base.convert(::Type{ObsDimension}, dim::Symbol) - if dim == :first || dim == :begin - ObsDim.First() - elseif dim == Symbol("end") || dim == :last - ObsDim.Last() - elseif dim == Symbol("nothing") || dim == :none || dim == :null || dim == :na || dim == :undefined - ObsDim.Undefined() - else - throw(ArgumentError("Unknown way to specify a obsdim: $dim")) - end -end - -default_obsdim(data) = ObsDim.Undefined() -default_obsdim(A::AbstractArray) = ObsDim.Last() -default_obsdim(tup::Tuple) = map(default_obsdim, tup) - - import Base: AbstractSet "A continuous range (inclusive) between a lo and a hi" From d7b34be3b1ac9f13d9dbaf5f9147ec9f80fa96d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 14:56:33 -0300 Subject: [PATCH 05/15] Update docstrings and remove Markdown dependency --- Project.toml | 3 - src/LearnBase.jl | 2 - src/costs.jl | 294 ++++++++++++++++++----------------------------- 3 files changed, 113 insertions(+), 186 deletions(-) diff --git a/Project.toml b/Project.toml index ab19398..01fe436 100644 --- a/Project.toml +++ b/Project.toml @@ -2,9 +2,6 @@ name = "LearnBase" uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" version = "0.3.0" -[deps] -Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" - [compat] julia = "1" diff --git a/src/LearnBase.jl b/src/LearnBase.jl index 1113049..f498db6 100644 --- a/src/LearnBase.jl +++ b/src/LearnBase.jl @@ -1,7 +1,5 @@ module LearnBase -using Markdown # for docstrings - # AGGREGATION MODES include("aggmode.jl") diff --git a/src/costs.jl b/src/costs.jl index 22fbeaf..8bf9f13 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -1,84 +1,76 @@ -@doc doc""" +""" Baseclass for any kind of cost. Notable examples for costs are `Loss` and `Penalty`. """ abstract type Cost end -@doc doc""" +""" Baseclass for all losses. A loss is some (possibly simplified) -function `L(features, targets, outputs)`, where `outputs` are the -result of some function `f(features)`. +function `L(x, y, ŷ)`, of features `x`, targets `y` and outputs +`ŷ = f(x)` for some function `f`. """ abstract type Loss <: Cost end -@doc doc""" +""" A loss is considered **supervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`targets` and `outputs`, and thus allows for the simplification -`L(targets, outputs)`. +to compute `L(x, y, ŷ)` are contained in `y` and `ŷ`, and thus allows +for the simplification `L(y, ŷ)`. 
""" abstract type SupervisedLoss <: Loss end -@doc doc""" +""" A supervised loss is considered **unary** if it can be written as a composition -`L(Ψ(target, output))` for some binary function `Ψ`. In this case the loss can -be evaluated with a single argument termed the **agreement** `Ψ(target, output)`. -Notable examples for unary supervised losses are distance-based (`Ψ(y,ŷ) = ŷ - y`) -and margin-based (`Ψ(y,ŷ) = ŷ*y`) losses. +`L(β(y, ŷ))` for some binary function `β`. In this case the loss can be evaluated +with a single argument termed the **agreement** `β(y, ŷ)`. Notable +examples for unary supervised losses are distance-based (`L(y,ŷ) = ψ(ŷ - y)`) +and margin-based (`L(y,ŷ) = ψ(y⋅ŷ)`) losses. """ abstract type UnarySupervisedLoss <: SupervisedLoss end -@doc doc""" -A supervised loss that can be simplified to -`L(targets, outputs) = L(outputs - targets)` is considered -**distance-based**. +""" +A supervised loss that can be simplified to `L(y, ŷ) = L(ŷ - y)` +is considered **distance-based**. """ abstract type DistanceLoss <: UnarySupervisedLoss end -@doc doc""" -A supervised loss, where the targets are in {-1, 1}, and which -can be simplified to `L(targets, outputs) = L(targets * outputs)` -is considered **margin-based**. +""" +A supervised loss with targets `y ∈ {-1, 1}`, and which +can be simplified to `L(y, ŷ) = L(y⋅ŷ)` is considered +**margin-based**. """ abstract type MarginLoss <: UnarySupervisedLoss end -@doc doc""" +""" A loss is considered **unsupervised**, if all the information needed -to compute `L(features, targets, outputs)` are contained in -`features` and `outputs`, and thus allows for the simplification -`L(features, outputs)`. +to compute `L(x, y, ŷ)` are contained in `x` and `ŷ`, and thus allows +for the simplification `L(x, ŷ)`. """ abstract type UnsupervisedLoss <: Loss end -@doc doc""" +""" Baseclass for all penalties. """ abstract type Penalty <: Cost end -@doc doc""" +""" value(loss, target, output) Compute the (non-negative) numeric result for the `loss` function return it. Note that `target` and `output` can be of different numeric type, in which case promotion is performed in the manner -appropriate for the given loss. - -```math -L : Y \times \mathbb{R} \rightarrow [0,\infty) -``` +appropriate for the given loss `L: Y × ℝ → [0,∞)`. # Arguments -- `loss::SupervisedLoss`: The loss-function ``L`` we want to - compute the value with. -- `target::Number`: The ground truth ``y \in Y`` of the observation. -- `output::Number`: The predicted output ``\hat{y} \in \mathbb{R}`` +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `target::Number`: The ground truth `y ∈ Y` of the observation. +- `output::Number`: The predicted output `ŷ ∈ ℝ`. for the observation. """ value(loss::SupervisedLoss, target::Number, output::Number) = MethodError(value, (loss, target, output)) -@doc doc""" +""" value(loss, targets, outputs, aggmode) -> Number Compute the weighted or unweighted sum or mean (depending on @@ -93,17 +85,17 @@ dimensions they share. # Arguments -- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. -- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. -- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. -- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), - [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or - [`AggMode.WeightedMean`](@ref). +- `loss::SupervisedLoss`: The loss function `L` we want to compute. 
+- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). """ value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = MethodError(value, (loss, targets, outputs, aggmode)) -@doc doc""" +""" value(loss, targets, outputs, aggmode, obsdim) -> AbstractVector Compute the values of the `loss` function for each pair in @@ -118,20 +110,20 @@ not be vectors). # Arguments -- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. -- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. -- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. -- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), - [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or - [`AggMode.WeightedMean`](@ref). +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). - `obsdim::ObsDimension`: Specifies which of the array dimensions - denotes the observations. see `?ObsDim` for more information. + denotes the observations. See `?ObsDim` for more information. """ value(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(value, (loss, targets, outputs, aggmode, obsdim)) -@doc doc""" +""" value!(buffer, loss, targets, outputs, aggmode, obsdim) -> buffer Compute the values of the `loss` function for each pair in @@ -148,14 +140,14 @@ must not be vectors). - `buffer::AbstractArray`: Array to store the computed values in. Old values will be overwritten and lost. -- `loss::SupervisedLoss`: The loss-function ``L`` we are working with. -- `targets::AbstractArray`: The array of ground truths ``\\mathbf{y}``. -- `outputs::AbstractArray`: The array of predicted outputs ``\\mathbf{\\hat{y}}``. -- `aggmode::AggregateMode`: Must be one of the following: [`AggMode.Sum()`](@ref), - [`AggMode.Mean()`](@ref), [`AggMode.WeightedSum`](@ref), or - [`AggMode.WeightedMean`](@ref). +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). - `obsdim::ObsDimension`: Specifies which of the array dimensions - denotes the observations. see `?ObsDim` for more information. + denotes the observations. See `?ObsDim` for more information. """ value!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = @@ -169,228 +161,168 @@ function deriv! end function deriv2! end function value_deriv! end -@doc doc""" +""" isconvex(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a convex function. 
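For intuition, a sketch of the fallback chain with a hypothetical loss that only declares strong convexity (the same pattern as the test type used elsewhere in this series):

```julia
# Hypothetical loss that only declares strong convexity
struct MyConvexLoss <: LearnBase.SupervisedLoss end
LearnBase.isstronglyconvex(::MyConvexLoss) = true

LearnBase.isstrictlyconvex(MyConvexLoss())        # true, via the fallback to isstronglyconvex
LearnBase.isconvex(MyConvexLoss())                # true, via the fallback to isstrictlyconvex
LearnBase.islocallylipschitzcont(MyConvexLoss())  # true, because the loss is convex
```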
-A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is convex if -its domain is a convex set and if for all ``x, y`` in that -domain, with ``\theta`` such that for ``0 \leq \theta \leq 1``, -we have - -```math -f(\theta x + (1 - \theta) y) \leq \theta f(x) + (1 - \theta) f(y) -``` +A function `f: ℝⁿ → ℝ` is convex if its domain is a convex set +and if for all `x, y` in that domain, with `θ` such that for +`0 ≦ θ ≦ 1`, we have `f(θ x + (1 - θ) y) ≦ θ f(x) + (1 - θ) f(y)`. """ isconvex(l) = isstrictlyconvex(l) -@doc doc""" +""" isstrictlyconvex(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a strictly convex function. -A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is -strictly convex if its domain is a convex set and if for all -``x, y`` in that domain where ``x \neq y``, with -``\theta`` such that for ``0 < \theta < 1``, we have - -```math -f(\theta x + (1 - \theta) y) < \theta f(x) + (1 - \theta) f(y) -``` +A function `f : ℝⁿ → ℝ` is strictly convex if its domain is a convex +set and if for all `x, y` in that domain where `x ≠ y`, with `θ` such +that for `0 < θ < 1`, we have `f(θ x + (1 - θ) y) < θ f(x) + (1 - θ) f(y)`. """ isstrictlyconvex(l) = isstronglyconvex(l) -@doc doc""" +""" isstronglyconvex(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a strongly convex function. -A function ``f : \mathbb{R}^n \rightarrow \mathbb{R}`` is -``m``-strongly convex if its domain is a convex set and if -``\forall x,y \in`` **dom** ``f`` where ``x \neq y``, -and ``\theta`` such that for ``0 \le \theta \le 1`` , we have - -```math -f(\theta x + (1 - \theta)y) < \theta f(x) + (1 - \theta) f(y) - 0.5 m \cdot \theta (1 - \theta) {\| x - y \|}_2^2 -``` +A function `f : ℝⁿ → ℝ` is `m`-strongly convex if its domain is a convex +set, and if for all `x, y` in that domain where `x ≠ y`, and `θ` such that +for `0 ≤ θ ≤ 1`, we have +`f(θ x + (1 - θ)y) < θ f(x) + (1 - θ) f(y) - 0.5 m ⋅ θ (1 - θ) | x - y |₂²` -In a more familiar setting, if the loss function is -differentiable we have - -```math -\left( \nabla f(x) - \nabla f(y) \right)^\top (x - y) \ge m {\| x - y\|}_2^2 -``` +In a more familiar setting, if the loss function is differentiable we have +`(∇f(x) - ∇f(y))ᵀ (x - y) ≥ m | x - y |₂²` """ isstronglyconvex(::SupervisedLoss) = false -@doc doc""" +""" isdifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). -A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}^{m}`` is -differentiable at a point ``x \in`` **int dom** ``f``, if there -exists a matrix ``Df(x) \in \mathbb{R}^{m \times n}`` such that +A function `f : ℝⁿ → ℝᵐ` is differentiable at a point `x` in the interior +domain of `f` if there exists a matrix `Df(x) ∈ ℝ^(m × n)` such that it satisfies: -```math -\lim_{z \neq x, z \to x} \frac{{\|f(z) - f(x) - Df(x)(z-x)\|}_2}{{\|z - x\|}_2} = 0 -``` +`lim_{z ≠ x, z → x} (|f(z) - f(x) - Df(x)(z-x)|₂) / |z - x|₂ = 0` A function is differentiable if its domain is open and it is -differentiable at every point ``x``. +differentiable at every point `x`. """ isdifferentiable(l::SupervisedLoss) = istwicedifferentiable(l) isdifferentiable(l::SupervisedLoss, at) = isdifferentiable(l) -@doc doc""" +""" istwicedifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). 
-A function ``f : \mathbb{R}^{n} \rightarrow \mathbb{R}`` is -said to be twice differentiable at a point ``x \in`` **int -dom** ``f``, if the function derivative for ``\nabla f`` -exists at ``x``. - -```math -\nabla^2 f(x) = D \nabla f(x) -``` +A function `f : ℝⁿ → ℝ` is said to be twice differentiable +at a point `x` in the interior domain of `f`, if the function +derivative for `∇f` exists at `x`: `∇²f(x) = D∇f(x)`. A function is twice differentiable if its domain is open and it -is twice differentiable at every point ``x``. +is twice differentiable at every point `x`. """ istwicedifferentiable(::SupervisedLoss) = false istwicedifferentiable(l::SupervisedLoss, at) = istwicedifferentiable(l) -@doc doc""" +""" islocallylipschitzcont(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` function is locally-Lipschitz continous. -A supervised loss ``L : Y \times \mathbb{R} \rightarrow [0, \infty)`` -is called locally Lipschitz continuous if ``\forall a \ge 0`` -there exists a constant :math:`c_a \ge 0`, such that +A supervised loss `L : Y × ℝ → [0, ∞)` is called locally Lipschitz +continuous if for all `a ≥ 0` there exists a constant `cₐ ≥ 0`, +such that -```math -\sup_{y \in Y} \left| L(y,t) − L(y,t′) \right| \le c_a |t − t′|, \qquad t,t′ \in [−a,a] -``` +`sup_{y ∈ Y} | L(y,t) − L(y,t′) | ≤ cₐ |t − t′|, t, t′ ∈ [−a,a]` Every convex function is locally lipschitz continuous. """ islocallylipschitzcont(l) = isconvex(l) || islipschitzcont(l) -@doc doc""" +""" islipschitzcont(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` function is Lipschitz continuous. -A supervised loss function ``L : Y \times \mathbb{R} \rightarrow -[0, \infty)`` is Lipschitz continous, if there exists a finite -constant ``M < \infty`` such that - -```math -|L(y, t) - L(y, t′)| \le M |t - t′|, \qquad \forall (y, t) \in Y \times \mathbb{R} -``` +A supervised loss function `L : Y × ℝ → [0, ∞)` is Lipschitz continous, +if there exists a finite constant `M < ∞` such that +`|L(y, t) - L(y, t′)| ≤ M |t - t′|, ∀ (y, t) ∈ Y × ℝ` """ islipschitzcont(::SupervisedLoss) = false -@doc doc""" +""" isnemitski(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` denotes a Nemitski loss function. -We call a supervised loss function ``L : Y \times \mathbb{R} -\rightarrow [0,\infty)`` a Nemitski loss if there exist a -measurable function ``b : Y \rightarrow [0, \infty)`` and an -increasing function ``h : [0, \infty) \rightarrow [0, \infty)`` -such that - -```math -L(y,\hat{y}) \le b(y) + h(|\hat{y}|), \qquad (y, \hat{y}) \in Y \times \mathbb{R}. -``` +We call a supervised loss function `L : Y × ℝ → [0,∞)` a Nemitski +loss if there exist a measurable function `b : Y → [0, ∞)` and an +increasing function `h : [0, ∞) → [0, ∞)` such that +`L(y,ŷ) ≤ b(y) + h(|ŷ|), (y, ŷ) ∈ Y × ℝ` -If a loss if locally lipsschitz continuous then it is a Nemitski loss +If a loss if locally lipsschitz continuous then it is a Nemitski loss. """ isnemitski(l::SupervisedLoss) = islocallylipschitzcont(l) -@doc doc""" +""" isclipable(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` function is clipable. A -supervised loss ``L : Y \times \mathbb{R} \rightarrow [0, -\infty)`` can be clipped at ``M > 0`` if, for all ``(y,t) -\in Y \times \mathbb{R}``, - -```math -L(y, \hat{t}) \le L(y, t) -``` - -where ``\hat{t}`` denotes the clipped value of ``t`` at -``\pm M``. 
That is - -```math -\hat{t} = \begin{cases} -M & \quad \text{if } t < -M \\ t & \quad \text{if } t \in [-M, M] \\ M & \quad \text{if } t > M \end{cases} -``` +supervised loss `L : Y × ℝ → [0,∞)` can be clipped at `M > 0` +if, for all `(y,t) ∈ Y × ℝ`, `L(y, t̂) ≤ L(y, t)` where +`t̂` denotes the clipped value of `t` at `± M`. +That is +`t̂ = -M` if `t < -M`, `t̂ = t` if `t ∈ [-M, M]`, and `t = M` if `t > M`. """ isclipable(::SupervisedLoss) = false -@doc doc""" +""" isdistancebased(loss::SupervisedLoss) -> Bool -Return `true` ifthe given `loss` is a distance-based loss. +Return `true` if the given `loss` is a distance-based loss. -A supervised loss function ``L : Y \times \mathbb{R} \rightarrow -[0, \infty)`` is said to be **distance-based**, if there exists a -representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)`` -satisfying ``\psi (0) = 0`` and - -```math -L(y, \hat{y}) = \psi (\hat{y} - y), \qquad (y, \hat{y}) \in Y \times \mathbb{R} -``` +A supervised loss function `L : Y × ℝ → [0,∞)` is said to be +distance-based, if there exists a representing function `ψ : ℝ → [0,∞)` +satisfying `ψ(0) = 0` and `L(y, ŷ) = ψ (ŷ - y), (y, ŷ) ∈ Y × ℝ`. """ isdistancebased(::SupervisedLoss) = false -@doc doc""" +""" ismarginbased(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` is a margin-based loss. -A supervised loss function ``L : Y \times \mathbb{R} \rightarrow -[0, \infty)`` is said to be **margin-based**, if there exists a -representing function ``\psi : \mathbb{R} \rightarrow [0, \infty)`` -satisfying - -```math -L(y, \hat{y}) = \psi (y \cdot \hat{y}), \qquad (y, \hat{y}) \in Y \times \mathbb{R} -``` +A supervised loss function `L : Y × ℝ → [0,∞)` is said to be +margin-based, if there exists a representing function `ψ : ℝ → [0,∞)` +satisfying `L(y, ŷ) = ψ(y⋅ŷ), (y, ŷ) ∈ Y × ℝ`. """ ismarginbased(::SupervisedLoss) = false -@doc doc""" +""" isclasscalibrated(loss::SupervisedLoss) -> Bool """ isclasscalibrated(::SupervisedLoss) = false -@doc doc""" +""" issymmetric(loss::SupervisedLoss) -> Bool Return `true` if the given loss is a symmetric loss. -A function ``f : \mathbb{R} \rightarrow [0,\infty)`` is said -to be symmetric about origin if we have +A function `f : ℝ → [0,∞)` is said to be symmetric +about origin if we have `f(x) = f(-x), ∀ x ∈ ℝ`. -```math -f(x) = f(-x), \qquad \forall x \in \mathbb{R} -``` - -A distance-based loss is said to be symmetric if its representing -function is symmetric. +A distance-based loss is said to be symmetric if its +representing function is symmetric. """ issymmetric(::SupervisedLoss) = false -@doc doc""" +""" isminimizable(loss::SupervisedLoss) -> Bool Return `true` if the given `loss` is a minimizable loss. From abb7cd48b69f0e8898276c822762f1f103bfe232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 16:16:26 -0300 Subject: [PATCH 06/15] Add docstrings for deriv function in costs.jl --- src/costs.jl | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/src/costs.jl b/src/costs.jl index 8bf9f13..01b815f 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -153,7 +153,77 @@ value!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outp aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(value!, (buffer, loss, targets, outputs, aggmode, obsdim)) -function deriv end +""" + deriv(loss, target, output) -> Number + +Compute the derivative for the `loss` function +in respect to the `output`. 
Note that `target` and +`output` can be of different numeric type, in which +case promotion is performed in the manner appropriate +for the given loss. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `target::Number`: The ground truth `y ∈ Y` of the observation. +- `output::Number`: The predicted output `ŷ ∈ ℝ` for the observation. +""" +deriv(loss::SupervisedLoss, target::Number, output::Number) = + MethodError(deriv, (loss, target, output)) + +""" + deriv(loss, targets, outputs, aggmode) -> Number + +Compute the weighted or unweighted sum or mean (depending on `aggmode`) +of the individual derivatives of the `loss` function for each pair +in `targets` and `outputs`. This method will not allocate a +temporary array. + +In the case that the two parameters are arrays with a different +number of dimensions, broadcast will be performed. Note that the +given parameters are expected to have the same size in the +dimensions they share. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +""" +deriv(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = + MethodError(deriv, (loss, targets, outputs, aggmode)) + +""" + deriv(loss, targets, outputs, aggmode, obsdim) -> AbstractVector + +Compute the derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation (depending on +`aggmode`). This method will not allocate a temporary array, but +it will allocate the resulting vector. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. they must +not be vectors). + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::LearnBase.ObsDimension) = + MethodError(deriv, (loss, targets, outputs, aggmode, obsdim)) + function deriv2 end function value_deriv end From 70bd28a6765ec7ab523439826c490e54a0975224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 16:23:09 -0300 Subject: [PATCH 07/15] Add docstrings for deriv! function in costs.jl --- src/costs.jl | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 01b815f..91cdcb5 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -221,14 +221,41 @@ not be vectors). denotes the observations. See `?ObsDim` for more information. 
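As with `value`, a concrete loss would add methods against these stubs; a minimal sketch with a hypothetical squared-error loss (not defined by this package):

```julia
# Hypothetical squared-error loss: L(y, ŷ) = (ŷ - y)^2
struct MySquaredLoss <: LearnBase.DistanceLoss end

LearnBase.value(::MySquaredLoss, target::Number, output::Number) = abs2(output - target)
LearnBase.deriv(::MySquaredLoss, target::Number, output::Number) = 2 * (output - target)

LearnBase.deriv(MySquaredLoss(), 1.0, 0.7)   # ≈ -0.6
```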
""" deriv(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, - aggmode::AggregateMode, obsdim::LearnBase.ObsDimension) = + aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv, (loss, targets, outputs, aggmode, obsdim)) -function deriv2 end -function value_deriv end +""" + deriv!(buffer, loss, targets, outputs, aggmode, obsdim) -> buffer + +Compute the derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation, depending on +`aggmode`. The results are stored into the given vector `buffer`. +This method will not allocate a temporary array. -function deriv! end +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. so they +must not be vectors). + +# Arguments + +- `buffer::AbstractArray`: Array to store the computed values in. +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(deriv!, (buffer, loss, targets, outputs, aggmode, obsdim)) + +function deriv2 end function deriv2! end +function value_deriv end function value_deriv! end """ From d2ee665ddbffa11060a771c77c9fe4e306a41024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:08:45 -0300 Subject: [PATCH 08/15] Add docstrings for deriv2 function in costs.jl --- src/costs.jl | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/src/costs.jl b/src/costs.jl index 91cdcb5..d559fdb 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -253,7 +253,76 @@ deriv!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outp aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv!, (buffer, loss, targets, outputs, aggmode, obsdim)) -function deriv2 end +""" + deriv2(loss, target, output) -> Number + +Compute the second derivative for the `loss` function +in respect to the `output`. Note that `target` and `output` +can be of different numeric type, in which case promotion is +performed in the manner appropriate for the given loss. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `target::Number`: The ground truth `y ∈ Y` of the observation. +- `output::Number`: The predicted output `ŷ ∈ ℝ` for the observation. +""" +deriv2(loss::SupervisedLoss, target::Number, output::Number) = + MethodError(deriv2, (loss, target, output)) + +""" + deriv2(loss, targets, outputs, aggmode) -> Number + +Compute the weighted or unweighted sum or mean (depending on `aggmode`) +of the individual second derivatives of the `loss` function for +each pair in `targets` and `outputs`. This method will not +allocate a temporary array. + +In the case that the two parameters are arrays with a different +number of dimensions, broadcast will be performed. 
Note that the +given parameters are expected to have the same size in the +dimensions they share. + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +""" +deriv2(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode) = + MethodError(deriv2, (loss, targets, outputs, aggmode)) + +""" + deriv2(loss, targets, outputs, aggmode, obsdim) -> AbstractVector + +Compute the second derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation (depending on +`aggmode`). This method will not allocate a temporary array, but +it will allocate the resulting vector. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. they must +not be vectors). + +# Arguments + +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv2(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(deriv2, (loss, targets, outputs, aggmode, obsdim)) + function deriv2! end function value_deriv end function value_deriv! end From 3d18408c317fd87b0dea1b4dc79e12be99f50d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:16:10 -0300 Subject: [PATCH 09/15] Move struct defs to outside testset --- test/costs.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/costs.jl b/test/costs.jl index cd94bad..03bd524 100644 --- a/test/costs.jl +++ b/test/costs.jl @@ -1,3 +1,8 @@ +# dummy types for testing +struct MyStronglyConvexType end +LearnBase.isstronglyconvex(::MyStronglyConvexType) = true +LearnBase.islipschitzcont(::MyStronglyConvexType) = true + @testset "Costs" begin @test LearnBase.Cost <: Any @test LearnBase.Penalty <: LearnBase.Cost @@ -33,9 +38,6 @@ @test typeof(LearnBase.issymmetric) <: Function # test fallback methods - struct MyStronglyConvexType end - LearnBase.isstronglyconvex(::MyStronglyConvexType) = true - LearnBase.islipschitzcont(::MyStronglyConvexType) = true @test LearnBase.isstronglyconvex(MyStronglyConvexType()) @test LearnBase.isstrictlyconvex(MyStronglyConvexType()) @test LearnBase.isconvex(MyStronglyConvexType()) From 0cf5da01f2606474b778fd5e7f619eb4198dc7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:20:56 -0300 Subject: [PATCH 10/15] Add docstrings for deriv2! 
function in costs.jl --- src/costs.jl | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index d559fdb..858895c 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -139,7 +139,6 @@ must not be vectors). # Arguments - `buffer::AbstractArray`: Array to store the computed values in. - Old values will be overwritten and lost. - `loss::SupervisedLoss`: The loss function `L` we want to compute. - `targets::AbstractArray`: The array of ground truths `𝐲`. - `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. @@ -323,7 +322,35 @@ deriv2(loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv2, (loss, targets, outputs, aggmode, obsdim)) -function deriv2! end +""" + deriv2!(buffer, loss, target, output, aggmode, obsdim) -> buffer + +Compute the second derivative of the `loss` function for each pair in +`targets` and `outputs` individually, and return either the +weighted or unweighted sum or mean for each observation, depending on +`aggmode`. The results are stored into the given vector `buffer`. +This method will not allocate a temporary array. + +Both arrays have to be of the same shape and size. Furthermore +they have to have at least two array dimensions (i.e. so they +must not be vectors). + +# Arguments + +- `buffer::AbstractArray`: Array to store the computed values in. +- `loss::SupervisedLoss`: The loss function `L` we want to compute. +- `targets::AbstractArray`: The array of ground truths `𝐲`. +- `outputs::AbstractArray`: The array of predicted outputs `𝐲̂`. +- `aggmode::AggregateMode`: Must be one of the following: + [`AggMode.Sum()`](@ref), [`AggMode.Mean()`](@ref), + [`AggMode.WeightedSum`](@ref), or [`AggMode.WeightedMean`](@ref). +- `obsdim::ObsDimension`: Specifies which of the array dimensions + denotes the observations. See `?ObsDim` for more information. +""" +deriv2!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, outputs::AbstractArray, + aggmode::AggregateMode, obsdim::ObsDimension) = + MethodError(deriv2!, (buffer, loss, targets, outputs, aggmode, obsdim)) + function value_deriv end function value_deriv! end From 22f5acebd0fcf06b4b46bf355e270cea49129ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 14 Apr 2020 17:40:01 -0300 Subject: [PATCH 11/15] Remove value_deriv and value_deriv! names --- src/costs.jl | 3 --- test/costs.jl | 2 -- 2 files changed, 5 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 858895c..10ec735 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -351,9 +351,6 @@ deriv2!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, out aggmode::AggregateMode, obsdim::ObsDimension) = MethodError(deriv2!, (buffer, loss, targets, outputs, aggmode, obsdim)) -function value_deriv end -function value_deriv! end - """ isconvex(loss::SupervisedLoss) -> Bool diff --git a/test/costs.jl b/test/costs.jl index 03bd524..02dacf2 100644 --- a/test/costs.jl +++ b/test/costs.jl @@ -19,8 +19,6 @@ LearnBase.islipschitzcont(::MyStronglyConvexType) = true @test typeof(LearnBase.deriv!) <: Function @test typeof(LearnBase.deriv2) <: Function @test typeof(LearnBase.deriv2!) <: Function - @test typeof(LearnBase.value_deriv) <: Function - @test typeof(LearnBase.value_deriv!) 
<: Function @test typeof(LearnBase.isminimizable) <: Function @test typeof(LearnBase.isdifferentiable) <: Function From 81ea0ea121bb41420a955efd6351a78606736ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 09:26:54 -0300 Subject: [PATCH 12/15] Tests for ObsDim module --- test/aggmode.jl | 0 test/obsdim.jl | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ test/other.jl | 70 ----------------------------------------------- test/runtests.jl | 2 ++ 4 files changed, 73 insertions(+), 70 deletions(-) create mode 100644 test/aggmode.jl create mode 100644 test/obsdim.jl diff --git a/test/aggmode.jl b/test/aggmode.jl new file mode 100644 index 0000000..e69de29 diff --git a/test/obsdim.jl b/test/obsdim.jl new file mode 100644 index 0000000..ea856b7 --- /dev/null +++ b/test/obsdim.jl @@ -0,0 +1,71 @@ +@testset "ObsDim" begin + @testset "Type tree" begin + @test_throws MethodError LearnBase.ObsDim.Constant(2.0) + + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.First + @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.Constant{1} + + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDim.Last + + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDimension + @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} + end + + @testset "Constructors" begin + @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") + @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) + + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.First() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.Constant(1) + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Last())) === LearnBase.ObsDim.Last() + @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Constant(2))) === LearnBase.ObsDim.Constant(2) + + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) + @test convert(LearnBase.ObsDimension, 1) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, 2) === LearnBase.ObsDim.Constant(2) + @test convert(LearnBase.ObsDimension, 6) === LearnBase.ObsDim.Constant(6) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, :first) + @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") + @test convert(LearnBase.ObsDimension, (:first,:last)) === (LearnBase.ObsDim.First(),LearnBase.ObsDim.Last()) + @test convert(LearnBase.ObsDimension, :first) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, :begin) === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "first") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, "BEGIN") === LearnBase.ObsDim.First() + @test convert(LearnBase.ObsDimension, :end) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :last) === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "End") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, "LAST") === LearnBase.ObsDim.Last() + @test convert(LearnBase.ObsDimension, :nothing) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :none) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :na) === 
LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :null) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, :undefined) === LearnBase.ObsDim.Undefined() + @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() + end + + struct SomeType end + @testset "Default values" begin + @testset "Arrays, SubArrays, and Sparse Arrays" begin + @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(rand(10,5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === LearnBase.ObsDim.Last() + @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === LearnBase.ObsDim.Last() + end + + @testset "Types with no specified default" begin + @test @inferred(LearnBase.default_obsdim(SomeType())) === LearnBase.ObsDim.Undefined() + end + + @testset "Tuples" begin + @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Last()) + @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Undefined()) + @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Last()) + end + end +end diff --git a/test/other.jl b/test/other.jl index 7f3ff21..bcae197 100644 --- a/test/other.jl +++ b/test/other.jl @@ -163,73 +163,3 @@ let s = [LearnBase.IntervalSet(0,1), LearnBase.DiscreteSet([0,1])] @test typeof(rand(s)) == Vector{Float64} @test typeof(rand(s, 2)) == Vector{Vector{Float64}} end - -@testset "obsdim typetree and Constructor" begin - @test_throws MethodError LearnBase.ObsDim.Constant(2.0) - - @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDimension - @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.First - @test typeof(LearnBase.ObsDim.First()) <: LearnBase.ObsDim.Constant{1} - - @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDimension - @test typeof(LearnBase.ObsDim.Last()) <: LearnBase.ObsDim.Last - - @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDimension - @test typeof(LearnBase.ObsDim.Constant(2)) <: LearnBase.ObsDim.Constant{2} -end - -@testset "obs_dim helper constructor" begin - @test_throws ArgumentError convert(LearnBase.ObsDimension, "test") - @test_throws ArgumentError convert(LearnBase.ObsDimension, 1.0) - - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.First() - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.First())) === LearnBase.ObsDim.Constant(1) - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Last())) === LearnBase.ObsDim.Last() - @test @inferred(convert(LearnBase.ObsDimension, LearnBase.ObsDim.Constant(2))) === LearnBase.ObsDim.Constant(2) - - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 1) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, 6) - @test convert(LearnBase.ObsDimension, 1) === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, 2) === LearnBase.ObsDim.Constant(2) - @test convert(LearnBase.ObsDimension, 6) === LearnBase.ObsDim.Constant(6) - @test_throws 
ErrorException @inferred convert(LearnBase.ObsDimension, :first) - @test_throws ErrorException @inferred convert(LearnBase.ObsDimension, "first") - @test convert(LearnBase.ObsDimension, (:first,:last)) === (LearnBase.ObsDim.First(),LearnBase.ObsDim.Last()) - @test convert(LearnBase.ObsDimension, :first) === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, :begin) === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, "first") === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, "BEGIN") === LearnBase.ObsDim.First() - @test convert(LearnBase.ObsDimension, :end) === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, :last) === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, "End") === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, "LAST") === LearnBase.ObsDim.Last() - @test convert(LearnBase.ObsDimension, :nothing) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :none) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :na) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :null) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, :undefined) === LearnBase.ObsDim.Undefined() - @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() -end - -struct SomeType end -@testset "obsdim default values" begin - @testset "Arrays, SubArrays, and Sparse Arrays" begin - @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10),:))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(rand(10,5))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(view(rand(10,5),:,:))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,0.5))) === LearnBase.ObsDim.Last() - @test @inferred(LearnBase.default_obsdim(sprand(10,5,0.5))) === LearnBase.ObsDim.Last() - end - - @testset "Types with no specified default" begin - @test @inferred(LearnBase.default_obsdim(SomeType())) === LearnBase.ObsDim.Undefined() - end - - @testset "Tuples" begin - @test @inferred(LearnBase.default_obsdim((SomeType(),SomeType()))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((SomeType(),rand(2,2)))) === (LearnBase.ObsDim.Undefined(), LearnBase.ObsDim.Last()) - @test @inferred(LearnBase.default_obsdim((rand(10),SomeType()))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Undefined()) - @test @inferred(LearnBase.default_obsdim((rand(10),rand(2,2)))) === (LearnBase.ObsDim.Last(), LearnBase.ObsDim.Last()) - end -end diff --git a/test/runtests.jl b/test/runtests.jl index b890291..7b1a1d3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,5 +2,7 @@ using LearnBase using SparseArrays using Test +include("aggmode.jl") +include("obsdim.jl") include("costs.jl") include("other.jl") From 4d762478e3360eac4abbad0caf8bfdcfdcf0706a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 09:30:57 -0300 Subject: [PATCH 13/15] Tests for AggMode module --- test/aggmode.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/aggmode.jl b/test/aggmode.jl index e69de29..c801413 100644 --- a/test/aggmode.jl +++ b/test/aggmode.jl @@ -0,0 +1,7 @@ +@testset "AggMode" begin + @test typeof(LearnBase.AggMode.None()) <: LearnBase.AggregateMode + @test typeof(LearnBase.AggMode.Sum()) <: LearnBase.AggregateMode + @test 
typeof(LearnBase.AggMode.Mean()) <: LearnBase.AggregateMode + @test typeof(LearnBase.AggMode.WeightedSum([1,2,3])) <: LearnBase.AggregateMode + @test typeof(LearnBase.AggMode.WeightedMean([1,2,3])) <: LearnBase.AggregateMode +end From def0159bd177afbe71829b7097396afdedda7895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 10:04:34 -0300 Subject: [PATCH 14/15] Update tests --- test/obsdim.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/obsdim.jl b/test/obsdim.jl index ea856b7..7be0952 100644 --- a/test/obsdim.jl +++ b/test/obsdim.jl @@ -1,3 +1,4 @@ +struct SomeType end @testset "ObsDim" begin @testset "Type tree" begin @test_throws MethodError LearnBase.ObsDim.Constant(2.0) @@ -46,7 +47,6 @@ @test convert(LearnBase.ObsDimension, nothing) === LearnBase.ObsDim.Undefined() end - struct SomeType end @testset "Default values" begin @testset "Arrays, SubArrays, and Sparse Arrays" begin @test @inferred(LearnBase.default_obsdim(rand(10))) === LearnBase.ObsDim.Last() From f8983e4b8cccf32f564e659cab246365eda900c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Wed, 15 Apr 2020 10:25:55 -0300 Subject: [PATCH 15/15] Remove UnarySupervisedLoss --- src/costs.jl | 76 ++++++++++++++++++++++++--------------------------- test/costs.jl | 7 ++--- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/src/costs.jl b/src/costs.jl index 10ec735..6b9d76b 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -18,27 +18,18 @@ for the simplification `L(y, ŷ)`. """ abstract type SupervisedLoss <: Loss end -""" -A supervised loss is considered **unary** if it can be written as a composition -`L(β(y, ŷ))` for some binary function `β`. In this case the loss can be evaluated -with a single argument termed the **agreement** `β(y, ŷ)`. Notable -examples for unary supervised losses are distance-based (`L(y,ŷ) = ψ(ŷ - y)`) -and margin-based (`L(y,ŷ) = ψ(y⋅ŷ)`) losses. -""" -abstract type UnarySupervisedLoss <: SupervisedLoss end - """ A supervised loss that can be simplified to `L(y, ŷ) = L(ŷ - y)` is considered **distance-based**. """ -abstract type DistanceLoss <: UnarySupervisedLoss end +abstract type DistanceLoss <: SupervisedLoss end """ A supervised loss with targets `y ∈ {-1, 1}`, and which can be simplified to `L(y, ŷ) = L(y⋅ŷ)` is considered **margin-based**. """ -abstract type MarginLoss <: UnarySupervisedLoss end +abstract type MarginLoss <: SupervisedLoss end """ A loss is considered **unsupervised**, if all the information needed @@ -352,27 +343,27 @@ deriv2!(buffer::AbstractArray, loss::SupervisedLoss, targets::AbstractArray, out MethodError(deriv2!, (buffer, loss, targets, outputs, aggmode, obsdim)) """ - isconvex(loss::SupervisedLoss) -> Bool + isconvex(loss) -> Bool Return `true` if the given `loss` denotes a convex function. A function `f: ℝⁿ → ℝ` is convex if its domain is a convex set and if for all `x, y` in that domain, with `θ` such that for `0 ≦ θ ≦ 1`, we have `f(θ x + (1 - θ) y) ≦ θ f(x) + (1 - θ) f(y)`. """ -isconvex(l) = isstrictlyconvex(l) +isconvex(loss::SupervisedLoss) = isstrictlyconvex(loss) """ - isstrictlyconvex(loss::SupervisedLoss) -> Bool + isstrictlyconvex(loss) -> Bool Return `true` if the given `loss` denotes a strictly convex function. A function `f : ℝⁿ → ℝ` is strictly convex if its domain is a convex set and if for all `x, y` in that domain where `x ≠ y`, with `θ` such that for `0 < θ < 1`, we have `f(θ x + (1 - θ) y) < θ f(x) + (1 - θ) f(y)`. 
""" -isstrictlyconvex(l) = isstronglyconvex(l) +isstrictlyconvex(loss::SupervisedLoss) = isstronglyconvex(loss) """ - isstronglyconvex(loss::SupervisedLoss) -> Bool + isstronglyconvex(loss) -> Bool Return `true` if the given `loss` denotes a strongly convex function. A function `f : ℝⁿ → ℝ` is `m`-strongly convex if its domain is a convex @@ -383,10 +374,10 @@ for `0 ≤ θ ≤ 1`, we have In a more familiar setting, if the loss function is differentiable we have `(∇f(x) - ∇f(y))ᵀ (x - y) ≥ m | x - y |₂²` """ -isstronglyconvex(::SupervisedLoss) = false +isstronglyconvex(loss::SupervisedLoss) = false """ - isdifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + isdifferentiable(loss, [x]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). @@ -400,11 +391,11 @@ it satisfies: A function is differentiable if its domain is open and it is differentiable at every point `x`. """ -isdifferentiable(l::SupervisedLoss) = istwicedifferentiable(l) -isdifferentiable(l::SupervisedLoss, at) = isdifferentiable(l) +isdifferentiable(loss::SupervisedLoss) = istwicedifferentiable(loss) +isdifferentiable(loss::SupervisedLoss, at) = isdifferentiable(loss) """ - istwicedifferentiable(loss::SupervisedLoss, [x::Number]) -> Bool + istwicedifferentiable(loss, [x]) -> Bool Return `true` if the given `loss` is differentiable (optionally limited to the given point `x` if specified). @@ -416,11 +407,11 @@ derivative for `∇f` exists at `x`: `∇²f(x) = D∇f(x)`. A function is twice differentiable if its domain is open and it is twice differentiable at every point `x`. """ -istwicedifferentiable(::SupervisedLoss) = false -istwicedifferentiable(l::SupervisedLoss, at) = istwicedifferentiable(l) +istwicedifferentiable(loss::SupervisedLoss) = false +istwicedifferentiable(loss::SupervisedLoss, at) = istwicedifferentiable(loss) """ - islocallylipschitzcont(loss::SupervisedLoss) -> Bool + islocallylipschitzcont(loss) -> Bool Return `true` if the given `loss` function is locally-Lipschitz continous. @@ -433,10 +424,11 @@ such that Every convex function is locally lipschitz continuous. """ -islocallylipschitzcont(l) = isconvex(l) || islipschitzcont(l) +islocallylipschitzcont(loss::SupervisedLoss) = + isconvex(loss) || islipschitzcont(loss) """ - islipschitzcont(loss::SupervisedLoss) -> Bool + islipschitzcont(loss) -> Bool Return `true` if the given `loss` function is Lipschitz continuous. @@ -444,10 +436,10 @@ A supervised loss function `L : Y × ℝ → [0, ∞)` is Lipschitz continous, if there exists a finite constant `M < ∞` such that `|L(y, t) - L(y, t′)| ≤ M |t - t′|, ∀ (y, t) ∈ Y × ℝ` """ -islipschitzcont(::SupervisedLoss) = false +islipschitzcont(loss::SupervisedLoss) = false """ - isnemitski(loss::SupervisedLoss) -> Bool + isnemitski(loss) -> Bool Return `true` if the given `loss` denotes a Nemitski loss function. @@ -458,10 +450,10 @@ increasing function `h : [0, ∞) → [0, ∞)` such that If a loss if locally lipsschitz continuous then it is a Nemitski loss. """ -isnemitski(l::SupervisedLoss) = islocallylipschitzcont(l) +isnemitski(loss::SupervisedLoss) = islocallylipschitzcont(loss) """ - isclipable(loss::SupervisedLoss) -> Bool + isclipable(loss) -> Bool Return `true` if the given `loss` function is clipable. A supervised loss `L : Y × ℝ → [0,∞)` can be clipped at `M > 0` @@ -470,10 +462,10 @@ if, for all `(y,t) ∈ Y × ℝ`, `L(y, t̂) ≤ L(y, t)` where That is `t̂ = -M` if `t < -M`, `t̂ = t` if `t ∈ [-M, M]`, and `t = M` if `t > M`. 
""" -isclipable(::SupervisedLoss) = false +isclipable(loss::SupervisedLoss) = false """ - isdistancebased(loss::SupervisedLoss) -> Bool + isdistancebased(loss) -> Bool Return `true` if the given `loss` is a distance-based loss. @@ -481,10 +473,11 @@ A supervised loss function `L : Y × ℝ → [0,∞)` is said to be distance-based, if there exists a representing function `ψ : ℝ → [0,∞)` satisfying `ψ(0) = 0` and `L(y, ŷ) = ψ (ŷ - y), (y, ŷ) ∈ Y × ℝ`. """ -isdistancebased(::SupervisedLoss) = false +isdistancebased(loss::Loss) = false +isdistancebased(loss::DistanceLoss) = true """ - ismarginbased(loss::SupervisedLoss) -> Bool + ismarginbased(loss) -> Bool Return `true` if the given `loss` is a margin-based loss. @@ -492,15 +485,16 @@ A supervised loss function `L : Y × ℝ → [0,∞)` is said to be margin-based, if there exists a representing function `ψ : ℝ → [0,∞)` satisfying `L(y, ŷ) = ψ(y⋅ŷ), (y, ŷ) ∈ Y × ℝ`. """ -ismarginbased(::SupervisedLoss) = false +ismarginbased(loss::Loss) = false +ismarginbased(loss::MarginLoss) = true """ - isclasscalibrated(loss::SupervisedLoss) -> Bool + isclasscalibrated(loss) -> Bool """ -isclasscalibrated(::SupervisedLoss) = false +isclasscalibrated(loss::SupervisedLoss) = false """ - issymmetric(loss::SupervisedLoss) -> Bool + issymmetric(loss) -> Bool Return `true` if the given loss is a symmetric loss. @@ -510,11 +504,11 @@ about origin if we have `f(x) = f(-x), ∀ x ∈ ℝ`. A distance-based loss is said to be symmetric if its representing function is symmetric. """ -issymmetric(::SupervisedLoss) = false +issymmetric(loss::SupervisedLoss) = false """ - isminimizable(loss::SupervisedLoss) -> Bool + isminimizable(loss) -> Bool Return `true` if the given `loss` is a minimizable loss. """ -isminimizable(l) = isconvex(l) +isminimizable(loss::SupervisedLoss) = isconvex(loss) diff --git a/test/costs.jl b/test/costs.jl index 02dacf2..8b3261c 100644 --- a/test/costs.jl +++ b/test/costs.jl @@ -1,5 +1,5 @@ # dummy types for testing -struct MyStronglyConvexType end +struct MyStronglyConvexType <: LearnBase.SupervisedLoss end LearnBase.isstronglyconvex(::MyStronglyConvexType) = true LearnBase.islipschitzcont(::MyStronglyConvexType) = true @@ -9,9 +9,8 @@ LearnBase.islipschitzcont(::MyStronglyConvexType) = true @test LearnBase.Loss <: LearnBase.Cost @test LearnBase.UnsupervisedLoss <: LearnBase.Loss @test LearnBase.SupervisedLoss <: LearnBase.Loss - @test LearnBase.UnarySupervisedLoss <: LearnBase.SupervisedLoss - @test LearnBase.MarginLoss <: LearnBase.UnarySupervisedLoss - @test LearnBase.DistanceLoss <: LearnBase.UnarySupervisedLoss + @test LearnBase.MarginLoss <: LearnBase.SupervisedLoss + @test LearnBase.DistanceLoss <: LearnBase.SupervisedLoss @test typeof(LearnBase.value) <: Function @test typeof(LearnBase.value!) <: Function