diff --git a/Project.toml b/Project.toml index 47b64fe..0fcb92a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJLIBSVMInterface" uuid = "61c7150f-6c77-4bb1-949c-13197eac2a52" authors = ["Anthony D. Blaom "] -version = "0.1.4" +version = "0.2.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -11,8 +11,8 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] CategoricalArrays = "0.10" -LIBSVM = "0.8.0" -MLJModelInterface = "^0.3.6,^0.4, 1.0" +LIBSVM = "0.8" +MLJModelInterface = "1.4" julia = "1.3" [extras] diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index fe4c43f..814d78a 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -18,18 +18,10 @@ const PKG = "MLJLIBSVMInterface" # # MODEL TYPES -""" - LinearSVC(; kwargs...) - -Linear support vector machine classifier using LIBLINEAR: https://www.csie.ntu.edu.tw/~cjlin/liblinear/ - -See also SVC, NuSVC -""" mutable struct LinearSVC <: MMI.Deterministic solver::LIBSVM.Linearsolver.LINEARSOLVER tolerance::Float64 cost::Float64 - p::Float64 bias::Float64 end @@ -37,14 +29,12 @@ function LinearSVC( ;solver::LIBSVM.Linearsolver.LINEARSOLVER = LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL ,tolerance::Float64 = Inf ,cost::Float64 = 1.0 - ,p::Float64 = 0.1 ,bias::Float64= -1.0) model = LinearSVC( solver ,tolerance ,cost - ,p ,bias ) @@ -54,19 +44,8 @@ function LinearSVC( return model end -""" - SVC(; kwargs...) - -Kernel support vector machine classifier using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also LinearSVC, NuSVC -""" mutable struct SVC <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 cost::Float64 cachesize::Float64 @@ -78,7 +57,7 @@ mutable struct SVC <: MMI.Deterministic end function SVC( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,cost::Float64 = 1.0 ,cachesize::Float64=200.0 @@ -106,23 +85,10 @@ function SVC( return model end -""" - NuSVC(; kwargs...) - -Kernel support vector machine classifier using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also LinearSVC, SVC -""" mutable struct NuSVC <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 nu::Float64 - cost::Float64 cachesize::Float64 degree::Int32 coef0::Float64 @@ -131,10 +97,9 @@ mutable struct NuSVC <: MMI.Deterministic end function NuSVC( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.5 - ,cost::Float64 = 1.0 ,cachesize::Float64 = 200.0 ,degree::Int32 = Int32(3) ,coef0::Float64 = 0. 
@@ -145,7 +110,6 @@ function NuSVC( kernel ,gamma ,nu - ,cost ,cachesize ,degree ,coef0 @@ -159,11 +123,10 @@ function NuSVC( return model end -mutable struct OneClassSVM <: MMI.Unsupervised - kernel::LIBSVM.Kernel.KERNEL +mutable struct OneClassSVM <: MMI.UnsupervisedDetector + kernel gamma::Float64 nu::Float64 - cost::Float64 cachesize::Float64 degree::Int32 coef0::Float64 @@ -172,10 +135,9 @@ mutable struct OneClassSVM <: MMI.Unsupervised end function OneClassSVM( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.1 - ,cost::Float64 = 1.0 ,cachesize::Float64 = 200.0 ,degree::Int32 = Int32(3) ,coef0::Float64 = 0.0 @@ -186,7 +148,6 @@ function OneClassSVM( kernel ,gamma ,nu - ,cost ,cachesize ,degree ,coef0 @@ -200,20 +161,8 @@ function OneClassSVM( return model end -""" - NuSVR(; kwargs...) - -Kernel support vector machine regressor using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also EpsilonSVR -""" mutable struct NuSVR <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 nu::Float64 cost::Float64 @@ -225,7 +174,7 @@ mutable struct NuSVR <: MMI.Deterministic end function NuSVR( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.5 ,cost::Float64 = 1.0 @@ -253,20 +202,8 @@ function NuSVR( return model end -""" - EpsilonSVR(; kwargs...) - -Kernel support vector machine regressor using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also NuSVR -""" mutable struct EpsilonSVR <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 epsilon::Float64 cost::Float64 @@ -278,7 +215,7 @@ mutable struct EpsilonSVR <: MMI.Deterministic end function EpsilonSVR( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,epsilon::Float64 = 0.1 ,cost::Float64 = 1.0 @@ -312,17 +249,17 @@ const SVM = Union{LinearSVC, SVC, NuSVC, NuSVR, EpsilonSVR, OneClassSVM} # # CLEAN METHOD -const WARN_PRECOMPUTED_KERNEL = +const ERR_PRECOMPUTED_KERNEL = ArgumentError( "Pre-computed kernels are not supported by installed version of "* - "MLJLIBSVMInterface.jl. Using `LIBSVM.Kernel.RadialBasis` instead. " + "MLJLIBSVMInterface.jl. Alternatively, specify `kernel=k` for some "* + "function or callable `k(v1::AbstractVector, v2::AbstractVector)`. " +) function MMI.clean!(model::SVM) message = "" - if !(model isa LinearSVC) && - model.kernel == LIBSVM.Kernel.Precomputed - message *= WARN_PRECOMPUTED_KERNEL - model.kernel = LIBSVM.Kernel.RadialBasis - end + !(model isa LinearSVC) && + model.kernel == LIBSVM.Kernel.Precomputed && + throw(ERR_PRECOMPUTED_KERNEL) return message end @@ -339,7 +276,11 @@ end """ map_model_type(model::SVM) -Helper function to map the model to the correct LIBSVM model type needed for function dispatch. +Private method. + +Helper function to map the model to the correct LIBSVM model type +needed for function dispatch. 
+ """ function map_model_type(model::SVM) if isa(model, LinearSVC) @@ -383,10 +324,12 @@ function categorical_value(x, v) return pool[get(pool, x)] end -# to ensure the keys of user-provided weights are `CategoricalValue`s: -fix_keys(weights::Dict{<:CategoricalArrays.CategoricalValue}, y) = weights +# to ensure the keys of user-provided weights are `CategoricalValue`s, +# and the values are floats: +fix_keys(weights::Dict{<:CategoricalArrays.CategoricalValue}, y) = + Dict(k => float(weights[k]) for k in keys(weights)) fix_keys(weights, y) = - Dict(categorical_value(x, y) => weights[x] for x in keys(weights)) + Dict(categorical_value(x, y) => float(weights[x]) for x in keys(weights)) """ encode(weights::Dict, y) @@ -409,6 +352,35 @@ function encode(weights::Dict, y) return Dict(MMI.int(cv) => _weights[cv] for cv in cvs) end +function get_encoding(decoder) + refs = MMI.int.(decoder.classes) + return Dict(i => decoder(i) for i in refs) +end + + +""" + orientation(scores) + +Private method. + +Return `1` if the majority of elements of `scores` are less than the +midpoint between the minimum and maximum values. For outlier detection +scores, the implication in that case is that scores increase with +increasing likelihood of outlierness. + +Otherwise return `-1` (scores decrease with increasing likelihood of +outlierness). + +""" +function orientation(scores) + middle = (maximum(scores) + minimum(scores))/2 + if quantile(scores, 0.5) < middle + return 1 + else + return -1 + end +end + # # FIT METHOD @@ -426,17 +398,20 @@ function MMI.fit(model::LinearSVC, verbosity::Int, X, y, weights=nothing) result = LIBSVM.LIBLINEAR.linear_train(y_plain, Xmatrix, weights = _weights, solver_type = Int32(model.solver), - C = model.cost, p = model.p, bias = model.bias, + C = model.cost, bias = model.bias, eps = model.tolerance, verbose = ifelse(verbosity > 1, true, false) ) fitresult = (result, decode) cache = nothing - report = nothing + report = NamedTuple() return fitresult, cache, report end +MMI.fitted_params(::LinearSVC, fitresult) = + (libsvm_model=fitresult[1], encoding=get_encoding(fitresult[2])) + function MMI.fit(model::Union{SVC, NuSVC}, verbosity::Int, X, y, weights=nothing) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -465,6 +440,10 @@ function MMI.fit(model::Union{SVC, NuSVC}, verbosity::Int, X, y, weights=nothing return fitresult, cache, report end +MMI.fitted_params(::Union{SVC, NuSVC}, fitresult) = + (libsvm_model=fitresult[1], encoding=get_encoding(fitresult[2])) + + function MMI.fit(model::Union{NuSVR, EpsilonSVR}, verbosity::Int, X, y) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -484,6 +463,9 @@ function MMI.fit(model::Union{NuSVR, EpsilonSVR}, verbosity::Int, X, y) return fitresult, cache, report end +MMI.fitted_params(::Union{NuSVR, EpsilonSVR}, fitresult) = + (libsvm_model=fitresult,) + function MMI.fit(model::OneClassSVM, verbosity::Int, X) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -493,26 +475,38 @@ function MMI.fit(model::OneClassSVM, verbosity::Int, X) model = deepcopy(model) model.gamma == -1.0 && (model.gamma = 1.0/size(Xmatrix, 1)) model.gamma == 0.0 && (model.gamma = 1.0/(var(Xmatrix) * size(Xmatrix, 1)) ) - fitresult = LIBSVM.svmtrain(Xmatrix; - get_svm_parameters(model)..., - verbose = ifelse(verbosity > 1, true, false) - ) + libsvm_model = LIBSVM.svmtrain(Xmatrix; + get_svm_parameters(model)..., + verbose = ifelse(verbosity > 1, true, false) + ) - report = (gamma=model.gamma,) + # get orientation and training scores: + _, decision_matrix = 
LIBSVM.svmpredict(libsvm_model, Xmatrix) + decision_scores = view(decision_matrix, 1, :) + orientation = MLJLIBSVMInterface.orientation(decision_scores) + scores = orientation*decision_scores + + fitresult = (libsvm_model, orientation) + report = (gamma=model.gamma, scores=scores) return fitresult, cache, report end +MMI.fitted_params(::OneClassSVM, fitresult) = + (libsvm_model=fitresult[1], orientation=fitresult[2]) + + +# # PREDICT AND TRANSFORM function MMI.predict(model::LinearSVC, fitresult, Xnew) result, decode = fitresult - (p,d) = LIBSVM.LIBLINEAR.linear_predict(result, MMI.matrix(Xnew)') + p, _ = LIBSVM.LIBLINEAR.linear_predict(result, MMI.matrix(Xnew)') return decode(p) end function MMI.predict(model::Union{SVC, NuSVC}, fitresult, Xnew) result, decode = fitresult - (p,d) = LIBSVM.svmpredict(result, MMI.matrix(Xnew)') + p, _ = LIBSVM.svmpredict(result, MMI.matrix(Xnew)') return decode(p) end @@ -522,10 +516,13 @@ function MMI.predict(model::Union{NuSVR, EpsilonSVR}, fitresult, Xnew) end function MMI.transform(model::OneClassSVM, fitresult, Xnew) - (p,d) = LIBSVM.svmpredict(fitresult, MMI.matrix(Xnew)') - return MMI.categorical(p) + libsvm_model, orientation = fitresult + _, decision_matrix = LIBSVM.svmpredict(libsvm_model, MMI.matrix(Xnew)') + decision_scores = view(decision_matrix, 1, :) + return orientation*decision_scores end + # metadata MMI.load_path(::Type{<:LinearSVC}) = "$PKG.LinearSVC" MMI.load_path(::Type{<:SVC}) = "$PKG.SVC" @@ -537,6 +534,13 @@ MMI.load_path(::Type{<:OneClassSVM}) = "$PKG.OneClassSVM" MMI.supports_class_weights(::Type{<:LinearSVC}) = true MMI.supports_class_weights(::Type{<:SVC}) = true +MMI.human_name(::Type{<:LinearSVC}) = "linear support vector classifier" +MMI.human_name(::Type{<:SVC}) = "C-support vector classifier" +MMI.human_name(::Type{<:NuSVC}) = "ν-support vector classifier" +MMI.human_name(::Type{<:NuSVR}) = "ν-support vector regressor" +MMI.human_name(::Type{<:EpsilonSVR}) = "ϵ-support vector regressor" +MMI.human_name(::Type{<:OneClassSVM}) = "$one-class support vector machine" + MMI.package_name(::Type{<:SVM}) = "LIBSVM" MMI.package_uuid(::Type{<:SVM}) = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" MMI.is_pure_julia(::Type{<:SVM}) = false @@ -549,4 +553,879 @@ MMI.target_scitype(::Type{<:Union{NuSVR, EpsilonSVR}}) = MMI.output_scitype(::Type{<:OneClassSVM}) = AbstractVector{<:Finite{2}} # Bool (true means inlier) + +# # DOCUMENT STRINGS + +const DOC_REFERENCE = "C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for "* + "support vector machines.\" *ACM Transactions on Intelligent Systems and "* + "Technology*, 2(3):27:1–27:27. Updated at "* + "[https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf]"* + "(https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf)" + +const DOC_ALGORITHM = "Reference for algorithm and core C-library: $DOC_REFERENCE. " + +const DOC_REFERENCE2 = "Rong-En Fan et al (2008): \"LIBLINEAR: A Library for "* + "Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. "* + "Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf]"* + "(https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf)" + +const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* + "$DOC_REFERENCE2. " + +const DOC_SERIALIZABILITY = "Serialization of "* + "models with user-defined kernels comes with some restrictions. 
"* + "See [LIVSVM.jl issue"* + "91](https://github.com/JuliaML/LIBSVM.jl/issues/91)" + +const DOC_KERNEL = """ +- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be + called, as in `kernel(x1, x2)`, or one of the built-in kernels from + the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors + whose lengths match the number of columns of the training data `X` (see + "Examples" below). + + - `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2` + + - `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree` + + - `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))` + + - `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)` + + Here `gamma`, `coef0`, `degree` are other hyper-parameters. $DOC_SERIALIZABILITY + +- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then + `gamma = 1/nfeatures` is used in training, where `nfeatures` is the + number of features (columns of `X`). If `gamma==0.0` then `gamma = + 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used + appears in the report (see below). + +- `coef0 = 0.0`: kernel parameter (see above) + +- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) + +""" + + +# ## LinearSVC + +""" +$(MMI.doc_header(LinearSVC)) + +$DOC_ALGORITHM_LINEAR + +This model type is similar to `SVC` from the same package with the setting +`kernel=LIBSVM.Kernel.KERNEL.Linear`, but is optimized for the linear +case. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with one of: + + mach = machine(model, X, y) + mach = machine(model, X, y, w) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype + with `scitype(y)` + +- `w`: a dictionary of class weights, keyed on `levels(y)`. + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +- `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, + which must be one of the following from the LIBSVM.jl package: + + - `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal)) + + - `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized + L2-loss support vector classification (dual) + + - `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss + support vector classification (primal) + + - `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized + L1-loss support vector classification (dual) + + - `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by + Crammer and Singer) `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: + L1-regularized L2-loss support vector classification) + + - `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression + + - `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual) + +- `tolerance::Float64=Inf`: tolerance for the stopping criterion; + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if + `bias < 0`, no bias term added (default -1) + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. 
+ + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `encoding`: class encoding used internally by `libsvm_model` - a + dictionary of class labels keyed on the internal integer representation + + +# Examples + +``` +using MLJ +import LIBSVM + +LinearSVC = @load LinearSVC pkg=LIBSVM # model type +model = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance + +X, y = @load_iris # table, vector +mach = machine(model, X, y) |> fit! + +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "versicolor" + "virginica" +``` + +## Incorporating class weights + +```julia +weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) +mach = machine(model, X, y, weights) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "versicolor" + "versicolor" + "versicolor" +``` + + +See also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/liblinear/blob/master/README). + +""" +LinearSVC + + +# ## SVC + +""" +$(MMI.doc_header(SVC)) + +$DOC_ALGORITHM + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with one of: + + mach = machine(model, X, y) + mach = machine(model, X, y, w) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype + with `scitype(y)` + +- `w`: a dictionary of class weights, keyed on `levels(y)`. + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +$DOC_KERNEL + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + +- `probability=false`: whether to base classification on calibrated + probabilities (expensive) or to use the raw decision function + (recommended). Note that in either case `predict` returns point + predictions and not probabilities, so that this option has little + utility in the current re-implementation. + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `encoding`: class encoding used internally by `libsvm_model` - a + dictionary of class labels keyed on the internal integer representation + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +SVC = @load SVC pkg=LIBSVM # model type +model = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = @load_iris # table, vector +mach = machine(model, X, y) |> fit! 
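+
+# the kernel parameter `gamma` actually used in training is recorded in
+# the machine's report (see "Report" above) and can be inspected with:
+report(mach).gamma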
+ +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## User-defined kernels + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = SVC(kernel=k) +mach = machine(model, X, y) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## Incorporating class weights + +In either scenario above, we can do: + +```julia +weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) +mach = machine(model, X, y, weights) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "versicolor" + "versicolor" + "versicolor" +``` + +See also the classifiers [`NuSVC`](@ref) and [`LinearSVC`](@ref), and +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). + +""" +SVC + + +# ## NuSVC + +""" +$(MMI.doc_header(NuSVC)) + +$DOC_ALGORITHM + +This model is a re-parameterization of the [`SVC`](@ref) classifier, +where `nu` replaces `cost`, and is mathematically equivalent to +it. The parameter `nu` allows more direct control over the number of +support vectors (see under "Hyper-parameters"). + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype + with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +$DOC_KERNEL + +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin + errors and a lower bound of the fraction of support vectors. Denoted + `ν` in the cited paper. Changing `nu` changes the thickness of the + margin (a neighborhood of the decision surface) and a margin error + is said to have occurred if a training observation lies on the wrong + side of the surface or within the margin. + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `encoding`: class encoding used internally by `libsvm_model` - a + dictionary of class labels keyed on the internal integer representation + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +NuSVC = @load NuSVC pkg=LIBSVM # model type +model = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = @load_iris # table, vector +mach = machine(model, X, y) |> fit! 
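+
+# `nu` lower-bounds the fraction of support vectors; the support vectors
+# actually selected can be inspected via the underlying LIBSVM.jl model
+# returned by `fitted_params` (field names as exercised in this package's
+# test suite):
+fitted_params(mach).libsvm_model.SVs.indices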
+
+Xnew = (sepal_length = [6.4, 7.2, 7.4],
+        sepal_width = [2.8, 3.0, 2.8],
+        petal_length = [5.6, 5.8, 6.1],
+        petal_width = [2.1, 1.6, 1.9],)
+
+julia> yhat = predict(mach, Xnew)
+3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:
+ "virginica"
+ "virginica"
+ "virginica"
+```
+
+## User-defined kernels
+
+```
+k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`
+model = NuSVC(kernel=k)
+mach = machine(model, X, y) |> fit!
+
+julia> yhat = predict(mach, Xnew)
+3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:
+ "virginica"
+ "virginica"
+ "virginica"
+```
+
+See also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref),
+[LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C
+implementation
+[documentation](https://github.com/cjlin1/libsvm/blob/master/README).
+
+
+"""
+NuSVC
+
+
+# ## EpsilonSVR
+
+"""
+$(MMI.doc_header(EpsilonSVR))
+
+$DOC_ALGORITHM
+
+This model is an adaptation of the classifier `SVC` to regression, but
+has an additional parameter `epsilon` (denoted ``ϵ`` in the cited
+reference).
+
+
+# Training data
+
+In MLJ or MLJBase, bind an instance `model` to data with:
+
+    mach = machine(model, X, y)
+
+where
+
+- `X`: any table of input features (eg, a `DataFrame`) whose columns
+  each have `Continuous` element scitype; check column scitypes with
+  `schema(X)`
+
+- `y`: the target, which can be any `AbstractVector` whose element
+  scitype is `Continuous`; check the scitype with `scitype(y)`
+
+Train the machine using `fit!(mach, rows=...)`.
+
+
+# Hyper-parameters
+
+$DOC_KERNEL
+
+- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the
+  cited reference; for greater regularization, decrease `cost`
+
+- `epsilon=0.1` (range (0, `Inf`)): the parameter denoted ``ϵ`` in the
+  cited reference; `epsilon` is the thickness of the penalty-free
+  neighborhood of the graph of the prediction function ("slab"
+  or "tube"). Specifically, a data point `(x, y)` incurs no training
+  loss unless it is outside this neighborhood; the further away it is
+  from this neighborhood, the greater the loss penalty.
+
+- `cachesize=200.0`: cache memory size in MB
+
+- `tolerance=0.001`: tolerance for the stopping criterion
+
+- `shrinking=true`: whether to use shrinking heuristics
+
+
+# Operations
+
+- `predict(mach, Xnew)`: return predictions of the target given
+  features `Xnew` having the same scitype as `X` above.
+
+
+# Fitted parameters
+
+The fields of `fitted_params(mach)` are:
+
+- `libsvm_model`: the trained model object created by the LIBSVM.jl package
+
+
+# Report
+
+The fields of `report(mach)` are:
+
+- `gamma`: actual value of the kernel parameter `gamma` used in training
+
+
+# Examples
+
+## Using a built-in kernel
+
+```
+using MLJ
+import LIBSVM
+
+EpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type
+model = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance
+
+X, y = make_regression(rng=123) # table, vector
+mach = machine(model, X, y) |> fit!
+
+Xnew, _ = make_regression(3, rng=123)
+
+julia> yhat = predict(mach, Xnew)
+3-element Vector{Float64}:
+ 0.2512132502584155
+ 0.007340201523624579
+ -0.2482949812264707
+```
+
+## User-defined kernels
+
+```
+k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`
+model = EpsilonSVR(kernel=k)
+mach = machine(model, X, y) |> fit!
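+
+# Note: serialization of machines whose model uses a user-defined kernel,
+# as here, comes with some restrictions; see LIBSVM.jl issue 91 (linked
+# under "Hyper-parameters" above).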
+ +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 1.1121225361666656 + 0.04667702229741916 + -0.6958148424680672 +``` + +See also [`NuSVR`](@ref), +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). + +""" +EpsilonSVR + + +# ## NuSVR + +""" +$(MMI.doc_header(NuSVR)) + +$DOC_ALGORITHM + +This model is a re-parameterization of `EpsilonSVR` in which the +`epsilon` hyper-parameter is replaced with a new parameter `nu` +(denoted ``ν`` in the cited reference) which attempts to control the +number of support vectors directly. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `Continuous`; check the scitype with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +- $DOC_KERNEL + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of training + errors and a lower bound of the fraction of support vectors. + Denoted ``ν`` in the cited paper. Changing `nu` changes the + thickness of some neighborhood of the graph of the prediction + function ("tube" or "slab") and a training error is said to occur + when a data point `(x, y)` lies outside of that neighborhood. + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +NuSVR = @load NuSVR pkg=LIBSVM # model type +model = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = make_regression(rng=123) # table, vector +mach = machine(model, X, y) |> fit! + +Xnew, _ = make_regression(3, rng=123) + +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 0.2008156459920009 + 0.1131520519131709 + -0.2076156254934889 +``` + +## User-defined kernels + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = NuSVR(kernel=k) +mach = machine(model, X, y) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 1.1211558175964662 + 0.06677125944808422 + -0.6817578942749346 +``` + +See also [`EpsilonSVR`](@ref), +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). + +""" +NuSVR + + +# ## OneClassSVM + +""" +$(MMI.doc_header(OneClassSVM)) + +$DOC_ALGORITHM + +This model is an outlier detection model delivering raw scores based +on the decision function of a support vector machine. 
Like the +[`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the +`cost` parameter appearing in standard support vector classification +[`SVC`](@ref). + +To extract +normalized scores ("probabilities") wrap the model using +`ProbabilisticDetector` from +[OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For +threshold-based classification, wrap the probabilistic model using +MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +$DOC_KERNEL + +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin + errors and a lower bound of the fraction of support vectors. Denoted + `ν` in the cited paper. Changing `nu` changes the thickness of the + margin (a neighborhood of the decision surface) and a margin error + is said to have occurred if a training observation lies on the wrong + side of the surface or within the margin. + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `transform(mach, Xnew)`: return scores for outlierness, given + features `Xnew` having the same scitype as `X` above. The greater + the score, the more likely it is an outlier. This score is based on + the SVM decision function. For normalized scores, wrap `model` using + `ProbabilisticDetector` from OutlierDetection.jl and call `predict` + instead, and for threshold-based classification, wrap again using + `BinaryThresholdPredictor`. See the examples below. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `orientation`: this equals `1` if the decision function for + `libsvm_model` is increasing with increasing outlierness, and `-1` + if it is decreasing instead. Correspondingly, the `libsvm_model` + attaches `true` to outliers in the first case, and `false` in the + second. (The `scores` given in the MLJ report and generated by + `MLJ.transform` already correct for this ambiguity, which is + therefore only an issue for users directly accessing `libsvm_model`.) + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Generating raw scores for outlierness + + +``` +using MLJ +import LIBSVM +import StableRNGs.StableRNG + +OneClassSVM = @load OneClassSVM pkg=LIBSVM # model type +model = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance + +rng = StableRNG(123) +Xmatrix = randn(rng, 5, 3) +Xmatrix[1, 1] = 100.0 +X = MLJ.table(Xmatrix) + +mach = machine(model, X) |> fit! 
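+
+# the sign convention applied to the raw LIBSVM decision function is
+# recorded in the fitted parameters (see "Fitted parameters" above):
+fitted_params(mach).orientation  # +1 or -1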
+
+# training scores (outliers have larger scores):
+julia> report(mach).scores
+5-element Vector{Float64}:
+  6.711689156091755e-7
+ -6.740101976655081e-7
+ -6.711632439648446e-7
+ -6.743015858874887e-7
+ -6.745393717880104e-7
+
+# scores for new data:
+Xnew = MLJ.table(rand(rng, 2, 3))
+
+julia> transform(mach, Xnew)
+2-element Vector{Float64}:
+ -6.746293022511047e-7
+ -6.744289265348623e-7
+```
+
+## Generating probabilistic predictions of outlierness
+
+Continuing the previous example:
+
+```
+using OutlierDetection
+pmodel = ProbabilisticDetector(model)
+pmach = machine(pmodel, X) |> fit!
+
+# probabilistic predictions on new data:
+
+julia> y_prob = predict(pmach, Xnew)
+2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:
+ UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)
+ UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)
+
+# probabilities for outlierness:
+
+julia> pdf.(y_prob, "outlier")
+2-element Vector{Float64}:
+ 9.572583265925801e-5
+ 0.0
+
+# raw scores are still available using `transform`:
+
+julia> transform(pmach, Xnew)
+2-element Vector{Float64}:
+ 9.572583265925801e-5
+ 0.0
+```
+
+
+## Outlier classification using a probability threshold
+
+Continuing the previous example:
+
+```
+dmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)
+dmach = machine(dmodel, X) |> fit!
+
+julia> yhat = predict(dmach, Xnew)
+2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:
+ "normal"
+ "normal"
+```
+
+## User-defined kernels
+
+Continuing the first example:
+
+```
+k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`
+model = OneClassSVM(kernel=k)
+mach = machine(model, X) |> fit!
+
+julia> yhat = transform(mach, Xnew)
+2-element Vector{Float64}:
+ -0.4825363352732942
+ -0.4848772169720227
+```
+
+See also [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the
+original C implementation
+[documentation](https://github.com/cjlin1/libsvm/blob/master/README). For
+an alternative source of outlier detection models with an MLJ
+interface, see
+[OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).
+ +""" +OneClassSVM + end # module diff --git a/test/runtests.jl b/test/runtests.jl index 23fcd2b..85f037a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,6 +30,14 @@ import LIBSVM MLJLIBSVMInterface.encode(Dict('d'=> 1.0), v)) end +@testset "orientation of scores" begin + scores = [1, 1, 1, 1, 0] + @test MLJLIBSVMInterface.orientation(scores) == -1 + @test MLJLIBSVMInterface.orientation(-scores) == 1 + @test MLJLIBSVMInterface.orientation(scores .+ 100) == -1 + @test MLJLIBSVMInterface.orientation(-scores .+ 100) == 1 +end + ## CLASSIFIERS @@ -56,6 +64,15 @@ lcpred = MLJBase.predict(linear_classifier, fitresultCL, selectrows(X, test)); @test Set(classes(nucpred[1])) == Set(classes(y[1])) @test Set(classes(lcpred[1])) == Set(classes(y[1])) +fpC = MLJBase.fitted_params(plain_classifier, fitresultC) +fpCnu = MLJBase.fitted_params(nu_classifier, fitresultCnu) +fpCL = MLJBase.fitted_params(linear_classifier, fitresultCL) + +for fp in [fpC, fpCnu, fpCL] + @test keys(fp) == (:libsvm_model, :encoding) + @test fp.encoding[int(MLJBase.classes(y)[1])] == classes(y)[1] +end + rng = StableRNGs.StableRNG(123) # test with linear data: @@ -93,6 +110,13 @@ fitresultR, cacheR, reportR = MLJBase.fit(plain_regressor, 1, fitresultRnu, cacheRnu, reportRnu = MLJBase.fit(nu_regressor, 1, selectrows(X, train), y[train]); +fpR = MLJBase.fitted_params(plain_regressor, fitresultR) +fpRnu = MLJBase.fitted_params(nu_regressor, fitresultRnu) + +for fp in [fpR, fpRnu] + @test fp.libsvm_model isa LIBSVM.SVM +end + rpred = MLJBase.predict(plain_regressor, fitresultR, selectrows(X, test)); nurpred = MLJBase.predict(nu_regressor, fitresultRnu, selectrows(X, test)); @@ -102,24 +126,68 @@ nurpred = MLJBase.predict(nu_regressor, fitresultRnu, selectrows(X, test)); ## ANOMALY DETECTION +N = 50 +rng = StableRNGs.StableRNG(123) +Xmatrix = randn(rng, 2N, 3) + +# insert outliers at observation 1 and N: +Xmatrix[1, 1] = 100.0 +Xmatrix[N, 3] = 200.0 + +X = MLJBase.table(Xmatrix) + oneclasssvm = OneClassSVM() -fitresultoc, cacheoc, reportoc = MLJBase.fit(oneclasssvm, 1, - selectrows(X, train)); -# output is CategoricalArray{Bool} -ocpred = MLJBase.transform(oneclasssvm, - fitresultoc, - selectrows(X, test)); +fitresultoc, cacheoc, reportoc = MLJBase.fit(oneclasssvm, 1, X) + +fp = MLJBase.fitted_params(oneclasssvm, fitresultoc) +@test fp.libsvm_model isa LIBSVM.SVM + +training_scores = reportoc.scores +scores = MLJBase.transform(oneclasssvm, fitresultoc, X) + +@test scores == training_scores + +# crude extraction of outliers from scores: +midpoint = mean([minimum(scores), maximum(scores)]) +outlier_indices = filter(eachindex(scores)) do i + scores[i] .> midpoint +end + +@test outlier_indices == [1, N] + + +## CONSTRUCTOR FAILS + +@test_throws(MLJLIBSVMInterface.ERR_PRECOMPUTED_KERNEL, + SVC(kernel=LIBSVM.Kernel.Precomputed)) + + +## CALLABLE KERNEL + +X, y = make_blobs() + +kernel(x1, x2) = x1' * x2 + +model = SVC(kernel=kernel) +model₂ = SVC(kernel=LIBSVM.Kernel.Linear) + +fitresult, cache, report = MLJBase.fit(model, 0, X, y); +fitresult₂, cache₂, report₂ = MLJBase.fit(model₂, 0, X, y); + +@test fitresult[1].rho ≈ fitresult₂[1].rho +@test fitresult[1].coefs ≈ fitresult₂[1].coefs +@test fitresult[1].SVs.indices ≈ fitresult₂[1].SVs.indices + +yhat = MLJBase.predict(model, fitresult, X); +yhat₂ = MLJBase.predict(model₂, fitresult₂, X); -# test whether the proprotion of outliers corresponds to the `nu` parameter -@test isapprox((length(train) - sum(MLJBase.transform(oneclasssvm, fitresultoc, selectrows(X, train)) .== true)) / 
length(train), oneclasssvm.nu, atol=0.005) -@test isapprox((length(test) - sum(ocpred .== true)) / length(test), oneclasssvm.nu, atol=0.05) +@test yhat == yhat₂ -## PRECOMPUTED KERNELS +@test accuracy(yhat, y) > 0.75 -model = @test_logs((:warn, MLJLIBSVMInterface.WARN_PRECOMPUTED_KERNEL), +model = @test_throws(MLJLIBSVMInterface.ERR_PRECOMPUTED_KERNEL, SVC(kernel=LIBSVM.Kernel.Precomputed)) -@test model.kernel == LIBSVM.Kernel.RadialBasis ## WEIGHTS @@ -141,24 +209,24 @@ for model in [SVC(), LinearSVC()] # without weights: Θ, _, _ = MLJBase.fit(model, 0, Xtrain, ytrain) - yhat = predict(model, Θ, X); - @test levels(yhat) == levels(y) # the `2` class persists as a level + ŷ = predict(model, Θ, X); + @test levels(ŷ) == levels(y) # the `2` class persists as a level # with uniform weights: Θ_uniform, _, _ = MLJBase.fit(model, 0, Xtrain, ytrain, weights_uniform) - yhat_uniform = predict(model, Θ_uniform, X); - @test levels(yhat_uniform) == levels(y) + ŷ_uniform = predict(model, Θ_uniform, X); + @test levels(ŷ_uniform) == levels(y) # with weights favouring class `3`: Θ_favouring_3, _, _ = MLJBase.fit(model, 0, Xtrain, ytrain, weights_favouring_3) - yhat_favouring_3 = predict(model, Θ_favouring_3, X); - @test levels(yhat_favouring_3) == levels(y) + ŷ_favouring_3 = predict(model, Θ_favouring_3, X); + @test levels(ŷ_favouring_3) == levels(y) # comparisons: if !(model isa LinearSVC) # linear solver is not deterministic - @test yhat_uniform == yhat + @test ŷ_uniform == ŷ end - d = sum(yhat_favouring_3 .== 3) - sum(yhat .== 3) + d = sum(ŷ_favouring_3 .== 3) - sum(ŷ .== 3) if d <= 0 @show model @show d