From 38ea1fb04e2fcc55d2954158d19bd940f2766d6f Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 17 Mar 2022 14:15:44 +1300 Subject: [PATCH 01/18] bump [compat] LIBSVM = "0.8.0" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 577d4b9..b87f0ee 100644 --- a/Project.toml +++ b/Project.toml @@ -9,7 +9,7 @@ MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] -LIBSVM = "0.6.0" +LIBSVM = "0.8.0" MLJModelInterface = "^0.3.6,^0.4, 1.0" julia = "1.3" From 58536913b01da88e3ef4f61af2fabc432bc4d3e6 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 17 Mar 2022 14:16:18 +1300 Subject: [PATCH 02/18] allow user to specify *callable* kernel --- src/MLJLIBSVMInterface.jl | 34 +++++++++++++++++----------------- test/runtests.jl | 31 +++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index 61bb957..ef2ee47 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -68,7 +68,7 @@ used in fitting See also LinearSVC, NuSVC """ mutable struct SVC <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 weights::Union{Dict, Nothing} cost::Float64 @@ -81,7 +81,7 @@ mutable struct SVC <: MMI.Deterministic end function SVC( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,weights::Union{Dict, Nothing} = nothing ,cost::Float64 = 1.0 @@ -124,7 +124,7 @@ used in fitting See also LinearSVC, SVC """ mutable struct NuSVC <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 weights::Union{Dict, Nothing} nu::Float64 @@ -137,7 +137,7 @@ mutable struct NuSVC <: MMI.Deterministic end function NuSVC( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,weights::Union{Dict, Nothing} = nothing ,nu::Float64 = 0.5 @@ -168,7 +168,7 @@ function NuSVC( end mutable struct OneClassSVM <: MMI.Unsupervised - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 nu::Float64 cost::Float64 @@ -180,7 +180,7 @@ mutable struct OneClassSVM <: MMI.Unsupervised end function OneClassSVM( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.1 ,cost::Float64 = 1.0 @@ -221,7 +221,7 @@ used in fitting See also EpsilonSVR """ mutable struct NuSVR <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 nu::Float64 cost::Float64 @@ -233,7 +233,7 @@ mutable struct NuSVR <: MMI.Deterministic end function NuSVR( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.5 ,cost::Float64 = 1.0 @@ -274,7 +274,7 @@ used in fitting See also NuSVR """ mutable struct EpsilonSVR <: MMI.Deterministic - kernel::LIBSVM.Kernel.KERNEL + kernel gamma::Float64 epsilon::Float64 cost::Float64 @@ -286,7 +286,7 @@ mutable struct EpsilonSVR <: MMI.Deterministic end function EpsilonSVR( - ;kernel::LIBSVM.Kernel.KERNEL = LIBSVM.Kernel.RadialBasis + ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,epsilon::Float64 = 0.1 ,cost::Float64 = 1.0 @@ -320,17 +320,17 @@ const SVM = Union{LinearSVC, SVC, NuSVC, NuSVR, EpsilonSVR, OneClassSVM} # # CLEAN METHOD -const WARN_PRECOMPUTED_KERNEL = +const ERR_PRECOMPUTED_KERNEL = ArgumentError( "Pre-computed 
kernels are not supported by installed version of "* - "MLJLIBSVMInterface.jl. Using `LIBSVM.Kernel.RadialBasis` instead. " + "MLJLIBSVMInterface.jl. Alternatively, specify `kernel=k` for some "* + "function or callable `k(v1::AbstractVector, v2::AbstractVector)`. " +) function MMI.clean!(model::SVM) message = "" - if !(model isa LinearSVC) && - model.kernel == LIBSVM.Kernel.Precomputed - message *= WARN_PRECOMPUTED_KERNEL - model.kernel = LIBSVM.Kernel.RadialBasis - end + !(model isa LinearSVC) && + model.kernel == LIBSVM.Kernel.Precomputed && + throw(ERR_PRECOMPUTED_KERNEL) return message end diff --git a/test/runtests.jl b/test/runtests.jl index ac1a6fe..0a9bf45 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -90,8 +90,31 @@ ocpred = MLJBase.transform(oneclasssvm, @test isapprox((length(train) - sum(MLJBase.transform(oneclasssvm, fitresultoc, selectrows(X, train)) .== true)) / length(train), oneclasssvm.nu, atol=0.005) @test isapprox((length(test) - sum(ocpred .== true)) / length(test), oneclasssvm.nu, atol=0.05) -## PRECOMPUTED KERNELS +## CONSTRUCTOR FAILS -model = @test_logs((:warn, MLJLIBSVMInterface.WARN_PRECOMPUTED_KERNEL), - SVC(kernel=LIBSVM.Kernel.Precomputed)) -@test model.kernel == LIBSVM.Kernel.RadialBasis +@test_throws(MLJLIBSVMInterface.ERR_PRECOMPUTED_KERNEL, + SVC(kernel=LIBSVM.Kernel.Precomputed)) + + +## CALLABLE KERNEL + +X, y = make_blobs() + +kernel(x1, x2) = x1' * x2 + +model = SVC(kernel=kernel) +model₂ = SVC(kernel=LIBSVM.Kernel.Linear) + +fitresult, cache, report = MLJBase.fit(model, 0, X, y); +fitresult₂, cache₂, report₂ = MLJBase.fit(model₂, 0, X, y); + +@test fitresult[1].rho ≈ fitresult₂[1].rho +@test fitresult[1].coefs ≈ fitresult₂[1].coefs +@test fitresult[1].SVs.indices ≈ fitresult₂[1].SVs.indices + +yhat = MLJBase.predict(model, fitresult, X); +yhat₂ = MLJBase.predict(model₂, fitresult₂, X); + +@test yhat == yhat₂ + +@test accuracy(yhat, y) > 0.75 From 8335aeccba56a82f55cca4b538758284b97af78f Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 17 Mar 2022 15:11:41 +1300 Subject: [PATCH 03/18] overload slightly more user-friendly `fitted_params` --- src/MLJLIBSVMInterface.jl | 21 +++++++++++++++++++++ test/runtests.jl | 22 +++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index ef2ee47..b111a28 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -376,6 +376,11 @@ function get_svm_parameters(model::Union{SVC, NuSVC, NuSVR, EpsilonSVR, OneClass return params end +function get_encoding(decoder) + refs = MMI.int.(decoder.classes) + return Dict(i => decoder(i) for i in refs) +end + function MMI.fit(model::LinearSVC, verbosity::Int, X, y) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -396,6 +401,9 @@ function MMI.fit(model::LinearSVC, verbosity::Int, X, y) return fitresult, cache, report end +MMI.fitted_params(::LinearSVC, fitresult) = + (libsvm_model=fitresult[1], encoding=get_encoding(fitresult[2])) + function MMI.fit(model::Union{SVC, NuSVC}, verbosity::Int, X, y) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -418,6 +426,10 @@ function MMI.fit(model::Union{SVC, NuSVC}, verbosity::Int, X, y) return fitresult, cache, report end +MMI.fitted_params(::Union{SVC, NuSVC}, fitresult) = + (libsvm_model=fitresult[1], encoding=get_encoding(fitresult[2])) + + function MMI.fit(model::Union{NuSVR, EpsilonSVR}, verbosity::Int, X, y) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -437,6 +449,10 @@ function MMI.fit(model::Union{NuSVR, EpsilonSVR}, verbosity::Int, X, y) return fitresult, cache, report end +MMI.fitted_params(::Union{NuSVR, EpsilonSVR}, fitresult) = + (libsvm_model=fitresult,) + + function MMI.fit(model::OneClassSVM, verbosity::Int, X) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -456,6 +472,11 @@ function MMI.fit(model::OneClassSVM, verbosity::Int, X) return fitresult, cache, report end +MMI.fitted_params(::OneClassSVM, fitresult) = + (libsvm_model=fitresult,) + + +# # PREDICT METHODS function MMI.predict(model::LinearSVC, fitresult, Xnew) result, decode = fitresult diff --git a/test/runtests.jl b/test/runtests.jl index 0a9bf45..d17211f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -31,6 +31,15 @@ lcpred = MLJBase.predict(linear_classifier, fitresultCL, selectrows(X, test)); @test Set(classes(nucpred[1])) == Set(classes(y[1])) @test Set(classes(lcpred[1])) == Set(classes(y[1])) +fpC = MLJBase.fitted_params(plain_classifier, fitresultC) +fpCnu = MLJBase.fitted_params(nu_classifier, fitresultCnu) +fpCL = MLJBase.fitted_params(linear_classifier, fitresultCL) + +for fp in [fpC, fpCnu, fpCL] + @test keys(fp) == (:libsvm_model, :encoding) + @test fp.encoding[int(MLJBase.classes(y)[1])] == classes(y)[1] +end + rng = StableRNGs.StableRNG(123) # test with linear data: @@ -68,6 +77,13 @@ fitresultR, cacheR, reportR = MLJBase.fit(plain_regressor, 1, fitresultRnu, cacheRnu, reportRnu = MLJBase.fit(nu_regressor, 1, selectrows(X, train), y[train]); +fpR = MLJBase.fitted_params(plain_regressor, fitresultR) +fpRnu = MLJBase.fitted_params(nu_regressor, fitresultRnu) + +for fp in [fpR, fpRnu] + @test fp.libsvm_model isa LIBSVM.SVM +end + rpred = MLJBase.predict(plain_regressor, fitresultR, selectrows(X, test)); nurpred = MLJBase.predict(nu_regressor, fitresultRnu, selectrows(X, test)); @@ -80,7 +96,11 @@ nurpred = MLJBase.predict(nu_regressor, fitresultRnu, selectrows(X, test)); oneclasssvm = OneClassSVM() fitresultoc, cacheoc, reportoc = MLJBase.fit(oneclasssvm, 1, - selectrows(X, train)); + 
selectrows(X, train)); + +fp = MLJBase.fitted_params(oneclasssvm, fitresultoc) +@test fp.libsvm_model isa LIBSVM.SVM + # output is CategoricalArray{Bool} ocpred = MLJBase.transform(oneclasssvm, fitresultoc, From 699f7da7733a797996705ba397519181c65fae54 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 18 Mar 2022 16:23:20 +1300 Subject: [PATCH 04/18] minor --- Project.toml | 2 +- src/MLJLIBSVMInterface.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 47b64fe..ca1e62c 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] CategoricalArrays = "0.10" -LIBSVM = "0.8.0" +LIBSVM = "0.8" MLJModelInterface = "^0.3.6,^0.4, 1.0" julia = "1.3" diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index 73cef9a..d1c65fc 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -449,6 +449,7 @@ end MMI.fitted_params(::LinearSVC, fitresult) = (libsvm_model=fitresult[1], encoding=get_encoding(fitresult[2])) + function MMI.fit(model::Union{SVC, NuSVC}, verbosity::Int, X, y, weights=nothing) Xmatrix = MMI.matrix(X)' # notice the transpose From ee1883a4b609c05ca0b92c73bf9b804b38b957d9 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 21 Mar 2022 12:53:46 +1300 Subject: [PATCH 05/18] add human_name; remove p as parameter of LinearSVC oops --- src/MLJLIBSVMInterface.jl | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index d1c65fc..1c9ebce 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -18,18 +18,10 @@ const PKG = "MLJLIBSVMInterface" # # MODEL TYPES -""" - LinearSVC(; kwargs...) - -Linear support vector machine classifier using LIBLINEAR: https://www.csie.ntu.edu.tw/~cjlin/liblinear/ - -See also SVC, NuSVC -""" mutable struct LinearSVC <: MMI.Deterministic solver::LIBSVM.Linearsolver.LINEARSOLVER tolerance::Float64 cost::Float64 - p::Float64 bias::Float64 end @@ -37,14 +29,12 @@ function LinearSVC( ;solver::LIBSVM.Linearsolver.LINEARSOLVER = LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL ,tolerance::Float64 = Inf ,cost::Float64 = 1.0 - ,p::Float64 = 0.1 ,bias::Float64= -1.0) model = LinearSVC( solver ,tolerance ,cost - ,p ,bias ) @@ -563,6 +553,13 @@ MMI.load_path(::Type{<:OneClassSVM}) = "$PKG.OneClassSVM" MMI.supports_class_weights(::Type{<:LinearSVC}) = true MMI.supports_class_weights(::Type{<:SVC}) = true +MMI.human_name(::Type{<:LinearSVC}) = "linear support vector classifier" +MMI.human_name(::Type{<:SVC}) = "C-support vector classifier" +MMI.human_name(::Type{<:NuSVC}) = "nu-support vector classifier" +MMI.human_name(::Type{<:NuSVR}) = "nu-support vector regressor" +MMI.human_name(::Type{<:EpsilonSVR}) = "epsilon-support vector regressor" +MMI.human_name(::Type{<:OneClassSVM}) = "$one-class support vector machine" + MMI.package_name(::Type{<:SVM}) = "LIBSVM" MMI.package_uuid(::Type{<:SVM}) = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" MMI.is_pure_julia(::Type{<:SVM}) = false @@ -575,4 +572,6 @@ MMI.target_scitype(::Type{<:Union{NuSVR, EpsilonSVR}}) = MMI.output_scitype(::Type{<:OneClassSVM}) = AbstractVector{<:Finite{2}} # Bool (true means inlier) + + end # module From aa274f3daa7b8c3713c19f37b53ba89593f69dd2 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 21 Mar 2022 15:57:52 +1300 Subject: [PATCH 06/18] remove `cost` hyper-param from NuSVC as redundant --- src/MLJLIBSVMInterface.jl | 30 +++++++++--------------------- test/runtests.jl | 19 +++++++++---------- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index 1c9ebce..c05c332 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -44,17 +44,6 @@ function LinearSVC( return model end -""" - SVC(; kwargs...) - -Kernel support vector machine classifier using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also LinearSVC, NuSVC -""" mutable struct SVC <: MMI.Deterministic kernel gamma::Float64 @@ -112,7 +101,6 @@ mutable struct NuSVC <: MMI.Deterministic kernel gamma::Float64 nu::Float64 - cost::Float64 cachesize::Float64 degree::Int32 coef0::Float64 @@ -124,7 +112,6 @@ function NuSVC( ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.5 - ,cost::Float64 = 1.0 ,cachesize::Float64 = 200.0 ,degree::Int32 = Int32(3) ,coef0::Float64 = 0. @@ -135,7 +122,6 @@ function NuSVC( kernel ,gamma ,nu - ,cost ,cachesize ,degree ,coef0 @@ -377,10 +363,12 @@ function categorical_value(x, v) return pool[get(pool, x)] end -# to ensure the keys of user-provided weights are `CategoricalValue`s: -fix_keys(weights::Dict{<:CategoricalArrays.CategoricalValue}, y) = weights +# to ensure the keys of user-provided weights are `CategoricalValue`s, +# and the values are floats: +fix_keys(weights::Dict{<:CategoricalArrays.CategoricalValue}, y) = + Dict(k => float(weights[k]) for k in keys(weights)) fix_keys(weights, y) = - Dict(categorical_value(x, y) => weights[x] for x in keys(weights)) + Dict(categorical_value(x, y) => float(weights[x]) for x in keys(weights)) """ encode(weights::Dict, y) @@ -425,13 +413,13 @@ function MMI.fit(model::LinearSVC, verbosity::Int, X, y, weights=nothing) result = LIBSVM.LIBLINEAR.linear_train(y_plain, Xmatrix, weights = _weights, solver_type = Int32(model.solver), - C = model.cost, p = model.p, bias = model.bias, + C = model.cost, bias = model.bias, eps = model.tolerance, verbose = ifelse(verbosity > 1, true, false) ) fitresult = (result, decode) cache = nothing - report = nothing + report = NamedTuple() return fitresult, cache, report end @@ -522,13 +510,13 @@ MMI.fitted_params(::OneClassSVM, fitresult) = function MMI.predict(model::LinearSVC, fitresult, Xnew) result, decode = fitresult - (p,d) = LIBSVM.LIBLINEAR.linear_predict(result, MMI.matrix(Xnew)') + p, _ = LIBSVM.LIBLINEAR.linear_predict(result, MMI.matrix(Xnew)') return decode(p) end function MMI.predict(model::Union{SVC, NuSVC}, fitresult, Xnew) result, decode = fitresult - (p,d) = LIBSVM.svmpredict(result, MMI.matrix(Xnew)') + p, _ = LIBSVM.svmpredict(result, MMI.matrix(Xnew)') return decode(p) end diff --git a/test/runtests.jl b/test/runtests.jl index 2023c36..8c1ea0c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -164,9 +164,8 @@ yhat₂ = MLJBase.predict(model₂, fitresult₂, X); @test accuracy(yhat, y) > 0.75 -model = @test_logs((:warn, MLJLIBSVMInterface.WARN_PRECOMPUTED_KERNEL), +model = @test_throws(MLJLIBSVMInterface.ERR_PRECOMPUTED_KERNEL, SVC(kernel=LIBSVM.Kernel.Precomputed)) -@test model.kernel == LIBSVM.Kernel.RadialBasis ## WEIGHTS @@ -188,24 +187,24 @@ for model in [SVC(), LinearSVC()] # without weights: Θ, _, _ = MLJBase.fit(model, 
0, Xtrain, ytrain) - yhat = predict(model, Θ, X); - @test levels(yhat) == levels(y) # the `2` class persists as a level + ŷ = predict(model, Θ, X); + @test levels(ŷ) == levels(y) # the `2` class persists as a level # with uniform weights: Θ_uniform, _, _ = MLJBase.fit(model, 0, Xtrain, ytrain, weights_uniform) - yhat_uniform = predict(model, Θ_uniform, X); - @test levels(yhat_uniform) == levels(y) + ŷ_uniform = predict(model, Θ_uniform, X); + @test levels(ŷ_uniform) == levels(y) # with weights favouring class `3`: Θ_favouring_3, _, _ = MLJBase.fit(model, 0, Xtrain, ytrain, weights_favouring_3) - yhat_favouring_3 = predict(model, Θ_favouring_3, X); - @test levels(yhat_favouring_3) == levels(y) + ŷ_favouring_3 = predict(model, Θ_favouring_3, X); + @test levels(ŷ_favouring_3) == levels(y) # comparisons: if !(model isa LinearSVC) # linear solver is not deterministic - @test yhat_uniform == yhat + @test ŷ_uniform == ŷ end - d = sum(yhat_favouring_3 .== 3) - sum(yhat .== 3) + d = sum(ŷ_favouring_3 .== 3) - sum(ŷ .== 3) if d <= 0 @show model @show d From 76a43624a38d3a83cabd3a74ce3d56920338c6d1 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 21 Mar 2022 16:03:29 +1300 Subject: [PATCH 07/18] add docstrings for `LinearSVC`, `SVC` and `NuSVC` --- src/MLJLIBSVMInterface.jl | 463 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 463 insertions(+) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index c05c332..42d58f5 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -561,5 +561,468 @@ MMI.output_scitype(::Type{<:OneClassSVM}) = AbstractVector{<:Finite{2}} # Bool (true means inlier) +# # DOCUMENT STRINGS + +const DOC_REFERENCE = "C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for "* + "support vector machines.\" *ACM Transactions on Intelligent Systems and "* + "Technology*, 2(3):27:1–27:27. Updated at "* + "[https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf]"* + "(https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf)" + +const DOC_ALGORITHM = "Reference for algorithm and core C-library: $DOC_REFERENCE. " + +const DOC_REFERENCE_LINEAR = "Rong-En Fan et al (2008): \"LIBLINEAR: A Library for "* + "Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. "* + "Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf]"* + "(https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf)" + +const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* + "$DOC_REFERENCE_LINEAR. " + + +""" +$(MMI.doc_header(LinearSVC)) + +$DOC_ALGORITHM_LINEAR + +This model type is similar to `SVC` from the same package with the setting +`kernel=LIBSVM.Kernel.KERNEL.Linear`, but is optimized for the linear +case. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with one of: + + mach = machine(model, X, y) + mach = machine(model, X, y, w) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype + with `scitype(y)` + +- `w`: a dictionary of class weights, keyed on `levels(y)`. + +Train the machine using `fit!(mach, rows=...)`. 
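As a minimal sketch of the binding step just described (using the synthetic
data generator `make_blobs`, as in this package's tests, and `@load` as in the
Examples further below; the row and column counts are arbitrary):

```julia
using MLJ
LinearSVC = @load LinearSVC pkg=LIBSVM   # model type
X, y = make_blobs(100, 3)                # table with 3 Continuous columns, Multiclass target
schema(X)                                # confirm each column has Continuous scitype
scitype(y)                               # AbstractVector{Multiclass{3}}
mach = machine(LinearSVC(), X, y)
fit!(mach, rows=1:80)                    # train using the first 80 rows only
```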
+ + +# Hyper-parameters + +- `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, + which must be one of the following from the LIBSVM.jl package: + + - `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal)) + + - `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized + L2-loss support vector classification (dual) + + - `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss + support vector classification (primal) + + - `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized + L1-loss support vector classification (dual) + + - `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by + Crammer and Singer) `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: + L1-regularized L2-loss support vector classification) + + - `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression + + - `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual) + +- `tolerance::Float64=Inf`: tolerance for the stopping criterion; + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if + `bias < 0`, no bias term added (default -1) + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `encoding`: class encoding used internally by `libsvm_model` - a + dictionary of class labels keyed on the internal integer representation + + +# Examples + +``` +using MLJ +import LIBSVM + +LinearSVC = @load LinearSVC pkg=LIBSVM # model type +model = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance + +X, y = @load_iris # table, vector +mach = machine(model, X, y) |> fit! + +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "versicolor" + "virginica" +``` + +## Incorporating class weights + +```julia +weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) +mach = machine(model, X, y, weights) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "versicolor" + "versicolor" + "versicolor" +``` + + +See also the [`SVC`](@ref) classifier, and + [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the + [documentation](https://github.com/cjlin1/liblinear/blob/master/README) + for the original C implementation. + +""" +LinearSVC + +""" +$(MMI.doc_header(SVC)) + +$DOC_ALGORITHM + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with one of: + + mach = machine(model, X, y) + mach = machine(model, X, y, w) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype + with `scitype(y)` + +- `w`: a dictionary of class weights, keyed on `levels(y)`. + +Train the machine using `fit!(mach, rows=...)`. 
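For example, a class-weights dictionary `w` might be constructed as follows (a
sketch only, using the iris data that also appears in the Examples below; keys
must be drawn from `levels(y)`):

```julia
using MLJ
SVC = @load SVC pkg=LIBSVM
X, y = @load_iris
levels(y)   # ["setosa", "versicolor", "virginica"]
w = Dict("setosa" => 1.0, "versicolor" => 20.0, "virginica" => 1.0)
mach = machine(SVC(), X, y, w) |> fit!
```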
+ + +# Hyper-parameters + +- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be + called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors whose + length matches the number of columns of the training data `X`, see + examples below) or one of the following built-in kernels from the + LIBSVM package: + + - `LIBSVM.Kernel.Linear`: `x1'*x2` + + - `LIBSVM.Kernel.Polynomial`: `gamma*x1'*x2 + coef0)^degree` + + - `LIBSVM.Kernel.RadialBasis`: `exp(-gamma*norm(x1, x2)^2)` + + - `LIBSVM.Kernel.Sigmoid`: `tanh(gamma*x1'*x2 + coef0)` + +- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then + `gamma = 1/nfeatures` is used in training, where `nfeatures` is the + number of features (columns of `X`). If `gamma==0.0` then `gamma = + 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used + appears in the report (see below). + +- `coef0 = 0.0`: kernel parameter (see above) + +- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + +- `probability=false`: whether to base classification on calibrated + probabilities (expensive) or to use the raw decision function + (recommended). Note that in either case `predict` returns point + predictions and not probabilities, so that this option has little + utility in the current re-implementation. + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `encoding`: class encoding used internally by `libsvm_model` - a + dictionary of class labels keyed on the internal integer representation + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +SVC = @load SVC pkg=LIBSVM # model type +model = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = @load_iris # table, vector +mach = machine(model, X, y) |> fit! + +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## Using a user-defined kernel + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = SVC(kernel=k) +mach = machine(model, X, y) |> fit! + +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## Incorporating class weights + +In either scenario above, we can do: + +```julia +weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) +mach = machine(model, X, y, weights) |> fit! 
+ +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "versicolor" + "versicolor" + "versicolor" +``` + +See also the classifiers [`NuSVC`](@ref) and [`LinearSVC`](@ref), and +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +[documentation](https://github.com/cjlin1/libsvm/blob/master/README) +for the original C implementation. + +""" +SVC + +""" +$(MMI.doc_header(SVC)) + +$DOC_ALGORITHM + +This model is simply a reparameterization of the [`SVC`](@ref) +classifier, where `nu` replaces `cost`, and is therefore mathematically +equivalent to it. + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with one of: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype + with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be + called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors whose + length matches the number of columns of the training data `X`, see + examples below) or one of the following built-in kernels from the + LIBSVM package: + + - `LIBSVM.Kernel.Linear`: `x1'*x2` + + - `LIBSVM.Kernel.Polynomial`: `gamma*x1'*x2 + coef0)^degree` + + - `LIBSVM.Kernel.RadialBasis`: `exp(-gamma*norm(x1, x2)^2)` + + - `LIBSVM.Kernel.Sigmoid`: `tanh(gamma*x1'*x2 + coef0)` + +- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then + `gamma = 1/nfeatures` is used in training, where `nfeatures` is the + number of features (columns of `X`). If `gamma==0.0` then `gamma = + 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used + appears in the report (see below). + +- `coef0 = 0.0`: kernel parameter (see above) + +- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) + +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin + errors and a lower bound of the fraction of support vectors. Denoted + `ν` in the cited paper. + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `encoding`: class encoding used internally by `libsvm_model` - a + dictionary of class labels keyed on the internal integer representation + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +SVC = @load NuSVC pkg=LIBSVM # model type +model = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = @load_iris # table, vector +mach = machine(model, X, y) |> fit! 
+ +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## Using a user-defined kernel + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = NuSVC(kernel=k) +mach = machine(model, X, y) |> fit! + +Xnew = (sepal_length = [6.4, 7.2, 7.4], + sepal_width = [2.8, 3.0, 2.8], + petal_length = [5.6, 5.8, 6.1], + petal_width = [2.1, 1.6, 1.9],) + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## Incorporating class weights + +In either scenario above, we can do: + +```julia +weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) +mach = machine(model, X, y, weights) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "versicolor" + "versicolor" + "versicolor" +``` + +See also the classifiers [`NuSVC`](@ref) and [`LinearSVC`](@ref), + [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the + [documentation](https://github.com/cjlin1/libsvm/blob/master/README) + for the original C implementation. + +""" +NuSVC + end # module From da4d63762f930698f6ee0f7570e4a2a5315fe4fd Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 24 Mar 2022 15:56:47 +1300 Subject: [PATCH 08/18] add EpsilonSVR docstring --- src/MLJLIBSVMInterface.jl | 178 +++++++++++++++++++++++++++++++------- 1 file changed, 145 insertions(+), 33 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index 42d58f5..ab0ec8e 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -85,18 +85,6 @@ function SVC( return model end -""" - NuSVC(; kwargs...) - -Kernel support vector machine classifier using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also LinearSVC, SVC -""" mutable struct NuSVC <: MMI.Deterministic kernel gamma::Float64 @@ -699,10 +687,10 @@ julia> yhat = predict(mach, Xnew) ``` -See also the [`SVC`](@ref) classifier, and - [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the - [documentation](https://github.com/cjlin1/liblinear/blob/master/README) - for the original C implementation. +See also the [`SVC`](@ref) and [`NuSVC`] classifiers, and +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +[documentation](https://github.com/cjlin1/liblinear/blob/master/README) +for the original C implementation. """ LinearSVC @@ -833,11 +821,6 @@ k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` model = SVC(kernel=k) mach = machine(model, X, y) |> fit! -Xnew = (sepal_length = [6.4, 7.2, 7.4], - sepal_width = [2.8, 3.0, 2.8], - petal_length = [5.6, 5.8, 6.1], - petal_width = [2.1, 1.6, 1.9],) - julia> yhat = predict(mach, Xnew) 3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: "virginica" @@ -869,7 +852,7 @@ for the original C implementation. SVC """ -$(MMI.doc_header(SVC)) +$(MMI.doc_header(NuSVC)) $DOC_ALGORITHM @@ -877,9 +860,10 @@ This model is simply a reparameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is therefore mathematically equivalent to it. 
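So, for example, where one would adjust `cost` to regularize an `SVC`, here one
adjusts `nu` instead. A quick sketch (assuming the iris data used in the
Examples below; the value of `nu` is arbitrary):

```julia
using MLJ
NuSVC = @load NuSVC pkg=LIBSVM
X, y = @load_iris
mach = machine(NuSVC(nu=0.25), X, y) |> fit!   # `nu` plays the role of `cost` in `SVC`
```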
+ # Training data -In MLJ or MLJBase, bind an instance `model` to data with one of: +In MLJ or MLJBase, bind an instance `model` to data with: mach = machine(model, X, y) @@ -964,7 +948,7 @@ The fields of `report(mach)` are: using MLJ import LIBSVM -SVC = @load NuSVC pkg=LIBSVM # model type +NuSVC = @load NuSVC pkg=LIBSVM # model type model = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance X, y = @load_iris # table, vector @@ -989,11 +973,6 @@ k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` model = NuSVC(kernel=k) mach = machine(model, X, y) |> fit! -Xnew = (sepal_length = [6.4, 7.2, 7.4], - sepal_width = [2.8, 3.0, 2.8], - petal_length = [5.6, 5.8, 6.1], - petal_width = [2.1, 1.6, 1.9],) - julia> yhat = predict(mach, Xnew) 3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: "virginica" @@ -1016,13 +995,146 @@ julia> yhat = predict(mach, Xnew) "versicolor" ``` -See also the classifiers [`NuSVC`](@ref) and [`LinearSVC`](@ref), - [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the - [documentation](https://github.com/cjlin1/libsvm/blob/master/README) - for the original C implementation. +See also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +[documentation](https://github.com/cjlin1/libsvm/blob/master/README) +for the original C implementation. """ NuSVC +""" +$(MMI.doc_header(EpsilonSVR)) + +$DOC_ALGORITHM + +This model is an adaptation of the classifier `SVC` to regression, but +has an additional parameter `epsilon` (denoted `ϵ` in the cited +reference). + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `Continuous`; check the scitype with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be + called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors whose + length matches the number of columns of the training data `X`, see + examples below) or one of the following built-in kernels from the + LIBSVM package: + + - `LIBSVM.Kernel.Linear`: `x1'*x2` + + - `LIBSVM.Kernel.Polynomial`: `gamma*x1'*x2 + coef0)^degree` + + - `LIBSVM.Kernel.RadialBasis`: `exp(-gamma*norm(x1, x2)^2)` + + - `LIBSVM.Kernel.Sigmoid`: `tanh(gamma*x1'*x2 + coef0)` + +- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then + `gamma = 1/nfeatures` is used in training, where `nfeatures` is the + number of features (columns of `X`). If `gamma==0.0` then `gamma = + 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used + appears in the report (see below). + +- `coef0 = 0.0`: kernel parameter (see above) + +- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `epsilon=0.1` (range (, `Inf`)): the parameter denoted ``ϵ`` in the + cited reference; `epsilon` is the thickness of the "penalty-free" + neighborhood of the decision surface. 
+ +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +EpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type +model = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = make_regression(rng=123) # table, vector +mach = machine(model, X, y) |> fit! + +Xnew, _ = make_regression(3, rng=123) + +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 0.2512132502584155 + 0.007340201523624579 + -0.2482949812264707 +``` + +## Using a user-defined kernel + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = EpsilonSVR(kernel=k) +mach = machine(model, X, y) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 1.1121225361666656 + 0.04667702229741916 + -0.6958148424680672 +``` + +See also [`NuSVR`](@ref), +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +[documentation](https://github.com/cjlin1/libsvm/blob/master/README) +for the original C implementation. + +""" +EpsilonSVR + end # module From 57821cf9279337fe381e28ad8cbc9a01476db2ff Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 24 Mar 2022 16:19:41 +1300 Subject: [PATCH 09/18] add NuSVC docstring --- src/MLJLIBSVMInterface.jl | 192 ++++++++++++++++++++++++++++---------- 1 file changed, 144 insertions(+), 48 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index ab0ec8e..f9d328f 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -164,18 +164,6 @@ function OneClassSVM( return model end -""" - NuSVR(; kwargs...) - -Kernel support vector machine regressor using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also EpsilonSVR -""" mutable struct NuSVR <: MMI.Deterministic kernel gamma::Float64 @@ -217,18 +205,6 @@ function NuSVR( return model end -""" - EpsilonSVR(; kwargs...) - -Kernel support vector machine regressor using LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ - -If `gamma==-1.0` then `gamma = 1/nfeatures is used in -fitting. -If `gamma==0.0` then a `gamma = 1/(var(X) * nfeatures)` is -used in fitting - -See also NuSVR -""" mutable struct EpsilonSVR <: MMI.Deterministic kernel gamma::Float64 @@ -567,6 +543,24 @@ const DOC_REFERENCE_LINEAR = "Rong-En Fan et al (2008): \"LIBLINEAR: A Library f const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* "$DOC_REFERENCE_LINEAR. 
" +const DOC_KERNEL = """ +- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be + called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors + whose length matches the number of columns of the training data `X`, + see examples below) or one of the following built-in kernels from + the LIBSVM package: + + - `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2` + + - `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree` + + - `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1, x2)^2)` + + - `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)` + + where `gamma`, `coef0`, `degree` are other hyper-parameters. +""" + """ $(MMI.doc_header(LinearSVC)) @@ -725,19 +719,7 @@ Train the machine using `fit!(mach, rows=...)`. # Hyper-parameters -- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be - called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors whose - length matches the number of columns of the training data `X`, see - examples below) or one of the following built-in kernels from the - LIBSVM package: - - - `LIBSVM.Kernel.Linear`: `x1'*x2` - - - `LIBSVM.Kernel.Polynomial`: `gamma*x1'*x2 + coef0)^degree` - - - `LIBSVM.Kernel.RadialBasis`: `exp(-gamma*norm(x1, x2)^2)` - - - `LIBSVM.Kernel.Sigmoid`: `tanh(gamma*x1'*x2 + coef0)` +$DOC_KERNEL - `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the @@ -882,11 +864,7 @@ Train the machine using `fit!(mach, rows=...)`. # Hyper-parameters -- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be - called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors whose - length matches the number of columns of the training data `X`, see - examples below) or one of the following built-in kernels from the - LIBSVM package: +$DOC_KERNEL - `LIBSVM.Kernel.Linear`: `x1'*x2` @@ -1034,11 +1012,7 @@ Train the machine using `fit!(mach, rows=...)`. # Hyper-parameters -- `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be - called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors whose - length matches the number of columns of the training data `X`, see - examples below) or one of the following built-in kernels from the - LIBSVM package: +$DOC_KERNEL - `LIBSVM.Kernel.Linear`: `x1'*x2` @@ -1061,7 +1035,7 @@ Train the machine using `fit!(mach, rows=...)`. - `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the cited reference; for greater regularization, decrease `cost` -- `epsilon=0.1` (range (, `Inf`)): the parameter denoted ``ϵ`` in the +- `epsilon=0.1` (range (0, `Inf`)): the parameter denoted ``ϵ`` in the cited reference; `epsilon` is the thickness of the "penalty-free" neighborhood of the decision surface. @@ -1137,4 +1111,126 @@ for the original C implementation. """ EpsilonSVR +""" +$(MMI.doc_header(NuSVR)) + +$DOC_ALGORITHM + +This model is a reparameterization of `EpsilonSCV` in which the +`epsilon` hyper-parameter is replaced with a new parameter `nu` +(denoted ``ν`` in the cited reference) which attempts to control the +number of support vectors directly. 
+ + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `Continuous`; check the scitype with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +- $DOC_KERNEL + +- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then + `gamma = 1/nfeatures` is used in training, where `nfeatures` is the + number of features (columns of `X`). If `gamma==0.0` then `gamma = + 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used + appears in the report (see below). + +- `coef0 = 0.0`: kernel parameter (see above) + +- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) + +- `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the + cited reference; for greater regularization, decrease `cost` + +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin + errors and a lower bound of the fraction of support vectors. Denoted + ``ν`` in the cited paper. + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Using a built-in kernel + +``` +using MLJ +import LIBSVM + +NuSVR = @load NuSVR pkg=LIBSVM # model type +model = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance + +X, y = make_regression(rng=123) # table, vector +mach = machine(model, X, y) |> fit! + +Xnew, _ = make_regression(3, rng=123) + +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 0.2008156459920009 + 0.1131520519131709 + -0.2076156254934889 +``` + +## Using a user-defined kernel + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = NuSVR(kernel=k) +mach = machine(model, X, y) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element Vector{Float64}: + 1.1211558175964662 + 0.06677125944808422 + -0.6817578942749346 +``` + +See also [`EpsilonSVR`](@ref), +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +[documentation](https://github.com/cjlin1/libsvm/blob/master/README) +for the original C implementation. + +""" +NuSVR + end # module From 6a1bee24a8056b754cf2fdd6a1ff3a755eb755bf Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 24 Mar 2022 17:09:33 +1300 Subject: [PATCH 10/18] bump [compat] MLJModelInterface = "1.4" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ca1e62c..765e539 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] CategoricalArrays = "0.10" LIBSVM = "0.8" -MLJModelInterface = "^0.3.6,^0.4, 1.0" +MLJModelInterface = "1.4" julia = "1.3" [extras] From f55b6d2ee293b6c7790fb1bb11abb6de939d6152 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 28 Mar 2022 12:28:30 +1300 Subject: [PATCH 11/18] remove the redundant `cost` hyper-parameter from OneClassSVM oops --- src/MLJLIBSVMInterface.jl | 324 +++++++++++++++++++++++++++++--------- test/runtests.jl | 8 + 2 files changed, 261 insertions(+), 71 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index f9d328f..f7ce3b6 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -123,11 +123,10 @@ function NuSVC( return model end -mutable struct OneClassSVM <: MMI.Unsupervised +mutable struct OneClassSVM <: MMI.UnsupervisedDetector kernel gamma::Float64 nu::Float64 - cost::Float64 cachesize::Float64 degree::Int32 coef0::Float64 @@ -139,7 +138,6 @@ function OneClassSVM( ;kernel = LIBSVM.Kernel.RadialBasis ,gamma::Float64 = 0.0 ,nu::Float64 = 0.1 - ,cost::Float64 = 1.0 ,cachesize::Float64 = 200.0 ,degree::Int32 = Int32(3) ,coef0::Float64 = 0.0 @@ -150,7 +148,6 @@ function OneClassSVM( kernel ,gamma ,nu - ,cost ,cachesize ,degree ,coef0 @@ -361,6 +358,30 @@ function get_encoding(decoder) end +""" + orientation(scores) + +Private method. + +Return `1` if the majority of elements of `scores` are less than the +midpoint between the minimum and maximum values. For outlier detection +scores, the implication in that case is that scores increase with +increasing likelihood of outlierness. + +Otherwise return `-1` (scores decrease with increasing likelihood of +outlierness). + +""" +function orientation(scores) + middle = (maximum(scores) + minimum(scores))/2 + if quantile(scores, 0.5) < middle + return 1 + else + return -1 + end +end + + # # FIT METHOD function MMI.fit(model::LinearSVC, verbosity::Int, X, y, weights=nothing) @@ -391,7 +412,6 @@ end MMI.fitted_params(::LinearSVC, fitresult) = (libsvm_model=fitresult[1], encoding=get_encoding(fitresult[2])) - function MMI.fit(model::Union{SVC, NuSVC}, verbosity::Int, X, y, weights=nothing) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -446,7 +466,6 @@ end MMI.fitted_params(::Union{NuSVR, EpsilonSVR}, fitresult) = (libsvm_model=fitresult,) - function MMI.fit(model::OneClassSVM, verbosity::Int, X) Xmatrix = MMI.matrix(X)' # notice the transpose @@ -456,21 +475,29 @@ function MMI.fit(model::OneClassSVM, verbosity::Int, X) model = deepcopy(model) model.gamma == -1.0 && (model.gamma = 1.0/size(Xmatrix, 1)) model.gamma == 0.0 && (model.gamma = 1.0/(var(Xmatrix) * size(Xmatrix, 1)) ) - fitresult = LIBSVM.svmtrain(Xmatrix; - get_svm_parameters(model)..., - verbose = ifelse(verbosity > 1, true, false) - ) + libsvm_model = LIBSVM.svmtrain(Xmatrix; + get_svm_parameters(model)..., + verbose = ifelse(verbosity > 1, true, false) + ) - report = (gamma=model.gamma,) + # get orientation and training scores: + p, decision_matrix = LIBSVM.svmpredict(libsvm_model, Xmatrix) + decision_scores = view(decision_matrix, 1, :) + orientation = MLJLIBSVMInterface.orientation(decision_scores) + scores = orientation*decision_scores + @show p + + fitresult = (libsvm_model, orientation) + report = (gamma=model.gamma, scores=scores) return fitresult, cache, report end MMI.fitted_params(::OneClassSVM, fitresult) = - (libsvm_model=fitresult,) + (libsvm_model=fitresult[1], orientation=fitresult[2]) -# # PREDICT METHODS +# # PREDICT AND TRANSFORM function MMI.predict(model::LinearSVC, fitresult, Xnew) result, decode = fitresult @@ -490,10 +517,13 @@ function MMI.predict(model::Union{NuSVR, EpsilonSVR}, fitresult, Xnew) end function MMI.transform(model::OneClassSVM, fitresult, Xnew) - (p,d) = 
LIBSVM.svmpredict(fitresult, MMI.matrix(Xnew)') - return MMI.categorical(p) + libsvm_model, orientation = fitresult + _, decision_matrix = LIBSVM.svmpredict(libsvm_model, MMI.matrix(Xnew)') + decision_scores = view(decision_matrix, 1, :) + return orientation*decision_scores end + # metadata MMI.load_path(::Type{<:LinearSVC}) = "$PKG.LinearSVC" MMI.load_path(::Type{<:SVC}) = "$PKG.SVC" @@ -559,9 +589,22 @@ const DOC_KERNEL = """ - `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)` where `gamma`, `coef0`, `degree` are other hyper-parameters. + +- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then + `gamma = 1/nfeatures` is used in training, where `nfeatures` is the + number of features (columns of `X`). If `gamma==0.0` then `gamma = + 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used + appears in the report (see below). + +- `coef0 = 0.0`: kernel parameter (see above) + +- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) + """ +# ## LinearSVC + """ $(MMI.doc_header(LinearSVC)) @@ -689,6 +732,9 @@ for the original C implementation. """ LinearSVC + +# ## SVC + """ $(MMI.doc_header(SVC)) @@ -721,16 +767,6 @@ Train the machine using `fit!(mach, rows=...)`. $DOC_KERNEL -- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then - `gamma = 1/nfeatures` is used in training, where `nfeatures` is the - number of features (columns of `X`). If `gamma==0.0` then `gamma = - 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used - appears in the report (see below). - -- `coef0 = 0.0`: kernel parameter (see above) - -- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) - - `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the cited reference; for greater regularization, decrease `cost` @@ -833,6 +869,9 @@ for the original C implementation. """ SVC + +# ## NuSVC + """ $(MMI.doc_header(NuSVC)) @@ -866,24 +905,6 @@ Train the machine using `fit!(mach, rows=...)`. $DOC_KERNEL - - `LIBSVM.Kernel.Linear`: `x1'*x2` - - - `LIBSVM.Kernel.Polynomial`: `gamma*x1'*x2 + coef0)^degree` - - - `LIBSVM.Kernel.RadialBasis`: `exp(-gamma*norm(x1, x2)^2)` - - - `LIBSVM.Kernel.Sigmoid`: `tanh(gamma*x1'*x2 + coef0)` - -- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then - `gamma = 1/nfeatures` is used in training, where `nfeatures` is the - number of features (columns of `X`). If `gamma==0.0` then `gamma = - 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used - appears in the report (see below). - -- `coef0 = 0.0`: kernel parameter (see above) - -- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) - - `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. @@ -982,6 +1003,8 @@ for the original C implementation. NuSVC +# ## EpsilonSVR + """ $(MMI.doc_header(EpsilonSVR)) @@ -1014,24 +1037,6 @@ Train the machine using `fit!(mach, rows=...)`. $DOC_KERNEL - - `LIBSVM.Kernel.Linear`: `x1'*x2` - - - `LIBSVM.Kernel.Polynomial`: `gamma*x1'*x2 + coef0)^degree` - - - `LIBSVM.Kernel.RadialBasis`: `exp(-gamma*norm(x1, x2)^2)` - - - `LIBSVM.Kernel.Sigmoid`: `tanh(gamma*x1'*x2 + coef0)` - -- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then - `gamma = 1/nfeatures` is used in training, where `nfeatures` is the - number of features (columns of `X`). If `gamma==0.0` then `gamma = - 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used - appears in the report (see below). - -- `coef0 = 0.0`: kernel parameter (see above) - -- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) - - `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the cited reference; for greater regularization, decrease `cost` @@ -1111,6 +1116,9 @@ for the original C implementation. """ EpsilonSVR + +# ## NuSVR + """ $(MMI.doc_header(NuSVR)) @@ -1144,16 +1152,6 @@ Train the machine using `fit!(mach, rows=...)`. - $DOC_KERNEL -- `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then - `gamma = 1/nfeatures` is used in training, where `nfeatures` is the - number of features (columns of `X`). If `gamma==0.0` then `gamma = - 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used - appears in the report (see below). - -- `coef0 = 0.0`: kernel parameter (see above) - -- `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above) - - `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the cited reference; for greater regularization, decrease `cost` @@ -1233,4 +1231,188 @@ for the original C implementation. """ NuSVR + +# ## OneClassSVM + +""" +$(MMI.doc_header(OneClassSVM)) + +$DOC_ALGORITHM + +This model is an outlier detection model delivering raw scores based +on the decision function of a support vector machine. Like the +[`NuSVC`](@ref) classifier, it uses the `nu` reparameterization of the +`cost` parameter appearing in standard support vector classification +[`SVC`](@ref). + +To extract +normalized scores ("probabilities") wrap the model using +`ProbabilisticDetector` from +[OutlierDection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For +threshold-based classification, wrap the probabilistic model using +MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below. + + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with: + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have `Continuous` element scitype; check column scitypes with + `schema(X)` + +Train the machine using `fit!(mach, rows=...)`. + + +# Hyper-parameters + +$DOC_KERNEL + +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin + errors and a lower bound of the fraction of support vectors. Denoted + `ν` in the cited paper. + +- `cachesize=200.0` cache memory size in MB + +- `tolerance=0.001`: tolerance for the stopping criterion + +- `shrinking=true`: whether to use shrinking heuristics + + +# Operations + +- `transform(mach, Xnew)`: return predictions of the target, given + features `Xnew` having the same scitype as `X` above. + + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `libsvm_model`: the trained model object created by the LIBSVM.jl package + +- `orientation`: this equals `1` if the decision function for + `libsvm_model` is increasing with increasing outlierness, and `-1` + if it is decreasing instead. Correspondingly, the `libsvm_model` attaches + + +# Report + +The fields of `report(mach)` are: + +- `gamma`: actual value of the kernel parameter `gamma` used in training + + +# Examples + +## Generating raw scores for outlierness + + +``` +using MLJ +import LIBSVM +import StableRNGs.StableRNG + +OneClassSVM = @load OneClassSVM pkg=LIBSVM # model type +model = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance + +rng = StableRNG(123) +Xmatrix = randn(rng, 5, 3) +Xmatrix[1, 1] = 100.0 +X = MLJ.table(Xmatrix) + +mach = machine(model, X) |> fit! 
+ +# training scores (outliers have larger scores): +julia> report(mach).scores +5-element Vector{Float64}: + 6.711689156091755e-7 + -6.740101976655081e-7 + -6.711632439648446e-7 + -6.743015858874887e-7 + -6.745393717880104e-7 + +# scores for new data: +Xnew = MLJ.table(rand(rng, 2, 3)) + +julia> transform(mach, rand(rng, 2, 3)) +2-element Vector{Float64}: + -6.746293022511047e-7 + -6.744289265348623e-7 +``` + +## Generating probabilistic predictions of outlierness + +Continuing the previous example: + +``` +using OutlierDetection +pmodel = ProbabilisticDetector(model) +pmach = machine(pmodel, X) |> fit! + +# probabilistic predictions on new data: + +julia> y_prob = predict(pmach, Xnew) +2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}: + UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5) + UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0) + +# probabilities for outlierness: + +julia> pdf.(y_prob, "outlier") +2-element Vector{Float64}: + 9.572583265925801e-5 + 0.0 + + +## Outlier classification using a probability threshold: + +Continuing the previous example: + +``` +dmodel = BinaryThresholdPredictor(pmodel, threshold=0.9) +dmach = machine(dmodel, X) |> fit! + +## Using a user-defined kernel + + +``` +k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` +model = OneClassSVM(kernel=k) +mach = machine(model, X, y) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "virginica" + "virginica" + "virginica" +``` + +## Incorporating class weights + +In either scenario above, we can do: + +```julia +weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) +mach = machine(model, X, y, weights) |> fit! + +julia> yhat = predict(mach, Xnew) +3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: + "versicolor" + "versicolor" + "versicolor" +``` + +See also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +[documentation](https://github.com/cjlin1/libsvm/blob/master/README) +for the original C implementation. + +""" +OneClassSVM + end # module diff --git a/test/runtests.jl b/test/runtests.jl index 8c1ea0c..5e9cd3b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,6 +30,14 @@ import LIBSVM MLJLIBSVMInterface.encode(Dict('d'=> 1.0), v)) end +@test "orientation of scores" begin + scores = [1, 1, 1, 1, 0] + @test MLJLIBSVMInterface.orientation(scores) == -1 + @test MLJLIBSVMInterface.orientation(-scores) == 1 + @test MLJLIBSVMInterface.orientation(scores .+ 100) == -1 + @test MLJLIBSVMInterface.orientation(-scores .+ 100) == 1 +end + ## CLASSIFIERS From 2969832e2a6d93714ce301dbed3e845fce34310e Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 28 Mar 2022 14:48:10 +1300 Subject: [PATCH 12/18] finish new docstrings (spell checked) --- src/MLJLIBSVMInterface.jl | 115 ++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 61 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index f7ce3b6..dc63f4c 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -485,7 +485,6 @@ function MMI.fit(model::OneClassSVM, verbosity::Int, X) decision_scores = view(decision_matrix, 1, :) orientation = MLJLIBSVMInterface.orientation(decision_scores) scores = orientation*decision_scores - @show p fitresult = (libsvm_model, orientation) report = (gamma=model.gamma, scores=scores) @@ -537,9 +536,9 @@ MMI.supports_class_weights(::Type{<:SVC}) = true MMI.human_name(::Type{<:LinearSVC}) = "linear support vector classifier" MMI.human_name(::Type{<:SVC}) = "C-support vector classifier" -MMI.human_name(::Type{<:NuSVC}) = "nu-support vector classifier" -MMI.human_name(::Type{<:NuSVR}) = "nu-support vector regressor" -MMI.human_name(::Type{<:EpsilonSVR}) = "epsilon-support vector regressor" +MMI.human_name(::Type{<:NuSVC}) = "ν-support vector classifier" +MMI.human_name(::Type{<:NuSVR}) = "ν-support vector regressor" +MMI.human_name(::Type{<:EpsilonSVR}) = "ϵ-support vector regressor" MMI.human_name(::Type{<:OneClassSVM}) = "$one-class support vector machine" MMI.package_name(::Type{<:SVM}) = "LIBSVM" @@ -575,10 +574,10 @@ const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* const DOC_KERNEL = """ - `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be - called, as in `kernel(x1, x2)` (where `x1` and `x2` are vectors - whose length matches the number of columns of the training data `X`, - see examples below) or one of the following built-in kernels from - the LIBSVM package: + called, as in `kernel(x1, x2)`, or one of the built-in kernels from + the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors + whose lengths match the number of columns of the training data `X`, + see examples below. - `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2` @@ -588,7 +587,7 @@ const DOC_KERNEL = """ - `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)` - where `gamma`, `coef0`, `degree` are other hyper-parameters. + Here `gamma`, `coef0`, `degree` are other hyper-parameters. - `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the @@ -832,7 +831,7 @@ julia> yhat = predict(mach, Xnew) "virginica" ``` -## Using a user-defined kernel +## User-defined kernels ``` k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` @@ -877,7 +876,7 @@ $(MMI.doc_header(NuSVC)) $DOC_ALGORITHM -This model is simply a reparameterization of the [`SVC`](@ref) +This model is simply a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is therefore mathematically equivalent to it. @@ -965,7 +964,7 @@ julia> yhat = predict(mach, Xnew) "virginica" ``` -## Using a user-defined kernel +## User-defined kernels ``` k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` @@ -979,21 +978,6 @@ julia> yhat = predict(mach, Xnew) "virginica" ``` -## Incorporating class weights - -In either scenario above, we can do: - -```julia -weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) -mach = machine(model, X, y, weights) |> fit! 
- -julia> yhat = predict(mach, Xnew) -3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: - "versicolor" - "versicolor" - "versicolor" -``` - See also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the [documentation](https://github.com/cjlin1/libsvm/blob/master/README) @@ -1011,7 +995,7 @@ $(MMI.doc_header(EpsilonSVR)) $DOC_ALGORITHM This model is an adaptation of the classifier `SVC` to regression, but -has an additional parameter `epsilon` (denoted `ϵ` in the cited +has an additional parameter `epsilon` (denoted ``ϵ`` in the cited reference). @@ -1094,7 +1078,7 @@ julia> yhat = predict(mach, Xnew) -0.2482949812264707 ``` -## Using a user-defined kernel +## User-defined kernels ``` k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` @@ -1124,7 +1108,7 @@ $(MMI.doc_header(NuSVR)) $DOC_ALGORITHM -This model is a reparameterization of `EpsilonSCV` in which the +This model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted ``ν`` in the cited reference) which attempts to control the number of support vectors directly. @@ -1209,7 +1193,7 @@ julia> yhat = predict(mach, Xnew) -0.2076156254934889 ``` -## Using a user-defined kernel +## User-defined kernels ``` k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` @@ -1241,14 +1225,14 @@ $DOC_ALGORITHM This model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the -[`NuSVC`](@ref) classifier, it uses the `nu` reparameterization of the +[`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref). To extract normalized scores ("probabilities") wrap the model using `ProbabilisticDetector` from -[OutlierDection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For +[OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below. @@ -1285,8 +1269,13 @@ $DOC_KERNEL # Operations -- `transform(mach, Xnew)`: return predictions of the target, given - features `Xnew` having the same scitype as `X` above. +- `transform(mach, Xnew)`: return scores for outlierness, given + features `Xnew` having the same scitype as `X` above. The greater + the score, the more likely it is an outlier. This score is based on + the SVM decision function. For normalized scores, wrap `model` using + `ProbabilisticDetector` from OutlierDetection.jl and call `predict` + instead, and for threshold-based classification, wrap again using + `BinaryThresholdPredictor`. See the examples below. # Fitted parameters @@ -1297,7 +1286,11 @@ The fields of `fitted_params(mach)` are: - `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` - if it is decreasing instead. Correspondingly, the `libsvm_model` attaches + if it is decreasing instead. Correspondingly, the `libsvm_model` + attaches `true` to outliers in the first case, and `false` in the + second. (The `scores` given in the MLJ report and generated by + `MLJ.transform` already correct for this ambiguity, which is + therefore only an issue for users directly accessing `libsvm_model`.) 
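+
+  For illustration (with `mach` denoting a trained machine, as in the
+  examples below): the values in `report(mach).scores`, and those returned
+  by `transform`, are the raw LIBSVM decision-function values multiplied by
+  `orientation`, so larger values always indicate greater outlierness.
+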
# Report @@ -1368,6 +1361,14 @@ julia> pdf.(y_prob, "outlier") 9.572583265925801e-5 0.0 +# raw scores are still available using `transform`: + +julia> transform(pmach, Xnew) +2-element Vector{Float64}: + 9.572583265925801e-5 + 0.0 +``` + ## Outlier classification using a probability threshold: @@ -1377,40 +1378,32 @@ Continuing the previous example: dmodel = BinaryThresholdPredictor(pmodel, threshold=0.9) dmach = machine(dmodel, X) |> fit! -## Using a user-defined kernel +julia> yhat = predict(dmach, Xnew) +2-element CategoricalArrays.CategoricalArray{String,1,UInt8}: + "normal" + "normal" +``` + +## User-defined kernels +Continuing the first example: ``` k(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear` model = OneClassSVM(kernel=k) -mach = machine(model, X, y) |> fit! - -julia> yhat = predict(mach, Xnew) -3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: - "virginica" - "virginica" - "virginica" -``` - -## Incorporating class weights - -In either scenario above, we can do: - -```julia -weights = Dict("virginica" => 1, "versicolor" => 20, "setosa" => 1) -mach = machine(model, X, y, weights) |> fit! +mach = machine(model, X) |> fit! -julia> yhat = predict(mach, Xnew) -3-element CategoricalArrays.CategoricalArray{String,1,UInt32}: - "versicolor" - "versicolor" - "versicolor" +julia> yhat = transform(mach, Xnew) +2-element Vector{Float64}: + -0.4825363352732942 + -0.4848772169720227 ``` -See also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), -[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the +See also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the [documentation](https://github.com/cjlin1/libsvm/blob/master/README) -for the original C implementation. +for the original C implementation. For an alternative source of +outlier detection models with an MLJ interface, see +[OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/). """ OneClassSVM From 76a4091917b105e9e7aeacf2af404f6f3d600a84 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 28 Mar 2022 15:06:26 +1300 Subject: [PATCH 13/18] bump 0.2.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 765e539..0fcb92a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJLIBSVMInterface" uuid = "61c7150f-6c77-4bb1-949c-13197eac2a52" authors = ["Anthony D. Blaom "] -version = "0.1.4" +version = "0.2.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From ea27f1384cb14f081da0e2a77d1a333ca3b08f8c Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Mon, 28 Mar 2022 16:04:08 +1300 Subject: [PATCH 14/18] adapt tests to new re-implementation of OneClassSVM --- test/runtests.jl | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 5e9cd3b..85f037a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,7 +30,7 @@ import LIBSVM MLJLIBSVMInterface.encode(Dict('d'=> 1.0), v)) end -@test "orientation of scores" begin +@testset "orientation of scores" begin scores = [1, 1, 1, 1, 0] @test MLJLIBSVMInterface.orientation(scores) == -1 @test MLJLIBSVMInterface.orientation(-scores) == 1 @@ -126,22 +126,36 @@ nurpred = MLJBase.predict(nu_regressor, fitresultRnu, selectrows(X, test)); ## ANOMALY DETECTION +N = 50 +rng = StableRNGs.StableRNG(123) +Xmatrix = randn(rng, 2N, 3) + +# insert outliers at observation 1 and N: +Xmatrix[1, 1] = 100.0 +Xmatrix[N, 3] = 200.0 + +X = MLJBase.table(Xmatrix) + oneclasssvm = OneClassSVM() -fitresultoc, cacheoc, reportoc = MLJBase.fit(oneclasssvm, 1, - selectrows(X, train)); +fitresultoc, cacheoc, reportoc = MLJBase.fit(oneclasssvm, 1, X) fp = MLJBase.fitted_params(oneclasssvm, fitresultoc) @test fp.libsvm_model isa LIBSVM.SVM -# output is CategoricalArray{Bool} -ocpred = MLJBase.transform(oneclasssvm, - fitresultoc, - selectrows(X, test)); +training_scores = reportoc.scores +scores = MLJBase.transform(oneclasssvm, fitresultoc, X) + +@test scores == training_scores + +# crude extraction of outliers from scores: +midpoint = mean([minimum(scores), maximum(scores)]) +outlier_indices = filter(eachindex(scores)) do i + scores[i] .> midpoint +end + +@test outlier_indices == [1, N] -# test whether the proprotion of outliers corresponds to the `nu` parameter -@test isapprox((length(train) - sum(MLJBase.transform(oneclasssvm, fitresultoc, selectrows(X, train)) .== true)) / length(train), oneclasssvm.nu, atol=0.005) -@test isapprox((length(test) - sum(ocpred .== true)) / length(test), oneclasssvm.nu, atol=0.05) ## CONSTRUCTOR FAILS From 1ac65f979127443fdcb285a056b9a99502114dab Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Mon, 28 Mar 2022 17:03:09 +1300 Subject: [PATCH 15/18] minor change to improve code readability --- src/MLJLIBSVMInterface.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index dc63f4c..b37f742 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -481,7 +481,7 @@ function MMI.fit(model::OneClassSVM, verbosity::Int, X) ) # get orientation and training scores: - p, decision_matrix = LIBSVM.svmpredict(libsvm_model, Xmatrix) + _, decision_matrix = LIBSVM.svmpredict(libsvm_model, Xmatrix) decision_scores = view(decision_matrix, 1, :) orientation = MLJLIBSVMInterface.orientation(decision_scores) scores = orientation*decision_scores From 39bfbc3d47b56b4476bd12897a89930652c172bc Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 31 Mar 2022 12:41:59 +1300 Subject: [PATCH 16/18] clarify definitions of `nu` --- src/MLJLIBSVMInterface.jl | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index b37f742..5cfc04b 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -564,13 +564,13 @@ const DOC_REFERENCE = "C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for const DOC_ALGORITHM = "Reference for algorithm and core C-library: $DOC_REFERENCE. 
" -const DOC_REFERENCE_LINEAR = "Rong-En Fan et al (2008): \"LIBLINEAR: A Library for "* +const DOC_REFERENCE2 = "Rong-En Fan et al (2008): \"LIBLINEAR: A Library for "* "Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. "* "Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf]"* "(https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf)" const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* - "$DOC_REFERENCE_LINEAR. " + "$DOC_REFERENCE2. " const DOC_KERNEL = """ - `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be @@ -583,7 +583,7 @@ const DOC_KERNEL = """ - `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree` - - `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1, x2)^2)` + - `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))` - `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)` @@ -876,9 +876,10 @@ $(MMI.doc_header(NuSVC)) $DOC_ALGORITHM -This model is simply a re-parameterization of the [`SVC`](@ref) -classifier, where `nu` replaces `cost`, and is therefore mathematically -equivalent to it. +This model is a re-parameterization of the [`SVC`](@ref) classifier, +where `nu` replaces `cost`, and is therefore mathematically equivalent +to it. The parameter `nu` allows more direct control over the number +of support vectors (see under "Hyper-parameters below"). # Training data @@ -906,7 +907,10 @@ $DOC_KERNEL - `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted - `ν` in the cited paper. + `ν` in the cited paper. Changing `nu` changes the thickness of the + margin (a neighborhood of the decision surface) and a margin error + is said to have occurred if a training observation lies on the wrong + side of the surface or within the margin. - `cachesize=200.0` cache memory size in MB @@ -1139,9 +1143,12 @@ Train the machine using `fit!(mach, rows=...)`. - `cost=1.0` (range (0, `Inf`)): the parameter denoted ``C`` in the cited reference; for greater regularization, decrease `cost` -- `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin - errors and a lower bound of the fraction of support vectors. Denoted - ``ν`` in the cited paper. +- `nu=0.5` (range (0, 1]): An upper bound on the fraction of training + errors and a lower bound of the fraction of support vectors. + Denoted ``ν`` in the cited paper. Changing `nu` changes the + thickness of some neighborhood of the graph of the prediction + function (called a "tube" or "slab") and a training error is said to + occur when a data point `(x, y)` lies outside of that neighborhood. - `cachesize=200.0` cache memory size in MB @@ -1258,7 +1265,10 @@ $DOC_KERNEL - `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted - `ν` in the cited paper. + `ν` in the cited paper. Changing `nu` changes the thickness of the + margin (a neighborhood of the decision surface) and a margin error + is said to have occurred if a training observation lies on the wrong + side of the surface or within the margin. - `cachesize=200.0` cache memory size in MB From 56a7bca369c14b57907e913410a6f4cf8a8bfcaa Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 31 Mar 2022 13:46:05 +1300 Subject: [PATCH 17/18] add warning about serialization and user-defined kernels --- src/MLJLIBSVMInterface.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index 5cfc04b..aa69845 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -572,6 +572,12 @@ const DOC_REFERENCE2 = "Rong-En Fan et al (2008): \"LIBLINEAR: A Library for "* const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* "$DOC_REFERENCE2. " +const DOC_SERIALIZABILITY = "Serialization of "* + "models with user-defined kernels comes with some restrictions. "* + "See [LIVSVM.jl issue"* + "91](https://github.com/JuliaML/LIBSVM.jl/issues/91]"* + "(https://github.com/JuliaML/LIBSVM.jl/issues/91)" + const DOC_KERNEL = """ - `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from @@ -587,7 +593,7 @@ const DOC_KERNEL = """ - `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)` - Here `gamma`, `coef0`, `degree` are other hyper-parameters. + Here `gamma`, `coef0`, `degree` are other hyper-parameters. $DOC_SERIALIZABILITY - `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the From 7b178644caaa0469f25442967fafef018a9daa66 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 1 Apr 2022 11:31:40 +1300 Subject: [PATCH 18/18] docstring tweaks --- src/MLJLIBSVMInterface.jl | 70 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/src/MLJLIBSVMInterface.jl b/src/MLJLIBSVMInterface.jl index aa69845..814d78a 100644 --- a/src/MLJLIBSVMInterface.jl +++ b/src/MLJLIBSVMInterface.jl @@ -575,15 +575,14 @@ const DOC_ALGORITHM_LINEAR = "Reference for algorithm and core C-library: "* const DOC_SERIALIZABILITY = "Serialization of "* "models with user-defined kernels comes with some restrictions. "* "See [LIVSVM.jl issue"* - "91](https://github.com/JuliaML/LIBSVM.jl/issues/91]"* - "(https://github.com/JuliaML/LIBSVM.jl/issues/91)" + "91](https://github.com/JuliaML/LIBSVM.jl/issues/91)" const DOC_KERNEL = """ - `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors - whose lengths match the number of columns of the training data `X`, - see examples below. + whose lengths match the number of columns of the training data `X` (see + "Examples" below). - `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2` @@ -729,10 +728,10 @@ julia> yhat = predict(mach, Xnew) ``` -See also the [`SVC`](@ref) and [`NuSVC`] classifiers, and -[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the -[documentation](https://github.com/cjlin1/liblinear/blob/master/README) -for the original C implementation. +See also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/liblinear/blob/master/README). 
""" LinearSVC @@ -819,7 +818,7 @@ The fields of `report(mach)` are: using MLJ import LIBSVM -SVC = @load SVC pkg=LIBSVM # model type +SVC = @load SVC pkg=LIBSVM # model type model = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance X, y = @load_iris # table, vector @@ -867,9 +866,9 @@ julia> yhat = predict(mach, Xnew) ``` See also the classifiers [`NuSVC`](@ref) and [`LinearSVC`](@ref), and -[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the -[documentation](https://github.com/cjlin1/libsvm/blob/master/README) -for the original C implementation. +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). """ SVC @@ -883,9 +882,9 @@ $(MMI.doc_header(NuSVC)) $DOC_ALGORITHM This model is a re-parameterization of the [`SVC`](@ref) classifier, -where `nu` replaces `cost`, and is therefore mathematically equivalent -to it. The parameter `nu` allows more direct control over the number -of support vectors (see under "Hyper-parameters below"). +where `nu` replaces `cost`, and is mathematically equivalent to +it. The parameter `nu` allows more direct control over the number of +support vectors (see under "Hyper-parameters"). # Training data @@ -956,7 +955,7 @@ The fields of `report(mach)` are: using MLJ import LIBSVM -NuSVC = @load NuSVC pkg=LIBSVM # model type +NuSVC = @load NuSVC pkg=LIBSVM # model type model = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance X, y = @load_iris # table, vector @@ -989,9 +988,10 @@ julia> yhat = predict(mach, Xnew) ``` See also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), -[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the -[documentation](https://github.com/cjlin1/libsvm/blob/master/README) -for the original C implementation. +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation. +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). + """ NuSVC @@ -1035,8 +1035,11 @@ $DOC_KERNEL cited reference; for greater regularization, decrease `cost` - `epsilon=0.1` (range (0, `Inf`)): the parameter denoted ``ϵ`` in the - cited reference; `epsilon` is the thickness of the "penalty-free" - neighborhood of the decision surface. + cited reference; `epsilon` is the thickness of the penalty-free + neighborhood of the graph of the prediction function ("slab" + or "tube"). Specifically, a data point `(x, y)` incurs no training + loss unless it is outside this neighborhood; the further away it is + from the this neighborhood, the greater the loss penalty. - `cachesize=200.0` cache memory size in MB @@ -1103,9 +1106,9 @@ julia> yhat = predict(mach, Xnew) ``` See also [`NuSVR`](@ref), -[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the -[documentation](https://github.com/cjlin1/libsvm/blob/master/README) -for the original C implementation. +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). """ EpsilonSVR @@ -1153,8 +1156,8 @@ Train the machine using `fit!(mach, rows=...)`. errors and a lower bound of the fraction of support vectors. Denoted ``ν`` in the cited paper. Changing `nu` changes the thickness of some neighborhood of the graph of the prediction - function (called a "tube" or "slab") and a training error is said to - occur when a data point `(x, y)` lies outside of that neighborhood. 
+ function ("tube" or "slab") and a training error is said to occur + when a data point `(x, y)` lies outside of that neighborhood. - `cachesize=200.0` cache memory size in MB @@ -1191,7 +1194,7 @@ The fields of `report(mach)` are: using MLJ import LIBSVM -NuSVR = @load NuSVR pkg=LIBSVM # model type +NuSVR = @load NuSVR pkg=LIBSVM # model type model = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance X, y = make_regression(rng=123) # table, vector @@ -1221,9 +1224,9 @@ julia> yhat = predict(mach, Xnew) ``` See also [`EpsilonSVR`](@ref), -[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the -[documentation](https://github.com/cjlin1/libsvm/blob/master/README) -for the original C implementation. +[LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C +implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). """ NuSVR @@ -1416,9 +1419,10 @@ julia> yhat = transform(mach, Xnew) ``` See also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the -[documentation](https://github.com/cjlin1/libsvm/blob/master/README) -for the original C implementation. For an alternative source of -outlier detection models with an MLJ interface, see +original C implementation +[documentation](https://github.com/cjlin1/libsvm/blob/master/README). For +an alternative source of outlier detection models with an MLJ +interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/). """