From 6acc028cc040e8b18e67f53377df5efba1a3f717 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Denis=20Baru=C4=8Di=C4=87?=
Date: Sat, 4 May 2024 21:00:54 +0200
Subject: [PATCH] support regression models

---
 src/LIBLINEAR.jl   | 121 ++++++++++++++++++++++++++++++---------------
 test/validation.jl |   9 ++++
 2 files changed, 84 insertions(+), 46 deletions(-)

diff --git a/src/LIBLINEAR.jl b/src/LIBLINEAR.jl
index 3891013..a9c066c 100644
--- a/src/LIBLINEAR.jl
+++ b/src/LIBLINEAR.jl
@@ -23,6 +23,12 @@ const L2R_L2LOSS_SVR_DUAL = Cint(12)
 const L2R_L1LOSS_SVR_DUAL = Cint(13)
 const ONECLASS_SVM = Cint(21)
 
+# Whether `m.solver_type` is one of the `*_SVR*` (support vector regression) solvers.
+function is_regression_model(m)
+    solver = m.solver_type
+    return solver in (L2R_L2LOSS_SVR, L2R_L1LOSS_SVR_DUAL, L2R_L2LOSS_SVR_DUAL)
+end
+
 struct FeatureNode
     index :: Cint
     value :: Float64
@@ -257,65 +263,88 @@ function linear_train(
     model
 end
 
-# predict
-function linear_predict(
-            model :: LinearModel{T},
-            instances :: AbstractMatrix{U};
-            probability_estimates :: Bool=false,
-            verbose :: Bool=false) where {T, U<:Real}
-    set_print(verbose)
-    ninstances = size(instances, 2) # instances are in columns
-    size(instances, 1) != model.nr_feature &&
+# Run liblinear's prediction routine on every column of `instances`.
+#
+# Returns `(outputs, decvalues)`: `outputs[i]` is the value liblinear returned for
+# instance `i` and `decvalues[:, i]` holds the decision values (or probability
+# estimates) it wrote for that instance.
+function predict_decision_values(model, instances, probability_estimates)
+    ninstances = size(instances, 2) # instances are in columns
+    if size(instances, 1) != model.nr_feature
         error("""Model has $(model.nr_feature) features but
-              $(size(instances, 1)) provided (instances are in columns)""")
+                 $(size(instances, 1)) provided (instances are in columns)""")
+    end
 
-    model.bias >= 0 &&
-        (instances = [instances; fill(model.bias, 1, ninstances)])
+    if model.bias >= 0
+        instances = [instances; fill(model.bias, 1, ninstances)]
+    end
 
-    m = Array{Model}(undef, 1)
-    m[1] = Model(Parameter(model.solver_type, .0, .0, Cint(0),
-        convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0,.0,
-        convert(Ptr{Float64}, C_NULL), Cint(0)),
-        model.nr_class, model.nr_feature, pointer(model.w),
-        pointer(model._labels), model.bias, model.rho)
+    m = Model(Parameter(model.solver_type, .0, .0, Cint(0),
+        convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0,.0,
+        convert(Ptr{Float64}, C_NULL), Cint(0)),
+        model.nr_class, model.nr_feature, pointer(model.w),
+        pointer(model._labels), model.bias, model.rho)
 
     (nodes, nodeptrs) = instances2nodes(instances)
-    if model.solver_type == ONECLASS_SVM
-        # In this case we need to return inlier/outlier class labels
-        # which may not be of type `T`
-        class = Array{String}(undef, ninstances)
-    else
-        class = Array{T}(undef, ninstances)
-    end
 
-    w_number = Int(model.nr_class == 2 && model.solver_type != MCSVM_CS ?
-        1 : model.nr_class)
+    w_number = Int(model.nr_class == 2 && model.solver_type != MCSVM_CS ? 1 : model.nr_class)
 
     decvalues = Array{Float64}(undef, w_number, ninstances)
-    for i = 1:ninstances
+    outputs = Vector{Float64}(undef, ninstances)
+    # `predict_probability` writes `nr_class` values even when only `w_number` are
+    # kept, so liblinear writes into a scratch buffer that is always large enough.
+    scratch = Vector{Float64}(undef, max(w_number, Int(model.nr_class)))
+    # `m` holds raw pointers into `model`, and `nodeptrs` presumably point into
+    # `nodes`, so keep both alive while liblinear runs.
+    GC.@preserve model nodes for i = 1:ninstances
         if probability_estimates
-            output = ccall((:predict_probability, liblinear), Float64, (Ptr{Cvoid}, Ptr{FeatureNode}, Ptr{Float64}),
-                pointer(m), nodeptrs[i], pointer(decvalues, w_number*(i-1)+1))
-        else
-            output = ccall((:predict_values, liblinear), Float64, (Ptr{Cvoid}, Ptr{FeatureNode}, Ptr{Float64}),
-                pointer(m), nodeptrs[i], pointer(decvalues, w_number*(i-1)+1))
-        end
-        output_int = round(Int,output)
-
-        # For one-class SVM, `predict_values` returns +/- 1
-        # corresponding to outliers or not. This doesn't seem to be documented,
-        # but the code clearly returns +/- 1:
-        # https://github.com/cjlin1/liblinear/blob/8dc206b782e07676dc0d00678bedd295ce85acf3/linear.cpp#L3295
-        # and that is the return from scipy as well.
-        if model.solver_type === ONECLASS_SVM
-            c = output_int == -1 ? "outlier" : output_int == 1 ? "inlier" : error("Unexpected output $output_int")
+            outputs[i] = ccall((:predict_probability, liblinear), Float64,
+                               (Ref{Model}, Ptr{FeatureNode}, Ptr{Float64}),
+                               m, nodeptrs[i], scratch)
         else
-            c = model.labels[output_int]
+            outputs[i] = ccall((:predict_values, liblinear), Float64,
+                               (Ref{Model}, Ptr{FeatureNode}, Ptr{Float64}),
+                               m, nodeptrs[i], scratch)
         end
-        class[i] = c
+        copyto!(decvalues, w_number*(i-1)+1, scratch, 1, w_number)
     end
 
-    (class, decvalues)
+    return outputs, decvalues
+end
+
+# For one-class SVM, `predict_values` returns +/- 1
+# corresponding to outliers or not. This doesn't seem to be documented,
+# but the code clearly returns +/- 1:
+# https://github.com/cjlin1/liblinear/blob/8dc206b782e07676dc0d00678bedd295ce85acf3/linear.cpp#L3295
+# and that is the return from scipy as well.
+oneclass_output(output) = ifelse(output > 0, "inlier", "outlier")
+
+# predict
+#
+# Returns `(classes, decvalues)`. For regression models both elements are the
+# matrix of predicted values; for one-class SVM the classes are the strings
+# "inlier"/"outlier"; otherwise they are entries of `model.labels`.
+function linear_predict(
+            model :: LinearModel{T},
+            instances :: AbstractMatrix{U};
+            probability_estimates :: Bool=false,
+            verbose :: Bool=false) where {T, U<:Real}
+    set_print(verbose)
+
+    outputs, decvalues = predict_decision_values(model, instances, probability_estimates)
+
+    if is_regression_model(model)
+        return decvalues, decvalues
+    end
+
+    if model.solver_type === ONECLASS_SVM
+        classes = map(oneclass_output, outputs)
+    else
+        # liblinear returns the predicted label itself, which indexes `model.labels`.
+        classes = map(output -> model.labels[round(Int, output)], outputs)
+    end
+
+    return classes, decvalues
 end
 
 end # module
diff --git a/test/validation.jl b/test/validation.jl
index 60c4ee1..8731c60 100644
--- a/test/validation.jl
+++ b/test/validation.jl
@@ -38,4 +38,13 @@
             @test (class .== labels[2:2:end]) == correct
         end
     end
+
+    @testset "Regression" begin
+        x = rand(1, 10000);
+        y = vec(2 .* x)
+        m = linear_train(y, x, solver_type=LIBLINEAR.L2R_L2LOSS_SVR, eps=1e-5)
+        y_ = first(linear_predict(m, x))
+        @test size(y, 1) == size(y_, 2) # TODO: provide better test
+    end
+
 end