Skip to content

Commit

Permalink
support regression models
Browse files Browse the repository at this point in the history
  • Loading branch information
barucden committed May 4, 2024
1 parent 74cecf0 commit 6acc028
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 45 deletions.
103 changes: 58 additions & 45 deletions src/LIBLINEAR.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ const L2R_L2LOSS_SVR_DUAL = Cint(12)
const L2R_L1LOSS_SVR_DUAL = Cint(13)
const ONECLASS_SVM = Cint(21)

"""
    is_regression_model(m)

Return `true` if `m` was trained with one of the support-vector-regression
solvers (`L2R_L2LOSS_SVR`, `L2R_L2LOSS_SVR_DUAL`, `L2R_L1LOSS_SVR_DUAL`).
"""
is_regression_model(m) =
    m.solver_type in (L2R_L2LOSS_SVR, L2R_L1LOSS_SVR_DUAL, L2R_L2LOSS_SVR_DUAL)

struct FeatureNode
index :: Cint
value :: Float64
Expand Down Expand Up @@ -257,65 +262,73 @@ function linear_train(
model
end

# predict
function linear_predict(
model :: LinearModel{T},
instances :: AbstractMatrix{U};
probability_estimates :: Bool=false,
verbose :: Bool=false) where {T, U<:Real}
set_print(verbose)
ninstances = size(instances, 2) # instances are in columns

size(instances, 1) != model.nr_feature &&
"""
    predict_decision_values(model, instances, probability_estimates)

Run liblinear's prediction for every column of `instances` and return the
`w_number × ninstances` matrix of decision values (or probability estimates
when `probability_estimates` is `true`).

`w_number` is 1 for binary classification (except MCSVM_CS), otherwise
`model.nr_class`.
"""
function predict_decision_values(model, instances, probability_estimates)
    ninstances = size(instances, 2) # instances are in columns
    if size(instances, 1) != model.nr_feature
        error("""Model has $(model.nr_feature) features but
              $(size(instances, 1)) provided (instances are in columns)""")
    end

    # A trained bias term is modeled as an extra constant feature row.
    if model.bias >= 0
        instances = [instances; fill(model.bias, 1, ninstances)]
    end

    # Rebuild a C-layout `Model` pointing into the Julia-side weight and
    # label arrays so it can be handed to the liblinear C library.
    m = Model(Parameter(model.solver_type, .0, .0, Cint(0),
            convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0, .0,
            convert(Ptr{Float64}, C_NULL), Cint(0)),
        model.nr_class, model.nr_feature, pointer(model.w),
        pointer(model._labels), model.bias, model.rho)

    # `nodes` owns the FeatureNode storage that `nodeptrs` points into;
    # keep the binding alive for the duration of the ccall loop.
    (nodes, nodeptrs) = instances2nodes(instances)

    w_number = Int(model.nr_class == 2 && model.solver_type != MCSVM_CS ? 1 : model.nr_class)
    decvalues = Array{Float64}(undef, w_number, ninstances)
    for i = 1:ninstances
        # Each call fills column i of `decvalues`; the returned label is
        # intentionally discarded — callers derive predictions from the
        # decision values themselves.
        if probability_estimates
            ccall((:predict_probability, liblinear), Float64,
                  (Ref{Model}, Ptr{FeatureNode}, Ptr{Float64}),
                  m, nodeptrs[i], pointer(decvalues, w_number*(i-1)+1))
        else
            ccall((:predict_values, liblinear), Float64,
                  (Ref{Model}, Ptr{FeatureNode}, Ptr{Float64}),
                  m, nodeptrs[i], pointer(decvalues, w_number*(i-1)+1))
        end
    end

    return decvalues
end

# For one-class SVM, liblinear's `predict_values` yields a decision value
# whose sign marks outliers (negative) versus inliers (positive). This is
# not documented, but the C source clearly returns +/- 1:
# https://github.com/cjlin1/liblinear/blob/8dc206b782e07676dc0d00678bedd295ce85acf3/linear.cpp#L3295
# and scipy reports the same. Map the single decision value to a label string.
oneclass_output(decval) = only(decval) > 0 ? "inlier" : "outlier"

# predict
"""
    linear_predict(model, instances; probability_estimates=false, verbose=false)

Predict targets for `instances` (one instance per column) with `model`.

Returns a `(predictions, decvalues)` tuple. For regression models both
elements are the matrix of raw predicted values. For classifiers,
`predictions` holds the predicted labels (`"inlier"`/`"outlier"` strings
for one-class SVM) and `decvalues` the per-class decision values, or
probability estimates when `probability_estimates=true`.
"""
function linear_predict(
        model :: LinearModel{T},
        instances :: AbstractMatrix{U};
        probability_estimates :: Bool=false,
        verbose :: Bool=false) where {T, U<:Real}
    set_print(verbose)

    decvalues = predict_decision_values(model, instances, probability_estimates)

    # Regression solvers: the decision value *is* the predicted target.
    if is_regression_model(model)
        return decvalues, decvalues
    end

    if model.solver_type === ONECLASS_SVM
        classes = map(oneclass_output, eachcol(decvalues))
    elseif size(decvalues, 1) == 1
        # Binary classification stores a single decision value per instance.
        # liblinear's convention is: predict the first label when the value
        # is positive (for probability estimates, when P(first label) > 0.5).
        # `argmax` over a one-element column would always pick labels[1].
        threshold = probability_estimates ? 0.5 : 0.0
        classes = map(dv -> model.labels[dv[1] > threshold ? 1 : 2],
                      eachcol(decvalues))
    else
        classes = map(dv -> model.labels[argmax(dv)], eachcol(decvalues))
    end

    return classes, decvalues
end

end # module
9 changes: 9 additions & 0 deletions test/validation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,13 @@
@test (class .== labels[2:2:end]) == correct
end
end

@testset "Regression" begin
    x = rand(1, 10000)
    y = vec(2 .* x)
    m = linear_train(y, x, solver_type=LIBLINEAR.L2R_L2LOSS_SVR, eps=1e-5)
    y_ = first(linear_predict(m, x))
    # Regression predictions come back as a 1×n matrix of raw values.
    @test size(y_) == (1, length(y))
    # The model fits y = 2x, so predictions should track the targets to
    # within the epsilon-insensitive margin (liblinear default p = 0.1).
    @test vec(y_) ≈ y atol=0.3
end

end

0 comments on commit 6acc028

Please sign in to comment.