From bdbb5d7128da4b64a94034eb27c4156c083d916a Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:24:29 +0100 Subject: [PATCH 1/7] add github actions CI & update tagbot --- .github/workflows/TagBot.yml | 26 +++++++++++++++++++++-- .github/workflows/ci.yml | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index dd911f3..90dc100 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,11 +1,33 @@ name: TagBot on: - schedule: - - cron: 0 0 * * * + issue_comment: + types: + - created + workflow_dispatch: + inputs: + lookback: + default: 3 +permissions: + actions: read + checks: read + contents: write + deployments: read + issues: read + discussions: read + packages: read + pages: read + pull-requests: read + repository-projects: read + security-events: read + statuses: read jobs: TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} + # Edit the following line to reflect the actual name of the GitHub Secret containing your private key + ssh: ${{ secrets.DOCUMENTER_KEY }} + # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..edddcf6 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,41 @@ +name: CI +on: + push: + branches: + - main + tags: ['*'] + pull_request: + workflow_dispatch: +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. 
+ group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.3' + - '1' + os: + - ubuntu-latest + arch: + - x64 + - x86 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + files: lcov.info From a2aafccc1b96e6deed75735948e514ca656f4ee5 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:27:18 +0100 Subject: [PATCH 2/7] rm old files, update badge --- .travis.yml | 32 -------------------------------- README.md | 6 +++--- REQUIRE | 1 - appveyor.yml | 32 -------------------------------- 4 files changed, 3 insertions(+), 68 deletions(-) delete mode 100644 .travis.yml delete mode 100644 REQUIRE delete mode 100644 appveyor.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a7efde0..0000000 --- a/.travis.yml +++ /dev/null @@ -1,32 +0,0 @@ -## Documentation: http://docs.travis-ci.com/user/languages/julia/ -language: julia - -os: - - linux - - osx - -julia: - - 1.3 - - 1.4 - - nightly - -notifications: - email: false - -before_script: - - export PATH=$HOME/.local/bin:$PATH - -git: - depth: 99999999 - -script: - - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - pwd - - ls - - julia --project --check-bounds=yes -e 'using UUIDs; write("Project.toml", replace(read("Project.toml", String), r"uuid = .*?\n" =>"uuid = \"$(uuid4())\"\n"));import Pkg; Pkg.build(); Pkg.test(; coverage=true)' - -after_success: - # push coverage results to Coveralls - - julia --project -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' - # push coverage results to Codecov - - julia --project -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/README.md b/README.md index f0221a1..97ac69c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # LIBLINEAR -[![Build Status](https://travis-ci.org/innerlee/LIBLINEAR.jl.svg?branch=master)](https://travis-ci.org/innerlee/LIBLINEAR.jl) -[![Build status](https://ci.appveyor.com/api/projects/status/x9jq6w5mji1u6eff?svg=true)](https://ci.appveyor.com/project/innerlee/liblinear-jl) +[![CI](https://github.com/innerlee/LIBLINEAR.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/innerlee/LIBLINEAR.jl/actions/workflows/ci.yml) + Julia bindings for [LIBLINEAR](https://www.csie.ntu.edu.tw/~cjlin/liblinear/). @@ -15,7 +15,7 @@ iris = dataset("datasets", "iris") labels = iris.Species # First dimension of input data is features; second is instances -data = convert(Matrix, iris[:, 1:4])' +data = Matrix(iris[:, 1:4])' # Train SVM on half of the data using default parameters. See the linear_train # function in LIBLINEAR.jl for optional parameter settings. 
diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index 73052b1..0000000 --- a/REQUIRE +++ /dev/null @@ -1 +0,0 @@ -julia 1.0.0 diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 4203710..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,32 +0,0 @@ -environment: - matrix: - - julia_version: 1 - - julia_version: nightly - -platform: - - x86 # 32-bit - - x64 # 64-bit - -branches: - only: - - master - - /release-.*/ - -notifications: - - provider: Email - on_build_success: false - on_build_failure: false - on_build_status_changed: false - -install: - - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) - -build_script: - - echo "%JL_BUILD_SCRIPT%" - - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" - -test_script: - - pwd - - dir - - echo "%JL_TEST_SCRIPT%" - - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" From 8d07b49c80bec1719ed76d2f84c447291ef3b370 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:27:35 +0100 Subject: [PATCH 3/7] set branch name to `master` to match setup --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index edddcf6..2da12ab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: push: branches: - - main + - master tags: ['*'] pull_request: workflow_dispatch: From 45e4c09f066fc3be84d023d99a914fb48cf339ce Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:36:41 +0100 Subject: [PATCH 4/7] add more OSes --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2da12ab..724a6f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,8 @@ jobs: - '1' os: - ubuntu-latest + - macos-latest + - windows-latest arch: - x64 - x86 From 6b84b8f2d2a3ef7f397f7e18a07df33d12f1e952 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 02:04:45 +0100 Subject: [PATCH 5/7] updates for liblinear 247 --- Project.toml | 1 + src/LIBLINEAR.jl | 36 +++++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index 7941c3b..adee8b9 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ liblinear_jll = "275f1f90-abd2-5ca1-9ad8-abd4e3d66eb7" [compat] julia = "1.3" +liblinear_jll = "~2.47.0" [extras] DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" diff --git a/src/LIBLINEAR.jl b/src/LIBLINEAR.jl index 4900a6b..be35079 100644 --- a/src/LIBLINEAR.jl +++ b/src/LIBLINEAR.jl @@ -1,4 +1,3 @@ -__precompile__(true) module LIBLINEAR using SparseArrays @@ -22,7 +21,7 @@ const L2R_LR_DUAL = Cint(7) const L2R_L2LOSS_SVR = Cint(11) const L2R_L2LOSS_SVR_DUAL = Cint(12) const L2R_L1LOSS_SVR_DUAL = Cint(13) - +const ONECLASS_SVM = Cint(21) struct FeatureNode index :: Cint @@ -38,14 +37,16 @@ struct Problem end struct Parameter - solver_type :: Cint - eps :: Float64 - C :: Float64 - nr_weight :: Cint - weight_label :: Ptr{Cint} - weight :: Ptr{Float64} - p :: Float64 - init_sol :: Ptr{Float64} # Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC + solver_type :: Cint + eps :: Float64 # stopping tolerance + C :: Float64 + nr_weight :: Cint + weight_label :: Ptr{Cint} + weight :: 
Ptr{Float64} + p :: Float64 + nu :: Float64 # one-class SVM only + init_sol :: Ptr{Float64} # Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC + regularize_bias :: Cint end struct Model @@ -55,6 +56,7 @@ struct Model w :: Ptr{Float64} label :: Ptr{Cint} # label of each class bias :: Float64 + rho :: Float64 # one-class SVM only end # model in julia @@ -66,6 +68,7 @@ mutable struct LinearModel{T} _labels :: Vector{Cint} # internal label names labels :: Vector{T} bias :: Float64 + rho :: Float64 end # helper @@ -182,7 +185,9 @@ function linear_train( eps :: Real=Inf, C :: Real=1.0, p :: Real=0.1, + nu :: Real=0.5, init_sol :: Ptr{Float64}=convert(Ptr{Float64}, C_NULL), # initial solutions for solvers L2R_LR, L2R_L2LOSS_SVC + regularize_bias :: Cint = Cint(1), bias :: Real=-1.0, verbose :: Bool=false ) where {T, U<:Real} @@ -201,6 +206,7 @@ function linear_train( L2R_L2LOSS_SVR => 0.001, L2R_L2LOSS_SVR_DUAL => 0.1, L2R_L1LOSS_SVR_DUAL => 0.1, + ONECLASS_SVM => 0.01, )[solver_type]) nfeatures = size(instances, 1) # instances are in columns @@ -212,7 +218,7 @@ function linear_train( param = Array{Parameter}(undef, 1) param[1] = Parameter(solver_type, eps, C, Cint(length(weights)), - pointer(weight_labels), pointer(weights), p, init_sol) + pointer(weight_labels), pointer(weights), p, nu, init_sol, regularize_bias) # construct problem (nodes, nodeptrs) = instances2nodes(instances) @@ -238,7 +244,7 @@ function linear_train( w = copy(unsafe_wrap(Array, m.w, w_dim * w_number)) _labels = copy(unsafe_wrap(Array, m.label, m.nr_class)) model = LinearModel(solver_type, Int(m.nr_class), Int(m.nr_feature), - w, _labels, reverse_labels, m.bias) + w, _labels, reverse_labels, m.bias, m.rho) ccall((:free_model_content, liblinear), Cvoid, (Ptr{Model},), ptr) model @@ -262,10 +268,10 @@ function linear_predict( m = Array{Model}(undef, 1) m[1] = Model(Parameter(model.solver_type, .0, .0, Cint(0), - convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0, - convert(Ptr{Float64}, C_NULL)), + convert(Ptr{Cint}, C_NULL), convert(Ptr{Float64}, C_NULL), .0,.0, + convert(Ptr{Float64}, C_NULL), Cint(0)), model.nr_class, model.nr_feature, pointer(model.w), - pointer(model._labels), model.bias) + pointer(model._labels), model.bias, model.rho) (nodes, nodeptrs) = instances2nodes(instances) class = Array{T}(undef, ninstances) From 33fe0dcf4d57bf16e90ba2351a5ebba849e4dfc7 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 02:05:29 +0100 Subject: [PATCH 6/7] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index adee8b9..f136bbe 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,7 @@ name = "LIBLINEAR" uuid = "2d691ee1-e668-5016-a719-b2531b85e0f5" authors = ["innerlee"] repo = "https://github.com/innerlee/LIBLINEAR.jl.git" -version = "0.6.0" +version = "0.6.1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" From ccbc4b3527a0c141916d8b2321ef598b200ddaa6 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 20 Nov 2023 03:27:05 +0100 Subject: [PATCH 7/7] get oneclass working / not segfaulting --- src/LIBLINEAR.jl | 36 +++++++++++++++++++++++++++++++----- test/validation.jl | 5 +++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/LIBLINEAR.jl b/src/LIBLINEAR.jl index be35079..3891013 100644 --- a/src/LIBLINEAR.jl +++ b/src/LIBLINEAR.jl @@ -237,14 +237,21 @@ function linear_train( 
(Ptr{Problem}, Ptr{Parameter}), problem, param) m = unsafe_wrap(Array, ptr, 1)[1] - # extract w & _labels w_dim = Int(m.nr_feature + (bias >= 0 ? 1 : 0)) w_number = Int(m.nr_class == 2 && solver_type != MCSVM_CS ? 1 : m.nr_class) w = copy(unsafe_wrap(Array, m.w, w_dim * w_number)) - _labels = copy(unsafe_wrap(Array, m.label, m.nr_class)) + + # Fill in labels vector + # using `_labels = copy(unsafe_wrap(Array, m.label, m.nr_class))` segfaults + # when using `ONECLASS_SVM`. With this approach, we are just left with + # `_labels` being -1's, which seems better. + _labels = Vector{Cint}(undef, m.nr_class) + _labels .= -1 # initialize to some invalid state + ccall((:get_labels, liblinear), Cvoid, (Ptr{Model},Ptr{Vector{Cint}}), ptr, pointer(_labels)) + rho = solver_type == ONECLASS_SVM ? m.rho : 0.0 model = LinearModel(solver_type, Int(m.nr_class), Int(m.nr_feature), - w, _labels, reverse_labels, m.bias, m.rho) + w, _labels, reverse_labels, m.bias, rho) ccall((:free_model_content, liblinear), Cvoid, (Ptr{Model},), ptr) model @@ -274,7 +281,14 @@ function linear_predict( pointer(model._labels), model.bias, model.rho) (nodes, nodeptrs) = instances2nodes(instances) - class = Array{T}(undef, ninstances) + + if model.solver_type == ONECLASS_SVM + # In this case we need to return inlier/outlier class labels + # which may not be of type `T` + class = Array{String}(undef, ninstances) + else + class = Array{T}(undef, ninstances) + end w_number = Int(model.nr_class == 2 && model.solver_type != MCSVM_CS ? 1 : model.nr_class) decvalues = Array{Float64}(undef, w_number, ninstances) @@ -286,7 +300,19 @@ function linear_predict( output = ccall((:predict_values, liblinear), Float64, (Ptr{Cvoid}, Ptr{FeatureNode}, Ptr{Float64}), pointer(m), nodeptrs[i], pointer(decvalues, w_number*(i-1)+1)) end - class[i] = model.labels[round(Int,output)] + output_int = round(Int,output) + + # For one-class SVM, `predict_values` returns +/- 1 + # corresponding to outliers or not. This doesn't seem to be documented, + # but the code clearly returns +/- 1: + # https://github.com/cjlin1/liblinear/blob/8dc206b782e07676dc0d00678bedd295ce85acf3/linear.cpp#L3295 + # and that is the return from scipy as well. + if model.solver_type === ONECLASS_SVM + c = output_int == -1 ? "outlier" : output_int == 1 ? "inlier" : error("Unexpected output $output_int") + else + c = model.labels[output_int] + end + class[i] = c end (class, decvalues) diff --git a/test/validation.jl b/test/validation.jl index 9758309..60c4ee1 100644 --- a/test/validation.jl +++ b/test/validation.jl @@ -6,6 +6,11 @@ correct[[30,42,43,65,67]] .= false @test (class .== labels[2:2:end]) == correct + @testset "one-class" begin + model = linear_train(labels[1:2:end], inst[:, 1:2:end]; verbose=true, solver_type=Cint(21)) + (class, decvalues) = linear_predict(model, inst[:, 2:2:end], verbose=true) + @test all(in(("outlier", "inlier")), class) + end @testset "Sparse matrix" begin model = linear_train(labels[1:2:end], sparse(inst[:, 1:2:end]); verbose=true, solver_type=Cint(0)) GC.gc()
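
A minimal usage sketch of the one-class SVM support added in patches 5 and 7, assuming the API exactly as the diffs show it: `linear_train` with `solver_type = Cint(21)` (ONECLASS_SVM) and the new `nu` keyword, and `linear_predict` returning the strings "inlier"/"outlier" for this solver (as exercised in test/validation.jl). The RDatasets-based data loading follows the README example and is illustrative only.

using LIBLINEAR, RDatasets

iris = dataset("datasets", "iris")
labels = iris.Species
# First dimension of the input is features; second is instances (see README).
data = Matrix(iris[:, 1:4])'

# Train a one-class SVM on the odd-indexed instances.
# solver_type = Cint(21) is ONECLASS_SVM; nu is the one-class parameter
# introduced by these patches (default 0.5).
model = linear_train(labels[1:2:end], data[:, 1:2:end];
                     solver_type = Cint(21), nu = 0.5)

# Predict on the even-indexed instances; for ONECLASS_SVM the returned
# classes are the strings "inlier" / "outlier" rather than the input labels.
(class, decvalues) = linear_predict(model, data[:, 2:2:end])
@assert all(in(("outlier", "inlier")), class)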