Skip to content
This repository was archived by the owner on Feb 3, 2020. It is now read-only.

Commit 19c74cf

Browse files
committed
Merge branch 'release/v0.4'
2 parents 172f02b + f81a47f commit 19c74cf

File tree

8 files changed

+135
-103
lines changed

8 files changed

+135
-103
lines changed

.travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
language: julia
22
julia:
33
- release
4-
- nightly
4+
#- nightly
55
notifications:
66
email: false
77
script:

README.md

+6-10
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
Kd tree for Julia.
44

5-
[![Build Status](https://travis-ci.org/KristofferC/KDtree.jl.svg)](https://travis-ci.org/KristofferC/KDtree.jl) [![Coverage Status](https://coveralls.io/repos/KristofferC/KDtree.jl/badge.svg)](https://coveralls.io/r/KristofferC/KDtree.jl)
5+
[![Build Status](https://travis-ci.org/KristofferC/KDtree.jl.svg?branch=master)](https://travis-ci.org/KristofferC/KDtree.jl) [![Coverage Status](https://coveralls.io/repos/KristofferC/KDtree.jl/badge.svg)](https://coveralls.io/r/KristofferC/KDtree.jl)
66

77
Currently supports KNN-search and finding all points inside a hypersphere centered at a given point. Currently only
88
uses Euclidean distance.
99

10-
Some care has been taken with regards to performance. For example the tree is not implemented as nodes pointing to other nodes but instead as an ensamble of densely packed arrays. This should give better cache locality. The negative aspect of this storage method is that the tree is immutable and new data can not be entered into the tree after it has been created.
10+
Some care has been taken with regard to performance. For example, the tree is not implemented as nodes pointing to other nodes but instead as a collection of densely packed arrays. This should give better cache locality. The negative aspect of this storage method is that the tree is immutable and new data cannot be entered into the tree after it has been created.
1111

12-
There are some benchmarks for the creation of the tree and the different searches in the benchmark folder.
12+
There are some benchmarks for the creation of the tree and the different searches in the benchmark folder.
1313

1414
Since this is a new project there are still some obvious improvements which are listed in the TODO list.
1515

@@ -64,11 +64,7 @@ gives both the indices and distances:
6464
### TODOs
6565
* Implement a leaf size argument where the sub tree stop splitting after
6666
only a certain number of nodes are left in the sub tree.
67-
* Add proper benchmarks, compare with others implementations.
67+
* Add proper benchmarks, compare with other implementations. Update: partly done.
6868
* Add other measures than Euclidean distance.
69-
* Use a bounded priority queue for storing the K best points in KNN instead of a linear array (should only matter for large K).
70-
* Proper investigation of memory allocations and where time is spent.
71-
* Throw errors at dimension mismatch in the functions etc.
72-
73-
74-
69+
* Use a bounded priority queue for storing the K best points in KNN instead of a linear array (should only matter for large K).
70+
Julia's built-in priority queue (PQ) is slower than a normal array.

benchmark/bench_knn.jl

+9-26
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ using KDtree
33

44
function run_bench_knn()
55
dims = 3
6-
n_points = [10^i for i in 3:5]
6+
n_points = [10^i for i in 3:6]
77
ks = [1, 3, 10, 50, 100, 500]
88

9-
n_iters = 10
9+
n_iters = 100
1010

1111
times = fill(0.0, length(ks), length(n_points))
1212

@@ -33,28 +33,11 @@ end
3333
run_bench_knn()
3434

3535
#=
36-
2015-02-04 No bounds, type fixes
37-
[4.139509999999997e-6 6.266810000000003e-6 1.1824519999999996e-5 1.2813520000000002e-5
38-
6.926150000000001e-6 8.745540000000002e-6 1.5848950000000007e-5 1.9322909999999995e-5
39-
1.2340789999999995e-5 1.7301359999999997e-5 2.9695020000000008e-5 3.380342999999999e-5
40-
3.891018e-5 5.718188999999997e-5 8.192254999999999e-5 0.00010137918999999997
41-
6.649346000000001e-5 9.620713999999998e-5 0.00014474611999999997 0.00016459152000000002
42-
0.0004432140299999998 0.0006229702799999999 0.0008457700199999998 0.0009439302800000001]
43-
44-
45-
2015-02-03: ArrayViews:
46-
[1.3996e-5 2.1771e-5 5.1316e-5 4.4474e-5
47-
2.0837e-5 2.5502e-5 5.9402e-5 6.8732e-5
48-
3.2034e-5 4.1364e-5 8.7393e-5 8.2106e-5
49-
8.5838e-5 8.7394e-5 0.000183183 0.000170121
50-
0.000156437 0.000143063 0.000305409 0.000290792
51-
0.000695723 0.001162545 0.001638387 0.001731067]
52-
53-
2015-02-02:
54-
[2.3015e-5 2.1771e-5 7.0288e-5 6.0958e-5
55-
2.7368e-5 3.5143e-5 8.957e-5 7.7752e-5
56-
6.0647e-5 5.0072e-5 0.000104499 0.000111029
57-
0.000194691 0.000216772 0.000293591 0.000318472
58-
0.000385027 0.000359835 0.000650316 0.000582517
59-
0.003119404 0.005313561 0.006453713 0.006805152]
36+
2015-02-06 (removed old benchmark results since they were measured with too volatile a method)
37+
[2.54688e-6 7.427709999999996e-6 4.546329999999998e-6 4.439879999999999e-6
38+
3.1702599999999993e-6 3.8126800000000002e-6 5.36744e-6 1.047631e-5
39+
9.24091e-6 2.3145960000000002e-5 2.319162000000002e-5 2.1059090000000006e-5
40+
4.556972999999999e-5 5.366267e-5 6.215093000000004e-5 7.335329000000001e-5
41+
4.875141000000001e-5 5.9763709999999965e-5 0.00013036107000000003 0.00018395148999999993
42+
0.0007265081999999999 0.0011446717200000003 0.0012804762699999998 0.00121552762]
6043
=#

benchmark/bench_query_ball.jl

+6-15
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ function run_bench_query_ball()
99
times = fill(0.0, length(rs), length(n_points))
1010

1111
# Compile it
12-
n_iters = 1000
12+
n_iters = 2000
1313

1414
tree = KDTree(randn(2,2))
1515
query_ball_point(tree, zeros(2), 0.1)
@@ -33,18 +33,9 @@ run_bench_query_ball()
3333

3434

3535
#=
36-
37-
38-
2015-02-03: ArrayViews + no sqrt
39-
[1.1196e-5 1.9593e-5 7.5885e-5
40-
1.5239e-5 4.1986e-5 0.000167011
41-
1.9905e-5 7.8374e-5 0.00041986
42-
2.6125e-5 0.000124713 0.000603665]
43-
44-
45-
2015-02-03:
46-
[2.1149e-5 3.2966e-5 8.3661e-5
47-
2.146e-5 5.1628e-5 0.000229523
48-
2.7369e-5 0.000116005 0.000453138
49-
4.012e-5 0.000204643 0.000789959]
36+
2015-02-06: (removed old inaccurate results)
37+
[6.202539999999983e-6 3.595097900000001e-5 0.00021109113900000007
38+
1.3630627000000005e-5 7.9238053e-5 0.0006688952190000003
39+
5.310767400000003e-5 0.0001541270569999998 0.001355720711000002
40+
3.338056700000001e-5 0.00026903708300000007 0.0022353193569999976]
5041
=#

src/kd_tree.jl

+16-13
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,6 @@ function _k_nearest_neighbour{T <: FloatingPoint}(tree::KDTree,
250250
best_dists::Vector{T},
251251
index::Int=1)
252252

253-
min_d, max_d = get_min_max_distance(tree.hyper_recs[index], point)
254-
if min_d > best_dists[k]
255-
return
256-
end
257253
if is_leaf_node(tree, index)
258254
dist_d = euclidean_distance(get_point(tree, index), point)
259255
if dist_d <= best_dists[k] # Closer than the currently k closest.
@@ -272,15 +268,23 @@ function _k_nearest_neighbour{T <: FloatingPoint}(tree::KDTree,
272268
return
273269
end
274270

275-
dist_l = get_min_max_distance(tree.hyper_recs[get_left_node(index)], point)
276-
dist_r = get_min_max_distance(tree.hyper_recs[get_right_node(index)], point)
277-
if dist_l < dist_r
278-
_k_nearest_neighbour(tree, point, k, best_idxs, best_dists, get_left_node(index))
279-
_k_nearest_neighbour(tree, point, k, best_idxs, best_dists, get_right_node(index))
271+
if point[tree.split_dims[index]] < tree.split_vals[index]
272+
close = get_left_node(index)
273+
far = get_right_node(index)
280274
else
281-
_k_nearest_neighbour(tree, point, k, best_idxs, best_dists, get_right_node(index))
282-
_k_nearest_neighbour(tree, point, k,best_idxs, best_dists, get_left_node(index))
275+
far = get_left_node(index)
276+
close = get_right_node(index)
283277
end
278+
279+
_k_nearest_neighbour(tree, point, k, best_idxs, best_dists, close)
280+
281+
# Only visit the far node if the search sphere crosses the splitting hyperplane
282+
if abs2(point[tree.split_dims[index]] - tree.split_vals[index]) < best_dists[k]
283+
_k_nearest_neighbour(tree, point, k, best_idxs, best_dists, far)
284+
end
285+
286+
return
287+
284288
end
285289
# Returns the indices for all points in the tree inside a
286290
# hypersphere of a given point with a given radius
@@ -370,5 +374,4 @@ function select_spec!{T <: FloatingPoint}(v::AbstractVector, k::Int, lo::Int,
370374
end
371375
end
372376
return
373-
end
374-
377+
end

test/runtests.jl

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
module TestKDtree
22
using FactCheck
33
using Base.Test
4+
using Base.Collections
5+
46
using KDtree
57

6-
FactCheck.onlystats(true)
8+
#FactCheck.onlystats(true)
79

8-
include("test_kd_tree.jl")
10+
include("test_knn.jl")
11+
include("test_query_ball.jl")
912

10-
FactCheck.exitstatus()
13+
#FactCheck.exitstatus()
1114

1215
end #module

test/test_knn.jl

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
facts("KDtree") do
2+
3+
context("KDtree.nearest_neighbour") do
4+
5+
dim_data = 3
6+
size_data = 1000
7+
data = rand(dim_data, size_data)
8+
9+
tree = KDTree(data)
10+
11+
# Checking that we find existing points
12+
for i = 1:50
13+
n = rand(1:size_data)
14+
idx, dist = k_nearest_neighbour(tree, data[:,n], 1)
15+
@fact n => idx[1]
16+
@fact KDtree.euclidean_distance(data[:,idx[1]], data[:, n]) => roughly(0.0)
17+
end
18+
19+
# Check results vs brute force
20+
21+
pq = PriorityQueue{Int, Float64}(Base.Order.Reverse)
22+
23+
k = 3
24+
for i in 1:k
25+
enqueue!(pq, -i, Inf)
26+
end
27+
28+
dim_data = 3
29+
size_data = 500
30+
data = rand(dim_data, size_data)
31+
tree = KDTree(data)
32+
p = rand(dim_data)
33+
34+
# Brute force
35+
for n in 1:size_data
36+
d = sqrt(KDtree.euclidean_distance(data[:,n], p))
37+
if d <= peek(pq)[2] # Closer than the currently k closest.
38+
dequeue!(pq)
39+
enqueue!(pq, n, d)
40+
end
41+
end
42+
43+
idx, dist = k_nearest_neighbour(tree, p, k)
44+
45+
for i in 1:length(idx)
46+
@fact idx[i] in keys(pq) => true
47+
end
48+
49+
# 8 node rectangle
50+
data = [0.0 0.0 0.0 0.5 0.5 1.0 1.0 1.0;
51+
0.0 0.5 1.0 0.0 1.0 0.0 0.5 1.0]
52+
tree = KDTree(data)
53+
54+
idxs, dists = k_nearest_neighbour(tree, [0.8, 0.8], 1)
55+
@fact idxs[1] => 8 # Should be closest to top right corner
56+
@fact sqrt(0.2^2 + 0.2^2) => roughly(dists[1])
57+
58+
idxs, dists = k_nearest_neighbour(tree, [0.1, 0.8], 3)
59+
@fact idxs => [3, 2, 5]
60+
61+
@fact_throws k_nearest_neighbour(tree, [0.1, 0.8], 10) # k > n_points
62+
63+
@fact_throws k_nearest_neighbour(tree, [0.1], 10) # n_dim != trees dim
64+
end #context
65+
66+
end # facts
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,5 @@
11
facts("KDtree") do
22

3-
context("KDtree.nearest_neighbour") do
4-
5-
dim_data = 3
6-
size_data = 1000
7-
data = randn(dim_data, size_data )
8-
9-
tree = KDTree(data)
10-
11-
# Checking that we find existing points
12-
for i = 1:20
13-
n = rand(1:size_data)
14-
idx, dist = k_nearest_neighbour(tree, data[:,n], 1)
15-
@fact n => idx[1]
16-
@fact KDtree.euclidean_distance(data[:,idx[1]], data[:, n]) => roughly(0.0)
17-
end
18-
19-
20-
21-
# 8 node rectangle
22-
data = [0.0 0.0 0.0 0.5 0.5 1.0 1.0 1.0;
23-
0.0 0.5 1.0 0.0 1.0 0.0 0.5 1.0]
24-
tree = KDTree(data)
25-
26-
idxs, dists = k_nearest_neighbour(tree, [0.8, 0.8], 1)
27-
@fact idxs[1] => 8 # Should be closest to top right corner
28-
@fact sqrt(0.2^2 + 0.2^2) => roughly(dists[1])
29-
30-
idxs, dists = k_nearest_neighbour(tree, [0.1, 0.8], 3)
31-
@fact idxs => [3, 2, 5]
32-
33-
@fact_throws k_nearest_neighbour(tree, [0.1, 0.8], 10) # k > n_points
34-
35-
@fact_throws k_nearest_neighbour(tree, [0.1], 10) # n_dim != trees dim
36-
end #context
37-
383
context("KDtree.ball_query") do
394

405
data = [0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0;
@@ -56,8 +21,32 @@ facts("KDtree") do
5621
@fact idxs => [1, 2, 3, 4, 5, 6, 7, 8] #
5722

5823
@fact_throws query_ball_poin(tree, [0.1], 1.0) # n_dim != trees dim
24+
25+
26+
idx = Int[]
27+
dim_data = 3
28+
size_data = 100
29+
data = rand(dim_data, size_data)
30+
tree = KDTree(data)
31+
p = zeros(dim_data)
32+
r = 0.3
33+
# Brute force
34+
for n in 1:size_data
35+
d = sqrt(KDtree.euclidean_distance([data[:,n]], p))
36+
if d <= r # Point lies within radius r of p.
37+
push!(idx, n)
38+
end
39+
end
40+
41+
q_idxs = query_ball_point(tree, p, r)
42+
43+
for i in 1:length(idx)
44+
@fact q_idxs[i] in idx => true
45+
end
46+
5947
end #context
6048

49+
6150
context("KDtree.yolo_testing") do
6251

6352
# Tests that the n-points in a random hyper sphere around
@@ -82,4 +71,5 @@ facts("KDtree") do
8271
@fact idxs_ball[i] in idxs_knn => true
8372
end
8473
end #context
74+
8575
end # facts

0 commit comments

Comments
 (0)