-
Notifications
You must be signed in to change notification settings - Fork 0
/
spearman_correlation.go
97 lines (83 loc) · 2.87 KB
/
spearman_correlation.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package anomalia
import (
"errors"
"sort"
)
// SpearmanCorrelation is the configuration of the Spearman rank correlation
// algorithm, the non-parametric counterpart of the Pearson correlation. Prefer
// it when the distribution of the time series is unknown or is not normal.
//
// The coefficient produced by the correlator lies between -1 and 1:
//   - +1 means a perfect positive correlation between ranks
//   - -1 means a perfect negative correlation between ranks
//   - 0 means no correlation between ranks.
type SpearmanCorrelation struct {
	// current is the series whose values are paired, index by index, with
	// the values of target.
	current *TimeSeries
	// target is the series that current is correlated against.
	target *TimeSeries
}
// rank carries one paired observation together with the ranks assigned to it.
type rank struct {
	x     float64 // value taken from the current series
	y     float64 // value taken from the target series at the same index
	xRank float64 // rank of x among all x values
	yRank float64 // rank of y among all y values
}
// NewSpearmanCorrelation builds a SpearmanCorrelation that correlates the
// given current time series against the given target time series.
func NewSpearmanCorrelation(current, target *TimeSeries) *SpearmanCorrelation {
	correlator := SpearmanCorrelation{
		current: current,
		target:  target,
	}
	return &correlator
}
// Run computes the Spearman rank correlation between the current and the
// target time series.
//
// Every pair (current.Values[i], target.Values[i]) is ranked independently on
// both axes; values that tie share the mean of the 1-based positions they
// occupy in sorted order. The two rank vectors are then handed to
// NewPearsonCorrelation and the result of its Run is returned.
//
// The input series are not modified: the ranks are written into shallow copies
// that own freshly allocated Values slices, so the caller's data stays intact
// and Run can be invoked repeatedly on the same series.
//
// Run does not call sanityCheck. It panics with an index-out-of-range error
// when the target series holds fewer values than the current one.
func (sc *SpearmanCorrelation) Run() float64 {
	// Pair every current value with the target value at the same index.
	ranks := make([]rank, sc.current.Size())
	for i, v := range sc.current.Values {
		ranks[i] = rank{x: v, y: sc.target.Values[i]}
	}

	// Rank the current series. sort.Slice is not stable, which is fine here:
	// elements that compare equal end up with the same rank anyway.
	sort.Slice(ranks, func(i, j int) bool { return ranks[i].x < ranks[j].x })
	assignFractionalRanks(len(ranks),
		func(i, j int) bool { return ranks[i].x == ranks[j].x },
		func(i int, r float64) { ranks[i].xRank = r },
	)

	// Rank the target series. The x ranks travel with their pair, so the
	// (xRank, yRank) association survives this second sort.
	sort.Slice(ranks, func(i, j int) bool { return ranks[i].y < ranks[j].y })
	assignFractionalRanks(len(ranks),
		func(i, j int) bool { return ranks[i].y == ranks[j].y },
		func(i int, r float64) { ranks[i].yRank = r },
	)

	// Start from clones of the original values so that any entries beyond
	// len(ranks) are passed through unchanged, then overlay the ranks. The
	// ranks are emitted in the y-sorted order of the pairs.
	currentRanks := append([]float64(nil), sc.current.Values...)
	targetRanks := append([]float64(nil), sc.target.Values...)
	for i := range ranks {
		currentRanks[i] = ranks[i].xRank
		targetRanks[i] = ranks[i].yRank
	}

	// Shallow copies keep every other field of the series as is while
	// leaving the caller's Values slices untouched.
	// NOTE(review): assumes TimeSeries is a plain struct that is safe to copy
	// by value (no embedded locks) — confirm against its declaration.
	current, target := *sc.current, *sc.target
	current.Values, target.Values = currentRanks, targetRanks

	return NewPearsonCorrelation(&current, &target).Run()
}

// assignFractionalRanks walks n items that are already sorted in ascending
// order and gives every run of equal neighbours the mean of the 1-based
// positions the run occupies. equal reports whether items i and j tie; assign
// stores rank r for item i. Each item is visited once, so the pass is linear.
func assignFractionalRanks(n int, equal func(i, j int) bool, assign func(i int, r float64)) {
	for start := 0; start < n; {
		// Grow the run from start+1 so that a value which is not equal to
		// itself (NaN) still forms a run of length one.
		end := start + 1
		for end < n && equal(start, end) {
			end++
		}
		// Positions start+1 .. end average to (start+1+end)/2.
		avg := float64(start+1+end) / 2
		for i := start; i < end; i++ {
			assign(i, avg)
		}
		start = end
	}
}
// sanityCheck validates the dimensions of the two series. It returns nil when
// the current series has at least three points and both series have the same
// size, and an error otherwise.
func (sc *SpearmanCorrelation) sanityCheck() error {
	currentSize := sc.current.Size()
	// The target size is only consulted once the current series is known to
	// be long enough.
	if currentSize >= 3 && currentSize == sc.target.Size() {
		return nil
	}
	return errors.New("current and/or target series have an invalid dimension")
}