-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathJaroWinklerDistanceStrategy.cs
54 lines (32 loc) · 1.6 KB
/
JaroWinklerDistanceStrategy.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
using Gsemac.Core;
using System;
namespace Gsemac.Text {

    /// <summary>
    /// String distance strategy that reports the Jaro–Winkler distance, i.e. <c>1.0</c> minus the
    /// Jaro–Winkler similarity of the two inputs.
    /// </summary>
    internal class JaroWinklerDistanceStrategy :
        IStringDistanceStrategy {

        // Public members

        /// <summary>
        /// Computes the Jaro–Winkler distance between <paramref name="first"/> and <paramref name="second"/>.
        /// </summary>
        /// <param name="first">The first string to compare.</param>
        /// <param name="second">The second string to compare.</param>
        /// <param name="normalizeResult">Not read by this implementation; the same value is returned either way.</param>
        /// <returns>
        /// <c>1.0</c> minus the Jaro similarity obtained from <see cref="JaroDistanceStrategy"/>, where the
        /// similarity is first boosted by the common-prefix bonus when it exceeds the weight threshold.
        /// </returns>
        public double ComputeDistance(string first, string second, bool normalizeResult = false) {

            // Reference implementation of Jaro–Winkler: https://stackoverflow.com/a/19165108/5383169 (leebickmtu)
            // A second implementation with additional commentary: https://www.geeksforgeeks.org/jaro-and-jaro-winkler-similarity/ (andrew1234)

            double similarity = new JaroDistanceStrategy().ComputeSimilarity(first, second);

            // The prefix bonus is only considered for pairs whose Jaro similarity is above the threshold.
            bool atOrBelowThreshold = similarity <= WeightThreshold;

            if (!atOrBelowThreshold) {

                // Count the leading characters shared by both strings, looking at no more than NumChars of them.
                int limit = Math.Min(Math.Min(first.Length, second.Length), NumChars);
                int sharedPrefix = 0;

                while (sharedPrefix < limit && first[sharedPrefix] == second[sharedPrefix])
                    ++sharedPrefix;

                // Without a shared prefix there is no bonus and the plain Jaro similarity is used as-is.
                if (sharedPrefix > 0)
                    similarity += ScaleFactor * sharedPrefix * (1.0 - similarity);

            }

            // Distance is the complement of similarity.
            return 1.0 - similarity;

        }

        // Private members

        // The following constants are taken from Winkler's paper.

        // Jaro similarity must be strictly greater than this for the prefix bonus to apply.
        private const double WeightThreshold = 0.7;

        // Upper bound on the number of leading characters counted towards the common prefix.
        private const int NumChars = 4;

        // Weight applied per matching prefix character when boosting the similarity.
        private const double ScaleFactor = 0.1;

    }

}