diff --git a/src/main/java/WordNet/Similarity/Similarity.java b/src/main/java/WordNet/Similarity/Similarity.java index 7077473..c4cd151 100644 --- a/src/main/java/WordNet/Similarity/Similarity.java +++ b/src/main/java/WordNet/Similarity/Similarity.java @@ -3,11 +3,51 @@ import WordNet.SynSet; import WordNet.WordNet; +import java.util.AbstractMap; +import java.util.ArrayList; + public abstract class Similarity { + private static final String ROOT_KEY = "TUR10-0814560"; //Id of the root synset "Varlık" (entity) - hardcoded! + protected WordNet wordNet; public abstract double computeSimilarity(SynSet synSet1, SynSet synSet2); public Similarity(WordNet wordNet){ this.wordNet = wordNet; } + + public AbstractMap.SimpleEntry findLCS(ArrayList path1, ArrayList path2, boolean autoSimulateRoots) { /* Returns (id, depth) of the first path1 element also found in path2, or null when there is none; with autoSimulateRoots both lists may get ROOT_KEY appended. */ + if(autoSimulateRoots){ + autoSimulateRoot(path1); + autoSimulateRoot(path2); + } + for (int i = 0; i < path1.size(); i++) { + String LCSid = path1.get(i); + if (path2.contains(LCSid)) { + return new AbstractMap.SimpleEntry<>(LCSid, path1.size() - i + 1); + } + } + return null; + } + public void autoSimulateRoot(ArrayList path){ /* Appends ROOT_KEY to a non-empty path that does not already end with it. */ + if(path.size() == 0) return; + String lastKey = path.get(path.size()-1); + if(!ROOT_KEY.equals(lastKey)){ /* compare by value: != tests object identity and would append a duplicate root when the last id is an equal but distinct String */ + path.add(ROOT_KEY); + } + } + + /* + Finds the length between the concept and the lcs. + lso (lowest super ordinate) = most specific common subsumer (lcs) + */ + protected float findLength(ArrayList conceptPath, AbstractMap.SimpleEntry lcs){ + int len = 0; + for (String s : conceptPath) { + if(s.equals(lcs.getKey())) return len; /* value comparison, not reference identity */ + len++; + } + throw new RuntimeException("Cannot compute the lengths. Given LCS should be extracted from the conceptPath. Two are unrelated. 
Try autoSimulateRoots."); + } + } diff --git a/src/main/java/WordNet/Similarity/WuPalmer.java b/src/main/java/WordNet/Similarity/WuPalmer.java index c214f2b..3dddcab 100644 --- a/src/main/java/WordNet/Similarity/WuPalmer.java +++ b/src/main/java/WordNet/Similarity/WuPalmer.java @@ -3,6 +3,7 @@ import WordNet.SynSet; import WordNet.WordNet; +import java.util.AbstractMap; import java.util.ArrayList; public class WuPalmer extends Similarity{ @@ -12,9 +13,16 @@ public WuPalmer(WordNet wordNet){ } public double computeSimilarity(SynSet synSet1, SynSet synSet2) { - ArrayList pathToRootOfSynSet1 = wordNet.findPathToRoot(synSet1); - ArrayList pathToRootOfSynSet2 = wordNet.findPathToRoot(synSet2); - float LCSdepth = wordNet.findLCSdepth(pathToRootOfSynSet1, pathToRootOfSynSet2); - return 2 * LCSdepth / (pathToRootOfSynSet1.size() + pathToRootOfSynSet2.size()); + ArrayList path1 = wordNet.findPathToRoot(synSet1); + ArrayList path2 = wordNet.findPathToRoot(synSet2); + AbstractMap.SimpleEntry lcs = findLCS(path1,path2,true); + if(lcs == null) return -1; /* findLCS yields null when the paths share no id */ //TODO: -1 is used for null returns. 
Should return nullable results + float lcsDepth = lcs.getValue(); + float c1len = findLength(path1,lcs); + float c2len = findLength(path2,lcs); + + float num = 2 * lcsDepth; + float denom = c1len + c2len + num; + return num / denom; } } diff --git a/src/main/java/WordNet/TestWordNet.java b/src/main/java/WordNet/TestWordNet.java index a6fdd25..43b9379 100644 --- a/src/main/java/WordNet/TestWordNet.java +++ b/src/main/java/WordNet/TestWordNet.java @@ -1,10 +1,15 @@ package WordNet; import Dictionary.Pos; +import WordNet.Similarity.Similarity; +import WordNet.Similarity.WuPalmer; +import javafx.util.Pair; +import java.io.Console; import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; +import java.util.Locale; import java.util.Scanner; public class TestWordNet { @@ -91,7 +96,51 @@ public static void transferHierarchy(WordNet source, WordNet destination){ } + public static void testSimilarityAlgortihms(){ + //wordpairs + ArrayList> wordpairs = new ArrayList<>(); + wordpairs.add(new Pair<>("kedi","kedi")); + wordpairs.add(new Pair<>("varlık","varlık")); + wordpairs.add(new Pair<>("varlık","fiziksel varlık")); + wordpairs.add(new Pair<>("kedi","memeliler")); + wordpairs.add(new Pair<>("masa","varlık")); + wordpairs.add(new Pair<>("kedi","masa")); + wordpairs.add(new Pair<>("kalem","masa")); + wordpairs.add(new Pair<>("kedi","köpek")); + wordpairs.add(new Pair<>("kedi","hayvan")); + + //wordpairs.add(new Pair<>("kedi","kedi")); + //wordpairs.add(new Pair<>("kedi","köpek")); + //wordpairs.add(new Pair<>("göz","göz")); +// wordpairs.add(new Pair<>("göz","gözlük")); +// wordpairs.add(new Pair<>("göz","gözleme")); +// wordpairs.add(new Pair<>("göz","gönül")); +// wordpairs.add(new Pair<>("kedi","uzay")); + + //algorithms + WordNet wordnet = new WordNet(); + ArrayList algortihms = new ArrayList<>(); + algortihms.add(new WuPalmer(wordnet)); + + //results + for (Similarity algortihm : algortihms) { + System.out.println("------" + algortihm.toString() + "------"); + for (Pair wp : 
wordpairs) { + String w1 = wp.getKey(); + String w2 = wp.getValue(); + SynSet syn1 = wordnet.getSynSetWithLiteral (w1,1); + SynSet syn2 = wordnet.getSynSetWithLiteral (w2,1); + double simScore = algortihm.computeSimilarity(syn1,syn2); + System.out.println(w1 + " - " + w2 + " (" + simScore + " )"); + } + System.out.println("\n"); + } + } + public static void main(String[] args){ + testSimilarityAlgortihms(); + System.exit(-1); + WordNet turkish = new WordNet(); turkish.saveAsXml("deneme.xml"); //transferHierarchy(turkish, domain); diff --git a/src/main/java/WordNet/WordNet.java b/src/main/java/WordNet/WordNet.java index 2d806c7..7f1d9bf 100644 --- a/src/main/java/WordNet/WordNet.java +++ b/src/main/java/WordNet/WordNet.java @@ -1241,7 +1241,7 @@ public int findPathLength(ArrayList pathToRootOfSynSet1, ArrayList pathToRootOfSynSet1, ArrayList * @param pathToRootOfSynSet2 second list of Strings * @return depth and ID of the LCS */ - private SimpleEntry findLCS(ArrayList pathToRootOfSynSet1, ArrayList pathToRootOfSynSet2) { + public SimpleEntry findLCS(ArrayList pathToRootOfSynSet1, ArrayList pathToRootOfSynSet2) { for (int i = 0; i < pathToRootOfSynSet1.size(); i++) { String LCSid = pathToRootOfSynSet1.get(i); if (pathToRootOfSynSet2.contains(LCSid)) {