Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/main/java/WordNet/Similarity/Similarity.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,51 @@
import WordNet.SynSet;
import WordNet.WordNet;

import java.util.AbstractMap;
import java.util.ArrayList;

/**
 * Base class for similarity measures that work on synset id paths.
 *
 * <p>Besides holding the {@link WordNet} instance, it offers helpers to locate the
 * common subsumer (LCS) of two paths and to measure how far a concept is from it.
 * The helpers treat a path as a list of synset ids whose last element is the root
 * end of the path (the root id is appended at the end when it is simulated).
 */
public abstract class Similarity {
    /**
     * Id of the synset used as the simulated root ("Varlık"; presumably the
     * top-level "entity" synset - confirm against the WordNet data). Hard-coded.
     */
    private static final String ROOT_KEY = "TUR10-0814560";

    protected WordNet wordNet;

    /**
     * @param wordNet the WordNet instance the similarity measure operates on
     */
    public Similarity(WordNet wordNet) {
        this.wordNet = wordNet;
    }

    /**
     * Computes the similarity score of two synsets.
     *
     * @param synSet1 first synset
     * @param synSet2 second synset
     * @return the similarity score as defined by the concrete measure
     */
    public abstract double computeSimilarity(SynSet synSet1, SynSet synSet2);

    /**
     * Finds the first id of {@code path1} that also occurs in {@code path2}.
     *
     * <p>When {@code autoSimulateRoots} is set, both lists are modified in place:
     * the root id is appended to every non-empty path that does not already end
     * with it. Callers that later pass the same lists to
     * {@link #findLength(ArrayList, AbstractMap.SimpleEntry)} rely on this.
     *
     * @param path1             first id path
     * @param path2             second id path
     * @param autoSimulateRoots whether to append the root id to the paths first
     * @return an entry holding the shared id and the value
     *         {@code path1.size() - index + 1}, where {@code index} is the position
     *         of that id in {@code path1}; {@code null} when the paths share no id
     */
    public AbstractMap.SimpleEntry<String, Integer> findLCS(ArrayList<String> path1, ArrayList<String> path2, boolean autoSimulateRoots) {
        if (autoSimulateRoots) {
            autoSimulateRoot(path1);
            autoSimulateRoot(path2);
        }
        for (int i = 0; i < path1.size(); i++) {
            String LCSid = path1.get(i);
            if (path2.contains(LCSid)) {
                return new AbstractMap.SimpleEntry<>(LCSid, path1.size() - i + 1);
            }
        }
        return null;
    }

    /**
     * Appends the root id to {@code path} unless the path is empty or already
     * ends with the root id.
     *
     * @param path id path to modify in place
     */
    public void autoSimulateRoot(ArrayList<String> path) {
        if (path.isEmpty()) {
            return;
        }
        String lastKey = path.get(path.size() - 1);
        // Compare by content: ids read at runtime are not the same object as the
        // ROOT_KEY literal, so a reference comparison would always add the root again.
        if (!ROOT_KEY.equals(lastKey)) {
            path.add(ROOT_KEY);
        }
    }

    /**
     * Finds the length between the concept and the LCS, i.e. the index of the LCS
     * id inside {@code conceptPath}.
     * lso (lowest super ordinate) = most specific common subsumer (lcs)
     *
     * @param conceptPath id path of the concept
     * @param lcs         entry returned by {@code findLCS}; its key is the LCS id
     * @return number of steps from the start of {@code conceptPath} to the LCS id
     * @throws RuntimeException when {@code conceptPath} does not contain the LCS id
     */
    protected float findLength(ArrayList<String> conceptPath, AbstractMap.SimpleEntry<String, Integer> lcs) {
        String lcsId = lcs.getKey();
        int len = 0;
        for (String s : conceptPath) {
            // Content comparison: the LCS id object usually comes from the other path.
            if (s != null && s.equals(lcsId)) {
                return len;
            }
            len++;
        }
        throw new RuntimeException("Cannot compute the lengths. Given LCS should be extracted from the conceptPath. Two are unrelated. Try autoSimulateRoots.");
    }

}
16 changes: 12 additions & 4 deletions src/main/java/WordNet/Similarity/WuPalmer.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import WordNet.SynSet;
import WordNet.WordNet;

import java.util.AbstractMap;
import java.util.ArrayList;

public class WuPalmer extends Similarity{
Expand All @@ -12,9 +13,16 @@ public WuPalmer(WordNet wordNet){
}

/**
 * Computes the Wu-Palmer style score of two synsets as
 * {@code 2 * depth(lcs) / (len(c1, lcs) + len(c2, lcs) + 2 * depth(lcs))}.
 *
 * <p>The root id is simulated on both paths before the LCS is searched, so the
 * two path lists obtained from the WordNet are modified in place.
 *
 * @param synSet1 first synset
 * @param synSet2 second synset
 * @return the similarity score, or {@code -1} when no common subsumer is found
 */
public double computeSimilarity(SynSet synSet1, SynSet synSet2) {
    ArrayList<String> path1 = wordNet.findPathToRoot(synSet1);
    ArrayList<String> path2 = wordNet.findPathToRoot(synSet2);
    AbstractMap.SimpleEntry<String, Integer> lcs = findLCS(path1, path2, true);
    // findLCS yields null when the paths share no id (e.g. one path is empty);
    // report that with the same -1 sentinel instead of failing on a null entry.
    if (lcs == null) {
        return -1;
    }
    float lcsDepth = lcs.getValue();
    if (lcsDepth == -1) return -1; //TODO: -1 is used for null returns. Should return nullable results
    float c1len = findLength(path1, lcs);
    float c2len = findLength(path2, lcs);

    float num = 2 * lcsDepth;
    float denom = c1len + c2len + num;
    return num / denom;
}
}
49 changes: 49 additions & 0 deletions src/main/java/WordNet/TestWordNet.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
package WordNet;

import Dictionary.Pos;
import WordNet.Similarity.Similarity;
import WordNet.Similarity.WuPalmer;
import javafx.util.Pair;

import java.io.Console;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Scanner;

public class TestWordNet {
Expand Down Expand Up @@ -91,7 +96,51 @@ public static void transferHierarchy(WordNet source, WordNet destination){

}

/**
 * Prints, for every registered similarity algorithm, the score of a fixed set of
 * literal pairs. Each literal is resolved to its synset with sense number 1.
 */
public static void testSimilarityAlgortihms(){
    // Literal pairs to score, in output order.
    // Further candidates, currently disabled: göz-göz, göz-gözlük, göz-gözleme,
    // göz-gönül, kedi-uzay.
    String[][] literalPairs = {
        {"kedi", "kedi"},
        {"varlık", "varlık"},
        {"varlık", "fiziksel varlık"},
        {"kedi", "memeliler"},
        {"masa", "varlık"},
        {"kedi", "masa"},
        {"kalem", "masa"},
        {"kedi", "köpek"},
        {"kedi", "hayvan"},
    };

    // Algorithms under test, all sharing one WordNet instance.
    WordNet net = new WordNet();
    ArrayList<Similarity> measures = new ArrayList<>();
    measures.add(new WuPalmer(net));

    // One header line per algorithm, then one line per pair.
    for (Similarity measure : measures) {
        System.out.println("------" + measure.toString() + "------");
        for (String[] literalPair : literalPairs) {
            String first = literalPair[0];
            String second = literalPair[1];
            double score = measure.computeSimilarity(
                    net.getSynSetWithLiteral(first, 1),
                    net.getSynSetWithLiteral(second, 1));
            System.out.println(first + " - " + second + " (" + score + " )");
        }
        System.out.println("\n");
    }
}

public static void main(String[] args){
testSimilarityAlgortihms();
System.exit(-1);

WordNet turkish = new WordNet();
turkish.saveAsXml("deneme.xml");
//transferHierarchy(turkish, domain);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/WordNet/WordNet.java
Original file line number Diff line number Diff line change
Expand Up @@ -1241,7 +1241,7 @@ public int findPathLength(ArrayList<String> pathToRootOfSynSet1, ArrayList<Strin
}

/**
* Returns the depth of path.
* Returns the depth of the LCS, i.e. the lowest superordinate (LSO), also called the most specific common subsumer.
*
* @param pathToRootOfSynSet1 first list of Strings
* @param pathToRootOfSynSet2 second list of Strings
Expand Down Expand Up @@ -1279,7 +1279,7 @@ public String findLCSid(ArrayList<String> pathToRootOfSynSet1, ArrayList<String>
* @param pathToRootOfSynSet2 second list of Strings
* @return depth and ID of the LCS
*/
private SimpleEntry<String, Integer> findLCS(ArrayList<String> pathToRootOfSynSet1, ArrayList<String> pathToRootOfSynSet2) {
public SimpleEntry<String, Integer> findLCS(ArrayList<String> pathToRootOfSynSet1, ArrayList<String> pathToRootOfSynSet2) {
for (int i = 0; i < pathToRootOfSynSet1.size(); i++) {
String LCSid = pathToRootOfSynSet1.get(i);
if (pathToRootOfSynSet2.contains(LCSid)) {
Expand Down