This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Commit 953c902: Merge branch 'release/0.3.0'
patrickwestphal committed Dec 14, 2017
2 parents: 55ace88 + 121f3cd
Showing 16 changed files with 111 additions and 24 deletions.
README.md (5 changes: 4 additions & 1 deletion)
@@ -1,4 +1,7 @@
# SANSA OWL
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/net.sansa-stack/sansa-owl-parent_2.11/badge.svg)](https://maven-badges.herokuapp.com/maven-central/net.sansa-stack/sansa-owl-parent_2.11)
[![Build Status](https://ci.aksw.org/jenkins/job/SANSA%20OWL%20Layer/job/develop/badge/icon)](https://ci.aksw.org/jenkins/job/SANSA%20OWL%20Layer/job/develop/)
[![Twitter](https://img.shields.io/twitter/follow/SANSA_Stack.svg?style=social)](https://twitter.com/SANSA_Stack)

## Description
SANSA OWL is a library to read OWL files into [Spark](https://spark.apache.org) or [Flink](https://flink.apache.org). It allows files to reside in HDFS as well as in a local file system and distributes them across Spark RDDs/Datasets or Flink DataSets.
@@ -37,7 +40,7 @@ ObjectProperty: bar:prop
Characteristics:
Asymmetric
```
or parsed [OWL API](http://owlapi.sourceforge.net/) axiom objects. We call these intermediate string-based entities 'expressions' and the corresponding distributed data structure 'expressions dataset'. The final data structure holding OWL API axiom objects is called 'axiom dataset'.

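To make the README's distinction between expressions and axiom datasets concrete, here is a minimal, hypothetical Spark sketch. The builder, type, and registrator names are taken from the test code elsewhere in this diff; the imports, app name, master, and file path are assumptions added for illustration.

```scala
import org.apache.spark.sql.SparkSession
import net.sansa_stack.owl.spark.dataset.{ManchesterSyntaxOWLAxiomsDatasetBuilder, OWLAxiomsDataset}

// Session wired with the Kryo registrator introduced in this release, so OWL API
// objects that contain unmodifiable Java collections can be serialized.
val spark = SparkSession.builder()
  .appName("sansa-owl-example")  // placeholder
  .master("local[*]")            // placeholder
  .config("spark.kryo.registrator",
    "net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator")
  .getOrCreate()

// Build the 'axiom dataset': each element is a parsed OWL API axiom object.
val axioms: OWLAxiomsDataset =
  ManchesterSyntaxOWLAxiomsDatasetBuilder.build(spark, "path/to/ont.owl")  // placeholder path
println(s"parsed ${axioms.count()} axioms")
```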
pom.xml (34 changes: 28 additions & 6 deletions)
@@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>net.sansa-stack</groupId>
<artifactId>sansa-owl-parent_2.11</artifactId>
<version>0.2.0</version>
<version>0.3.0</version>
<packaging>pom</packaging>
<name>OWL - Parent</name>

@@ -63,13 +63,13 @@
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.version>2.11.8</scala.version>
<scala.version>2.11.11</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<spark.version>2.1.1</spark.version>
<flink.version>1.3.0</flink.version>
<spark.version>2.2.1</spark.version>
<flink.version>1.3.2</flink.version>
<hadoop.common.version>2.7.0</hadoop.common.version>
<hadoop.mapreduce-client.version>2.3.0</hadoop.mapreduce-client.version>
<owlapi.version>5.1.0</owlapi.version>
<hadoop.mapreduce-client.version>2.7.0</hadoop.mapreduce-client.version>
<owlapi.version>5.1.3</owlapi.version>
</properties>


@@ -93,6 +93,12 @@
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- For serialization of final Java collections as used in OWL API -->
<dependency>
<groupId>de.javakaffee</groupId>
<artifactId>kryo-serializers</artifactId>
<version>0.42</version>
</dependency>
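For context, the sketch below shows what the new dependency provides in isolation, assuming only the library's documented entry point (UnmodifiableCollectionsSerializer.registerSerializers); the Spark and Flink test changes later in this diff wire the same serializer into their respective runtimes.

```scala
import com.esotericsoftware.kryo.Kryo
import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer

// Teach a plain Kryo instance to (de)serialize the JDK's unmodifiable collection
// wrappers (java.util.Collections$Unmodifiable*), which Kryo cannot construct
// on its own because they lack accessible no-arg constructors.
val kryo = new Kryo()
UnmodifiableCollectionsSerializer.registerSerializers(kryo)
```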

<!-- Apache Flink dependencies -->
<dependency>
@@ -159,6 +165,22 @@
<artifactId>owlapi-parsers</artifactId>
<version>${owlapi.version}</version>
</dependency>
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
<version>4.0</version>
</dependency>
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-assistedinject</artifactId>
<version>4.0</version>
</dependency>
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<version>4.0</version>
</dependency>
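A note on the Guice additions: these pins presumably serve OWL API 5.1.3's dependency-injection machinery and keep it from clashing with older Guice versions on the Hadoop/Spark classpath; the commit itself records no rationale, so treat that reading as an assumption.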


<!-- MLlib dependencies -->
<dependency>
sansa-owl-common/pom.xml (4 changes: 2 additions & 2 deletions)
@@ -7,11 +7,11 @@
<parent>
<groupId>net.sansa-stack</groupId>
<artifactId>sansa-owl-parent_2.11</artifactId>
<version>0.2.0</version>
<version>0.3.0</version>
</parent>

<artifactId>sansa-owl-common_2.11</artifactId>
<version>0.2.0</version>
<version>0.3.0</version>

<dependencies>
<!-- Scala dependencies -->
sansa-owl-common/src/main/scala/net/sansa_stack/owl/common/OWLSyntax.scala (new file)
@@ -0,0 +1,5 @@
package net.sansa_stack.owl.common

object OWLSyntax extends Enumeration {
val Functional, Manchester = Value
}
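The new OWLSyntax enumeration only declares the two values. Here is a hypothetical sketch of how a caller might consume it; the dispatch helper below is illustrative and not part of this commit:

```scala
import net.sansa_stack.owl.common.OWLSyntax

// Illustrative only: map each syntax value to the builder family a caller would use.
def builders(syntax: OWLSyntax.Value): String = syntax match {
  case OWLSyntax.Functional => "the FunctionalSyntax* builders"
  case OWLSyntax.Manchester => "the ManchesterSyntax* builders"
}

OWLSyntax.values.foreach(s => println(s"$s -> ${builders(s)}"))
```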
sansa-owl-flink/pom.xml (8 changes: 6 additions & 2 deletions)
@@ -7,11 +7,11 @@
<parent>
<artifactId>sansa-owl-parent_2.11</artifactId>
<groupId>net.sansa-stack</groupId>
<version>0.2.0</version>
<version>0.3.0</version>
</parent>

<artifactId>sansa-owl-flink_2.11</artifactId>
<version>0.2.0</version>
<version>0.3.0</version>

<name>SANSA OWL layer - Flink</name>
<description>A library to read OWL files into Flink.</description>
@@ -42,6 +42,10 @@
<groupId>org.apache.flink</groupId>
<artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>de.javakaffee</groupId>
<artifactId>kryo-serializers</artifactId>
</dependency>

<!-- OWL API dependencies -->
<dependency>
sansa-owl-flink/src/test/scala/net/sansa_stack/owl/flink/dataset/FunctionalSyntaxOWLAxiomsDataSetBuilderTest.scala
@@ -1,12 +1,18 @@
package net.sansa_stack.owl.flink.dataset

import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer
import org.apache.flink.api.scala.ExecutionEnvironment
import org.scalatest.FunSuite
import org.semanticweb.owlapi.model.{OWLAsymmetricObjectPropertyAxiom, OWLDataPropertyAssertionAxiom, OWLDisjointObjectPropertiesAxiom, OWLEquivalentObjectPropertiesAxiom, OWLFunctionalObjectPropertyAxiom, OWLInverseFunctionalObjectPropertyAxiom, OWLInverseObjectPropertiesAxiom, OWLIrreflexiveObjectPropertyAxiom, OWLNegativeDataPropertyAssertionAxiom, OWLNegativeObjectPropertyAssertionAxiom, OWLObjectPropertyAssertionAxiom, OWLObjectPropertyDomainAxiom, OWLObjectPropertyRangeAxiom, OWLReflexiveObjectPropertyAxiom, OWLSubObjectPropertyOfAxiom, OWLSubPropertyChainOfAxiom, OWLSymmetricObjectPropertyAxiom, OWLTransitiveObjectPropertyAxiom, SWRLRule, _}


class FunctionalSyntaxOWLAxiomsDataSetBuilderTest extends FunSuite {
lazy val env = ExecutionEnvironment.getExecutionEnvironment
val env = ExecutionEnvironment.getExecutionEnvironment
env.getConfig.addDefaultKryoSerializer(
Class.forName("java.util.Collections$UnmodifiableCollection"),
classOf[UnmodifiableCollectionsSerializer]
)

var _dataSet: OWLAxiomsDataSet = null
def dataSet = {
if (_dataSet == null) {
sansa-owl-flink/src/test/scala/net/sansa_stack/owl/flink/dataset/ManchesterSyntaxOWLAxiomsDataSetBuilderTest.scala
@@ -2,6 +2,7 @@ package net.sansa_stack.owl.flink.dataset

import java.util.stream.Collectors

import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer
import org.apache.flink.api.scala.ExecutionEnvironment
import org.scalatest.FunSuite
import org.semanticweb.owlapi.apibinding.OWLManager
@@ -14,6 +15,10 @@ import scala.collection.JavaConverters._

class ManchesterSyntaxOWLAxiomsDataSetBuilderTest extends FunSuite {
lazy val env = ExecutionEnvironment.getExecutionEnvironment
env.getConfig.addDefaultKryoSerializer(
Class.forName("java.util.Collections$UnmodifiableCollection"),
classOf[UnmodifiableCollectionsSerializer]
)
val dataFactory = OWLManager.getOWLDataFactory
var _dataSet: OWLAxiomsDataSet = null
def dataSet = {
sansa-owl-flink/src/test/scala/net/sansa_stack/owl/flink/dataset/ManchesterSyntaxOWLExpressionsDataSetBuilderTest.scala
@@ -2,7 +2,6 @@ package net.sansa_stack.owl.flink.dataset

import org.apache.flink.api.scala.ExecutionEnvironment
import org.scalatest.FunSuite
import org.semanticweb.owlapi.manchestersyntax.parser.ManchesterOWLSyntax


class ManchesterSyntaxOWLExpressionsDataSetBuilderTest extends FunSuite {
sansa-owl-spark/pom.xml (20 changes: 18 additions & 2 deletions)
@@ -6,11 +6,11 @@
<parent>
<groupId>net.sansa-stack</groupId>
<artifactId>sansa-owl-parent_2.11</artifactId>
<version>0.2.0</version>
<version>0.3.0</version>
</parent>

<artifactId>sansa-owl-spark_2.11</artifactId>
<version>0.2.0</version>
<version>0.3.0</version>

<name>SANSA OWL layer - SPARK</name>
<description>A library to read OWL files into Spark.</description>
@@ -37,6 +37,10 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
</dependency>
<dependency>
<groupId>de.javakaffee</groupId>
<artifactId>kryo-serializers</artifactId>
</dependency>

<!-- Hadoop dependencies (mainly used for InputFormat definitions) -->
<dependency>
@@ -61,6 +65,18 @@
<groupId>net.sourceforge.owlapi</groupId>
<artifactId>owlapi-apibinding</artifactId>
</dependency>
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
</dependency>
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-assistedinject</artifactId>
</dependency>
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
</dependency>

<!-- Testing dependencies -->
<dependency>
sansa-owl-spark/src/main/scala/net/sansa_stack/owl/spark/dataset/UnmodifiableCollectionKryoRegistrator.scala (new file)
@@ -0,0 +1,12 @@
package net.sansa_stack.owl.spark.dataset

import com.esotericsoftware.kryo.Kryo
import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer
import org.apache.spark.serializer.KryoRegistrator

class UnmodifiableCollectionKryoRegistrator extends KryoRegistrator {
override def registerClasses(kryo: Kryo): Unit = {
val cls = Class.forName("java.util.Collections$UnmodifiableCollection")
kryo.addDefaultSerializer(cls, new UnmodifiableCollectionsSerializer)
}
}
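Background on why this registrator exists: OWL API objects hold instances of java.util.Collections$UnmodifiableCollection, which Kryo cannot serialize out of the box because the wrapper class has no accessible no-arg constructor; de.javakaffee's UnmodifiableCollectionsSerializer fills that gap. As the test changes in this commit show, Spark picks the class up through the spark.kryo.registrator configuration key, while the Flink tests register the serializer directly on the ExecutionEnvironment's config.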
sansa-owl-spark/src/test/resources/ont_manchester.owl (3 changes: 3 additions & 0 deletions)
@@ -48,6 +48,9 @@ AnnotationProperty: foo:ann

AnnotationProperty: foo:hasName


AnnotationProperty: comment


Datatype: bar:dtype1

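The newly declared 'AnnotationProperty: comment' frame is what drives the updated expectations in the tests below: total frames go from 49 to 50, AnnotationProperty frames from 7 to 8, total axioms from 110 to 111, and declaration axioms from 49 to 50.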
sansa-owl-spark/src/test/scala/…/FunctionalSyntaxOWLAxiomsDatasetBuilderTest.scala
@@ -7,7 +7,12 @@ import org.semanticweb.owlapi.model._


class FunctionalSyntaxOWLAxiomsDatasetBuilderTest extends FunSuite with SharedSparkContext {
lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master).getOrCreate()
lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master)
.config(
"spark.kryo.registrator",
"net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator")
.getOrCreate()

var _dataset: OWLAxiomsDataset = null
def dataset = {
if (_dataset == null) {
sansa-owl-spark/src/test/scala/…/ManchesterSyntaxOWLAxiomsDatasetBuilderTest.scala
@@ -14,13 +14,18 @@ import scala.collection.JavaConverters._


class ManchesterSyntaxOWLAxiomsDatasetBuilderTest extends FunSuite with SharedSparkContext {
lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master).getOrCreate()
lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master)
.config(
"spark.kryo.registrator",
"net.sansa_stack.owl.spark.dataset.UnmodifiableCollectionKryoRegistrator")
.getOrCreate()
val dataFactory = OWLManager.getOWLDataFactory
var _dataset: OWLAxiomsDataset = null
def dataset = {
if (_dataset == null) {
_dataset = ManchesterSyntaxOWLAxiomsDatasetBuilder.build(
spark, "src/test/resources/ont_manchester.owl")
// spark, "src/test/resources/ont_manchester.owl")
spark, "hdfs://localhost:54310/ont_manchester.owl")
_dataset.cache()
}
_dataset
sansa-owl-spark/src/test/scala/…/ManchesterSyntaxOWLExpressionsDatasetBuilderTest.scala
@@ -20,7 +20,7 @@ class ManchesterSyntaxOWLExpressionsDatasetBuilderTest extends FunSuite with SharedSparkContext
}

test("The total number of frames should be correct") {
val total = 49
val total = 50
assert(dataset.count() == total)
}

@@ -31,7 +31,7 @@ class ManchesterSyntaxOWLExpressionsDatasetBuilderTest extends FunSuite with SharedSparkContext
}

test("The number of AnnotationProperty frames should be correct") {
val expectedNumAnnoPropertyFrames = 7
val expectedNumAnnoPropertyFrames = 8
val actualNumAnnoPropertyFrames =
dataset.filter(_.trim.startsWith("AnnotationProperty:")).count()

sansa-owl-spark/src/test/scala/…/ManchesterSyntaxOWLAxiomsRDDBuilderTest.scala
@@ -28,6 +28,7 @@ class ManchesterSyntaxOWLAxiomsRDDBuilderTest extends FunSuite with SharedSparkContext
_rdd
}

// TODO: Update comments, since new axiom 'AnnotationProperty: comment' was added
/* *|Annotations:
* | bar:hasTitle "Title",
* | description "A longer
@@ -429,7 +430,7 @@
*
*/
test("The number of axioms should match") {
val expectedNumberOfAxioms = 110
val expectedNumberOfAxioms = 111
assert(rdd.count() == expectedNumberOfAxioms)
}

@@ -484,6 +485,7 @@ class ManchesterSyntaxOWLAxiomsRDDBuilderTest extends FunSuite with SharedSparkContext
assert(subAnnPropOfAxiom.getSubProperty == expectedSubProperty)
}

// TODO: Update comments, since new axiom 'AnnotationProperty: comment' was added
test("Declaration axioms should be created correctly") {
// Declaration(AnnotationProperty(<http://ex.com/bar#annProp1>))
// Declaration(AnnotationProperty(<http://ex.com/bar#annProp2>))
@@ -534,7 +536,7 @@ class ManchesterSyntaxOWLAxiomsRDDBuilderTest extends FunSuite with SharedSparkContext
// Declaration(NamedIndividual(<http://ex.com/foo#indivA>))
// Declaration(NamedIndividual(<http://ex.com/foo#indivB>))
// Declaration(NamedIndividual(<http://ex.com/foo#sameAsIndivA>))
val expectedNumberOfAxioms = 49
val expectedNumberOfAxioms = 50
val filteredRDD = rdd.filter(axiom => axiom.isInstanceOf[OWLDeclarationAxiom])

assert(filteredRDD.count() == expectedNumberOfAxioms)
sansa-owl-spark/src/test/scala/…/ManchesterSyntaxOWLExpressionsRDDBuilderTest.scala
@@ -19,7 +19,7 @@ class ManchesterSyntaxOWLExpressionsRDDBuilderTest extends FunSuite with SharedSparkContext
}

test("The total number of frames should be correct") {
val total = 49
val total = 50
assert(rdd.count() == total)
}

@@ -30,7 +30,7 @@ class ManchesterSyntaxOWLExpressionsRDDBuilderTest extends FunSuite with SharedSparkContext
}

test("The number of AnnotationProperty frames should be correct") {
val expectedNumAnnoPropertyFrames = 7
val expectedNumAnnoPropertyFrames = 8
val actualNumAnnoPropertyFrames =
rdd.filter(_.trim.startsWith("AnnotationProperty:")).count()

