From 3aab64f2f0eedfef547d755fbd883adb0ecbf939 Mon Sep 17 00:00:00 2001 From: Konrad Abicht Date: Mon, 29 May 2017 17:42:41 +0200 Subject: [PATCH] added Saft/Addition/hardf (#88) currently only the Parser implementation is available --- src/Saft/Addition/hardf/.gitignore | 2 + .../hardf/Data/ParserFactoryHardf.php | 82 +++++++ src/Saft/Addition/hardf/Data/ParserHardf.php | 200 ++++++++++++++++++ .../hardf/Test/ParserFactoryHardfTest.php | 51 +++++ .../Addition/hardf/Test/ParserHardfTest.php | 34 +++ src/Saft/Addition/hardf/Test/bootstrap.php | 5 + src/Saft/Addition/hardf/composer.json | 31 +++ src/Saft/Addition/hardf/phpunit.xml | 27 +++ 8 files changed, 432 insertions(+) create mode 100644 src/Saft/Addition/hardf/.gitignore create mode 100644 src/Saft/Addition/hardf/Data/ParserFactoryHardf.php create mode 100644 src/Saft/Addition/hardf/Data/ParserHardf.php create mode 100644 src/Saft/Addition/hardf/Test/ParserFactoryHardfTest.php create mode 100644 src/Saft/Addition/hardf/Test/ParserHardfTest.php create mode 100644 src/Saft/Addition/hardf/Test/bootstrap.php create mode 100644 src/Saft/Addition/hardf/composer.json create mode 100644 src/Saft/Addition/hardf/phpunit.xml diff --git a/src/Saft/Addition/hardf/.gitignore b/src/Saft/Addition/hardf/.gitignore new file mode 100644 index 0000000..de4a392 --- /dev/null +++ b/src/Saft/Addition/hardf/.gitignore @@ -0,0 +1,2 @@ +/vendor +/composer.lock diff --git a/src/Saft/Addition/hardf/Data/ParserFactoryHardf.php b/src/Saft/Addition/hardf/Data/ParserFactoryHardf.php new file mode 100644 index 0000000..ae0d3dd --- /dev/null +++ b/src/Saft/Addition/hardf/Data/ParserFactoryHardf.php @@ -0,0 +1,82 @@ +serializationMap = array( + 'n-triples' => 'n-triples', + 'n-quads' => 'n-quads', + 'turtle' => 'turtle', + ); + + $this->nodeFactory = $nodeFactory; + $this->RdfHelpers = $rdfHelpers; + $this->statementFactory = $statementFactory; + $this->statementIteratorFactory = $statementIteratorFactory; + } + + /** + * Creates a Parser instance for a given serialization, if available. + * + * @param string $serialization The serialization you need a parser for. In case it is not + * available, an exception will be thrown. + * @return Parser Suitable parser for the requested serialization. + * @throws \Exception If parser for requested serialization is not available. + */ + public function createParserFor($serialization) + { + if (!in_array($serialization, $this->getSupportedSerializations())) { + throw new \Exception( + 'Requested serialization '. $serialization .' is not available in: '. + implode(', ', $this->getSupportedSerializations()) + ); + } + + return new ParserHardf( + $this->nodeFactory, + $this->statementFactory, + $this->statementIteratorFactory, + $this->RdfHelpers, + $serialization + ); + } + + /** + * Returns an array which contains supported serializations. + * + * @return array Array of supported serializations which are understood by this parser. + */ + public function getSupportedSerializations() + { + return array_keys($this->serializationMap); + } +} diff --git a/src/Saft/Addition/hardf/Data/ParserHardf.php b/src/Saft/Addition/hardf/Data/ParserHardf.php new file mode 100644 index 0000000..ff1a160 --- /dev/null +++ b/src/Saft/Addition/hardf/Data/ParserHardf.php @@ -0,0 +1,200 @@ +RdfHelpers = $rdfHelpers; + + $this->nodeFactory = $nodeFactory; + $this->statementFactory = $statementFactory; + $this->statementIteratorFactory = $statementIteratorFactory; + + $this->serializationMap = array( + 'n-triples' => 'triple', + 'n-quads' => 'quad', + 'turtle' => 'turtle', + ); + + $this->serialization = $this->serializationMap[$serialization]; + + if (false == isset($this->serializationMap[$serialization])) { + throw new \Exception( + 'Unknown serialization format given: '. $serialization .'. Supported are only '. + implode(', ', array_keys($this->serializationMap)) + ); + } + } + + /** + * Returns an array of prefixes which where found during the last parsing. + * + * @return array An associative array with a prefix mapping of the prefixes parsed so far. The key + * will be the prefix, while the values contains the according namespace URI. + */ + public function getCurrentPrefixList() + { + throw new \Exception('Currently not implement.'); + } + + /** + * Parses a given string and returns an iterator containing Statement instances representing the read data. + * + * @param string $inputString Data string containing RDF serialized data. + * @param string $baseUri The base URI of the parsed content. If this URI is null the inputStreams URL + * is taken as base URI. + * @return StatementIterator StatementIterator instaince containing all the Statements parsed by the + * parser to far + * @throws \Exception if the base URI $baseUri is no valid URI. + */ + public function parseStringToIterator($inputString, $baseUri = null) + { + // check $baseUri + if (null !== $baseUri && false == $this->RdfHelpers->simpleCheckURI($baseUri)) { + throw new \Exception('Parameter $baseUri is not a valid URI.'); + } + + $statements = array(); + + $parser = new TriGParser(array('format' => $this->serialization)); + $triples = $parser->parse($inputString); + + foreach ($triples as $triple) { + /* + * handle subject + */ + $subject = null; + if (Util::isIRI($triple['subject'])) { + $subject = $this->nodeFactory->createNamedNode($triple['subject']); + } elseif (Util::isBlank($triple['subject'])) { + $subject = $this->nodeFactory->createBlankNode(substr($triple['subject'], 2)); + } else { + throw new \Exception('Invalid node type for subject found: '. $triple['subject']); + } + + /* + * handle predicate + */ + $predicate = null; + if (Util::isIRI($triple['predicate'])) { + $predicate = $this->nodeFactory->createNamedNode($triple['predicate']); + } else { + throw new \Exception('Invalid node type for predicate found: '. $triple['predicate']); + } + + /* + * handle object + */ + $object = null; + if (Util::isIRI($triple['object'])) { + $object = $this->nodeFactory->createNamedNode($triple['object']); + + } elseif (Util::isBlank($triple['object'])) { + $object = $this->nodeFactory->createBlankNode(substr($triple['object'], 2)); + + } elseif (Util::isLiteral($triple['object'])) { + // safety check, to avoid fatal error about missing Error class in hardf + // FYI: https://github.com/pietercolpaert/hardf/pull/12 + // TODO: remove this here, if fixed + $int = preg_match('/"(\n+\s*.*\n+\s*)"/si', $triple['object'], $match); + if (0 < $int) { + $value = $match[1]; + $lang = null; + $datatype = null; + + /* + * normal case + */ + } else { + // get value + preg_match('/"(.*)"/si', $triple['object'], $match); + $value = $match[1]; + + $lang = Util::getLiteralLanguage($triple['object']); + $lang = '' == $lang ? null : $lang; + $datatype = Util::getLiteralType($triple['object']); + } + + $object = $this->nodeFactory->createLiteral($value, $datatype, $lang); + } else { + throw new \Exception('Invalid node type for object found: '. $triple['object']); + } + + // add statement + $statements[] = $this->statementFactory->createStatement($subject, $predicate, $object); + } + + return $this->statementIteratorFactory->createStatementIteratorFromArray($statements); + } + + /** + * Parses a given stream and returns an iterator containing Statement instances representing the + * previously read data. The stream parses the data not as a whole but in chunks. + * + * @param string $inputStream Filename of the stream to parse which contains RDF serialized data. + * @param string $baseUri The base URI of the parsed content. If this URI is null the inputStreams URL + * is taken as base URI. + * @return StatementIterator A StatementIterator containing all the Statements parsed by the parser to far. + * @throws \Exception if the base URI $baseUri is no valid URI. + */ + public function parseStreamToIterator($inputStream, $baseUri = null) + { + // check $baseUri + if (null !== $baseUri && false == $this->RdfHelpers->simpleCheckURI($baseUri)) { + throw new \Exception('Parameter $baseUri is not a valid URI.'); + } + + return $this->parseStringToIterator(file_get_contents($inputStream), $baseUri); + } +} diff --git a/src/Saft/Addition/hardf/Test/ParserFactoryHardfTest.php b/src/Saft/Addition/hardf/Test/ParserFactoryHardfTest.php new file mode 100644 index 0000000..6bc7d37 --- /dev/null +++ b/src/Saft/Addition/hardf/Test/ParserFactoryHardfTest.php @@ -0,0 +1,51 @@ + ; + "hey"@de ; + [ + "hi"^^ + ] .'; + + $instance = $this->newInstance(); + $parser = $instance->createParserFor('turtle'); + $iterator = $parser->parseStringToIterator($str); + } +} diff --git a/src/Saft/Addition/hardf/Test/ParserHardfTest.php b/src/Saft/Addition/hardf/Test/ParserHardfTest.php new file mode 100644 index 0000000..d25a2ae --- /dev/null +++ b/src/Saft/Addition/hardf/Test/ParserHardfTest.php @@ -0,0 +1,34 @@ +factory = new ParserFactoryHardf( + new NodeFactoryImpl(new RdfHelpers()), + new StatementFactoryImpl(), + new StatementIteratorFactoryImpl(), + new RdfHelpers() + ); + } + + /** + * @return Parser + */ + protected function newInstance($serialization) + { + return $this->factory->createParserFor($serialization); + } +} diff --git a/src/Saft/Addition/hardf/Test/bootstrap.php b/src/Saft/Addition/hardf/Test/bootstrap.php new file mode 100644 index 0000000..6ffb5bb --- /dev/null +++ b/src/Saft/Addition/hardf/Test/bootstrap.php @@ -0,0 +1,5 @@ +=5.6", + "pietercolpaert/hardf": "0.*", + "saft/saft-data": ">=0.9" + }, + "require-dev": { + "phpunit/phpunit": "4.8.*", + "saft/saft-test": ">=0.9" + }, + "autoload": { + "psr-4": { + "Saft\\Addition\\hardf\\": "." + } + }, + "prefer-stable": true +} diff --git a/src/Saft/Addition/hardf/phpunit.xml b/src/Saft/Addition/hardf/phpunit.xml new file mode 100644 index 0000000..e95f82e --- /dev/null +++ b/src/Saft/Addition/hardf/phpunit.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + ./Test + + +