Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add ranges support #14

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions lib/Languages/Galach/Generators/Native/Range.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

namespace QueryTranslator\Languages\Galach\Generators\Native;

use LogicException;
use QueryTranslator\Languages\Galach\Generators\Common\Visitor;
use QueryTranslator\Languages\Galach\Values\Node\Term;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Values\Node;

/**
* Range Node Visitor implementation.
*/
final class Range extends Visitor
{
/**
 * Tells whether this visitor handles the given node.
 *
 * Only Term nodes whose token is a Range token are accepted.
 *
 * @param \QueryTranslator\Values\Node $node
 *
 * @return bool
 */
public function accept(Node $node)
{
    // Anything that is not a Term node cannot carry a Range token.
    if (!$node instanceof Term) {
        return false;
    }

    return $node->token instanceof RangeToken;
}

/**
 * Renders a Term node holding a Range token as a range expression string.
 *
 * The result is "[from TO to]" for an inclusive range and "{from TO to}" for
 * an exclusive one, prefixed with "domain:" when the token's domain is not
 * an empty string. $subVisitor and $options are not used by this method.
 *
 * @param \QueryTranslator\Values\Node $node
 * @param \QueryTranslator\Languages\Galach\Generators\Common\Visitor $subVisitor
 * @param mixed $options
 *
 * @throws \LogicException If the node is not a Term, its token is not a Range token, or the range type is not supported
 *
 * @return string
 */
public function visit(Node $node, Visitor $subVisitor = null, $options = null)
{
// Guard against being called with a node that accept() would have rejected.
if (!$node instanceof Term) {
throw new LogicException(
'Implementation accepts instance of Term Node'
);
}

$token = $node->token;

if (!$token instanceof RangeToken) {
throw new LogicException(
'Implementation accepts instance of Range Token'
);
}

// An empty domain means no prefix; otherwise the domain is followed by a colon.
$domainPrefix = '' === $token->domain ? '' : "{$token->domain}:";

// The bracket pair is chosen from the token type alone; both ends always match.
switch ($token->type) {
case RangeToken::TYPE_INCLUSIVE:
return $domainPrefix . '[' . $token->rangeFrom . ' TO ' . $token->rangeTo . ']';
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Symbols should be captured by the expression and contained in the token, so if they are customized this class does not need to know about it. And we should support mixed case as well, {a TO b] and [a TO b}.

It will probably mean a truckload of constructor arguments, but I'm OK with that :)


case RangeToken::TYPE_EXCLUSIVE:
return $domainPrefix . '{' . $token->rangeFrom . ' TO ' . $token->rangeTo . '}';

// Reached only when the token's public type property holds an unknown value.
default:
throw new LogicException(sprintf('Range type %s is not supported', $token->type));
}
}
}
10 changes: 10 additions & 0 deletions lib/Languages/Galach/TokenExtractor/Full.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use QueryTranslator\Languages\Galach\TokenExtractor;
use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Languages\Galach\Values\Token\Phrase;
use QueryTranslator\Languages\Galach\Values\Token\Range;
use QueryTranslator\Languages\Galach\Values\Token\Tag;
use QueryTranslator\Languages\Galach\Values\Token\User;
use QueryTranslator\Languages\Galach\Values\Token\Word;
Expand Down Expand Up @@ -35,6 +36,7 @@ final class Full extends TokenExtractor
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<rangeStartSymbol>[\[\{])(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)[\]\}])/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
];

Expand All @@ -48,6 +50,14 @@ protected function createTermToken($position, array $data)
$lexeme = $data['lexeme'];

switch (true) {
case isset($data['rangeStartSymbol']):
return new Range(
$lexeme,
$position,
$data['domain'],
$data['rangeFrom'], $data['rangeTo'],
Range::getTypeByStart($data['rangeStartSymbol'])
);
case isset($data['word']):
return new Word(
$lexeme,
Expand Down
1 change: 1 addition & 0 deletions lib/Languages/Galach/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ final class Tokenizer implements Tokenizing
* @see \QueryTranslator\Languages\Galach\Values\Token\Tag
* @see \QueryTranslator\Languages\Galach\Values\Token\User
* @see \QueryTranslator\Languages\Galach\Values\Token\Word
* @see \QueryTranslator\Languages\Galach\Values\Token\Range
*/
const TOKEN_TERM = 512;

Expand Down
81 changes: 81 additions & 0 deletions lib/Languages/Galach/Values/Token/Range.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?php

namespace QueryTranslator\Languages\Galach\Values\Token;

use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Values\Token;

/**
* Range term token.
*
* @see \QueryTranslator\Languages\Galach\Tokenizer::TOKEN_TERM
*/
final class Range extends Token
{
const TYPE_INCLUSIVE = 'inclusive';
const TYPE_EXCLUSIVE = 'exclusive';

/**
* Holds domain string.
*
* @var string
*/
public $domain;

/**
* @var string
*/
public $rangeFrom;

/**
* @var string
*/
public $rangeTo;

/**
* @var string
*/
public $type;

/**
* @param string $lexeme
* @param int $position
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No alignment in PHPDoc, it unnecessarily adds to diff when updated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just ran php-cs-fixer with the current settings; I guess it had never been run on this code before.
I guess it is a good thing to comply with a coding standard here :)

* @param string $domain
* @param string $rangeFrom
* @param string $rangeTo
* @param string $type
*/
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $type)
{
    // Strict comparison is required here: with a loose in_array() a boolean
    // true (and integer 0 before PHP 8) compares equal to the type names and
    // would slip through validation, leaving an unusable type on the token.
    if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE], true)) {
        throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
    }

    // Range tokens are always reported to the tokenizer as term tokens.
    parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position);

    $this->domain = $domain;
    $this->rangeFrom = $rangeFrom;
    $this->rangeTo = $rangeTo;
    $this->type = $type;
}

/**
* Returns the range type, given the starting symbol.
*
* @param string $startSymbol the start symbol, either '[' or '{'
*
* @return string
*/
public static function getTypeByStart($startSymbol)
{
// Only the opening symbol is inspected; the comparison is strict, so the
// argument must be exactly the one-character string '[' or '{'.
if ('[' === $startSymbol) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having this check here means if someone customizes the symbol, this class will also need to be changed. So it should be rather done outside, in the Full TokenExtractor implementation.

return self::TYPE_INCLUSIVE;
}

// A curly opening brace marks an exclusive range.
if ('{' === $startSymbol) {
return self::TYPE_EXCLUSIVE;
}

// Any other value is rejected instead of being mapped to a default type.
throw new \InvalidArgumentException(sprintf('Invalid range start symbol: %s', $startSymbol));
}
}
90 changes: 90 additions & 0 deletions tests/Galach/Generators/Native/RangeTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<?php

namespace QueryTranslator\Tests\Galach\Generators\Native;

use PHPUnit\Framework\TestCase;
use QueryTranslator\Languages\Galach\Generators\Common\Visitor;
use QueryTranslator\Languages\Galach\Generators\Native\Range;
use QueryTranslator\Languages\Galach\Values\Node\Mandatory;
use QueryTranslator\Languages\Galach\Values\Node\Term;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Languages\Galach\Values\Token\Word;
use QueryTranslator\Values\Node;

/**
 * Tests for the native Range node visitor.
 */
class RangeTest extends TestCase
{
    /**
     * Visitor under test, recreated before every test.
     *
     * @var Visitor
     */
    public $visitor;

    protected function setUp()
    {
        $this->visitor = new Range();
    }

    /**
     * Provides [expected accept() result, node] pairs.
     *
     * @return array
     */
    public function acceptDataprovider()
    {
        return [
            [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))],
            [false, new Term(new Word('word', 0, '', 'a'))],
        ];
    }

    /**
     * @param bool $expected
     * @param Node $node
     *
     * @dataProvider acceptDataprovider
     */
    public function testAccepts($expected, $node)
    {
        $this->assertSame($expected, $this->visitor->accept($node));
    }

    /**
     * Provides [expected generated string, node] pairs.
     *
     * @return array
     */
    public function visitDataprovider()
    {
        return [
            ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))],
            ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'))],
        ];
    }

    /**
     * @param string $expected
     * @param Node $node
     *
     * @dataProvider visitDataprovider
     */
    public function testVisit($expected, $node)
    {
        $this->assertSame($expected, $this->visitor->visit($node));
    }

    /**
     * Provides nodes the visitor must refuse to visit.
     *
     * @return array
     */
    public function visitWrongNodeDataprovider()
    {
        return [
            [new Mandatory()],
            [new Term(new Word('word', 0, '', 'a'))],
        ];
    }

    /**
     * @param Node $node
     *
     * @dataProvider visitWrongNodeDataprovider
     */
    public function testVisitWrongNodeFails($node)
    {
        $this->expectException(\LogicException::class);
        $this->visitor->visit($node);
    }

    public function testVisitUnknownTypeFails()
    {
        // The token constructor rejects unknown types with an
        // InvalidArgumentException, which is itself a LogicException. Passing
        // 'unknown' to the constructor would therefore satisfy the expectation
        // without ever reaching the visitor. Build a valid token and overwrite
        // its public type afterwards so the visitor's own guard is exercised.
        $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', RangeToken::TYPE_EXCLUSIVE);
        $token->type = 'unknown';
        $node = new Term($token);

        $this->expectException(\LogicException::class);
        $this->expectExceptionMessage('Range type unknown is not supported');
        $this->visitor->visit($node);
    }
}
13 changes: 13 additions & 0 deletions tests/Galach/Tokenizer/FullTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin as GroupBeginToken;
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin;
use QueryTranslator\Languages\Galach\Values\Token\Phrase as PhraseToken;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Languages\Galach\Values\Token\Tag as TagToken;
use QueryTranslator\Languages\Galach\Values\Token\User as UserToken;
use QueryTranslator\Languages\Galach\Values\Token\Word as WordToken;
Expand Down Expand Up @@ -112,6 +113,18 @@ public function providerForTestTokenize()
new WordToken('word\\ word', 0, '', 'word word'),
],
],
[
'[a TO b]',
[
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'),
],
],
[
'{a TO b}',
[
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'),
],
],
[
'"phrase"',
[
Expand Down
14 changes: 14 additions & 0 deletions tests/Galach/Tokenizer/TextTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@ public static function setUpBeforeClass()
new WordToken('@user', 0, '', '@user'),
new Token(Tokenizer::TOKEN_GROUP_END, ')', 5),
],
'[a TO b]' => [
new WordToken('[a', 0, '', '[a'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
new WordToken('TO', 3, '', 'TO'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b]', 6, '', 'b]'),
],
'{a TO b}' => [
new WordToken('{a', 0, '', '{a'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
new WordToken('TO', 3, '', 'TO'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b}', 6, '', 'b}'),
],
'domain:domain:' => [
new WordToken('domain:domain:', 0, '', 'domain:domain:'),
],
Expand Down
65 changes: 65 additions & 0 deletions tests/Galach/Values/Token/RangeTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php

namespace QueryTranslator\Tests\Galach\Values\Token;

use PHPUnit\Framework\TestCase;
use QueryTranslator\Languages\Galach\Values\Token\Range;

/**
 * Tests for the Range term token value object.
 */
class RangeTest extends TestCase
{
    /**
     * Start symbols that do not open a range.
     *
     * @return array
     */
    public function failingStartSymbolDataprovider()
    {
        return [
            [''],
            ['/'],
            ['('],
        ];
    }

    /**
     * An unrecognized start symbol must be rejected.
     *
     * @param string $startSymbol
     *
     * @dataProvider failingStartSymbolDataprovider
     */
    public function testGetTypeByStartFails($startSymbol)
    {
        $this->expectException(\InvalidArgumentException::class);

        Range::getTypeByStart($startSymbol);
    }

    /**
     * Pairs of [expected range type, start symbol].
     *
     * @return array
     */
    public function successfulStartSymbolDataprovider()
    {
        return [
            ['inclusive', '['],
            ['exclusive', '{'],
        ];
    }

    /**
     * A recognized start symbol maps to its range type.
     *
     * @param string $expectedType
     * @param string $startSymbol
     *
     * @dataProvider successfulStartSymbolDataprovider
     */
    public function testGetTypeByStartSucceeds($expectedType, $startSymbol)
    {
        $actualType = Range::getTypeByStart($startSymbol);

        self::assertSame($expectedType, $actualType);
    }

    /**
     * Type values the constructor must refuse.
     *
     * @return array
     */
    public function failingTypeDataprovider()
    {
        return [
            [''],
            [null],
            ['other'],
        ];
    }

    /**
     * Constructing a token with an invalid type must fail.
     *
     * @param string|null $type
     *
     * @dataProvider failingTypeDataprovider
     */
    public function testConstructorFailsWrongType($type)
    {
        $this->expectException(\InvalidArgumentException::class);

        new Range('[a TO b]', 0, '', 'a', 'b', $type);
    }
}