Skip to content

Commit

Permalink
Refactor range start/end symbol handling, allow asymmetric ranges
Browse files Browse the repository at this point in the history
  • Loading branch information
thePanz committed Apr 6, 2018
1 parent a7c3867 commit 771b486
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 79 deletions.
38 changes: 34 additions & 4 deletions lib/Languages/Galach/Generators/Native/Range.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,45 @@ public function visit(Node $node, Visitor $subVisitor = null, $options = null)

$domainPrefix = '' === $token->domain ? '' : "{$token->domain}:";

switch ($token->type) {
return $domainPrefix.
$this->buildRangeStart($token).
' TO '.
$this->buildRangeEnd($token);
}

/**
 * Builds the opening half of the range expression: the start delimiter
 * immediately followed by the lower bound of the range.
 *
 * An inclusive start is rendered with '[' and an exclusive start with '{'.
 *
 * @param RangeToken $token
 * @return string
 *
 * @throws LogicException when the token's start type is neither inclusive nor exclusive
 */
private function buildRangeStart($token)
{
    // The switch only selects the delimiter; the bound is appended once below.
    switch ($token->startType) {
        case RangeToken::TYPE_INCLUSIVE:
            $openingSymbol = '[';
            break;

        case RangeToken::TYPE_EXCLUSIVE:
            $openingSymbol = '{';
            break;

        default:
            throw new LogicException(sprintf('Range start type %s is not supported', $token->startType));
    }

    return $openingSymbol . $token->rangeFrom;
}

/**
* @param RangeToken $token
* @return string
*/
private function buildRangeEnd($token)
{
switch ($token->endType) {
case RangeToken::TYPE_INCLUSIVE:
return $domainPrefix . '[' . $token->rangeFrom . ' TO ' . $token->rangeTo . ']';
return $token->rangeTo. ']';

case RangeToken::TYPE_EXCLUSIVE:
return $domainPrefix . '{' . $token->rangeFrom . ' TO ' . $token->rangeTo . '}';
return $token->rangeTo. '}';

default:
throw new LogicException(sprintf('Range type %s is not supported', $token->type));
throw new LogicException(sprintf('Range end type %s is not supported', $token->endType));
}
}
}
23 changes: 20 additions & 3 deletions lib/Languages/Galach/TokenExtractor/Full.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ final class Full extends TokenExtractor
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<rangeStartSymbol>[\[\{])(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)[\]\}])/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<rangeStartSymbol>[\[\{])(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)(?<rangeEndSymbol>[\]\}]))/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
];

Expand All @@ -50,13 +50,14 @@ protected function createTermToken($position, array $data)
$lexeme = $data['lexeme'];

switch (true) {
case isset($data['rangeStartSymbol']):
case (isset($data['rangeStartSymbol']) && isset($data['rangeEndSymbol'])):
return new Range(
$lexeme,
$position,
$data['domain'],
$data['rangeFrom'], $data['rangeTo'],
Range::getTypeByStart($data['rangeStartSymbol'])
$this->getRangeTypeBySymbol($data['rangeStartSymbol']),
$this->getRangeTypeBySymbol($data['rangeEndSymbol'])
);
case isset($data['word']):
return new Word(
Expand Down Expand Up @@ -95,4 +96,20 @@ protected function createTermToken($position, array $data)

throw new RuntimeException('Could not extract term token from the given data');
}

/**
 * Returns the range type, given the symbol.
 *
 * Curly braces mark an exclusive boundary and square brackets an inclusive
 * one; the same mapping is used for both the start and the end symbol.
 *
 * @param string $symbol the range start/end symbol, one of '[', ']', '{' or '}'
 *
 * @return string either Range::TYPE_INCLUSIVE or Range::TYPE_EXCLUSIVE
 *
 * @throws \InvalidArgumentException if the symbol is not a known range delimiter
 */
protected function getRangeTypeBySymbol($symbol)
{
    if (in_array($symbol, ['{', '}'], true)) {
        return Range::TYPE_EXCLUSIVE;
    }

    if (in_array($symbol, ['[', ']'], true)) {
        return Range::TYPE_INCLUSIVE;
    }

    // Fail loudly instead of silently treating an unexpected symbol as inclusive.
    throw new \InvalidArgumentException(sprintf('Invalid range symbol: %s', $symbol));
}
}
39 changes: 16 additions & 23 deletions lib/Languages/Galach/Values/Token/Range.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,47 +35,40 @@ final class Range extends Token
/**
* @var string
*/
public $type;
public $startType;

/**
* @var string
*/
public $endType;

/**
* @param string $lexeme
* @param int $position
* @param string $domain
* @param string $rangeFrom
* @param string $rangeTo
* @param string $type
* @param string $startType
* @param string $endType
*/
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $type)
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $startType, $endType)
{
if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) {
throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
}
$this->ensureValidType($startType);
$this->ensureValidType($endType);

parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position);

$this->domain = $domain;
$this->rangeFrom = $rangeFrom;
$this->rangeTo = $rangeTo;
$this->type = $type;
$this->startType = $startType;
$this->endType = $endType;
}

/**
* Returns the range type, given the starting symbol.
*
* @param string $startSymbol the start symbol, either '[' or '{'
*
* @return string
*/
public static function getTypeByStart($startSymbol)
private function ensureValidType($type)
{
if ('[' === $startSymbol) {
return self::TYPE_INCLUSIVE;
}

if ('{' === $startSymbol) {
return self::TYPE_EXCLUSIVE;
if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) {
throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
}

throw new \InvalidArgumentException(sprintf('Invalid range start symbol: %s', $startSymbol));
}
}
27 changes: 22 additions & 5 deletions tests/Galach/Generators/Native/RangeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ protected function setUp()
public function acceptDataprovider()
{
return [
[true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))],
[true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))],
[false, new Term(new Word('word', 0, '', 'a'))],
];
}
Expand All @@ -45,8 +45,10 @@ public function testAccepts($expected, $node)
public function visitDataprovider()
{
return [
['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))],
['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'))],
['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))],
['[a TO b}', new Term(new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'))],
['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'))],
['{a TO b]', new Term(new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'))],
];
}

Expand Down Expand Up @@ -81,10 +83,25 @@ public function testVisitWrongNodeFails($node)
$this->visitor->visit($node);
}

public function testVisitUnknownTypeFails()
/**
 * The visitor must refuse to render a range whose start type is unknown.
 *
 * The token is created with valid types and the start type is overwritten
 * through the public property afterwards, since the token constructor
 * validates both types.
 */
public function testVisitUnknownRangeStartTypeFails()
{
    $rangeToken = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive');
    $rangeToken->startType = 'unknown';
    $termNode = new Term($rangeToken);

    $this->expectException(\LogicException::class);
    $this->expectExceptionMessage('Range start type unknown is not supported');

    $this->visitor->visit($termNode);
}

public function testVisitUnknownRangeEndTypeFails()
{
$token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive');
$token->endType = 'unknown';
$node = new Term($token);

$this->expectException(\LogicException::class);
$node = new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'unknown'));
$this->expectExceptionMessage('Range end type unknown is not supported');
$this->visitor->visit($node);
}
}
16 changes: 14 additions & 2 deletions tests/Galach/Tokenizer/FullTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,25 @@ public function providerForTestTokenize()
[
'[a TO b]',
[
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'),
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'),
],
],
[
'[a TO b}',
[
new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'),
],
],
[
'{a TO b}',
[
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'),
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'),
],
],
[
'{a TO b]',
[
new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'),
],
],
[
Expand Down
14 changes: 14 additions & 0 deletions tests/Galach/Tokenizer/TextTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,27 @@ public static function setUpBeforeClass()
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b]', 6, '', 'b]'),
],
'[a TO b}' => [
new WordToken('[a', 0, '', '[a'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
new WordToken('TO', 3, '', 'TO'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b}', 6, '', 'b}'),
],
'{a TO b}' => [
new WordToken('{a', 0, '', '{a'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
new WordToken('TO', 3, '', 'TO'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b}', 6, '', 'b}'),
],
'{a TO b]' => [
new WordToken('{a', 0, '', '{a'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
new WordToken('TO', 3, '', 'TO'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b]', 6, '', 'b]'),
],
'domain:domain:' => [
new WordToken('domain:domain:', 0, '', 'domain:domain:'),
],
Expand Down
57 changes: 15 additions & 42 deletions tests/Galach/Values/Token/RangeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,59 +7,32 @@

class RangeTest extends TestCase
{
public function failingStartSymbolDataprovider()
{
return [
[''],
['/'],
['('],
];
}

/**
* @dataProvider failingStartSymbolDataprovider
* @param string $startSymbol
*/
public function testGetTypeByStartFails($startSymbol)
{
$this->expectException(\InvalidArgumentException::class);
Range::getTypeByStart($startSymbol);
}

public function successfulStartSymbolDataprovider()
{
return [
['inclusive', '['],
['exclusive', '{'],
];
}

/**
* @dataProvider successfulStartSymbolDataprovider
* @param string $expectedType
* @param string $startSymbol
*/
public function testGetTypeByStartSucceeds($expectedType, $startSymbol)
{
$this->assertSame($expectedType, Range::getTypeByStart($startSymbol));
}

public function failingTypeDataprovider()
{
return [
[''],
[null],
['other'],
['', 'inclusive'],
['', 'exclusive'],
['inclusive', ''],
['exclusive', ''],
[null, null],
['other', 'inclusive'],
['other', 'exclusive'],
['inclusive','other'],
['exclusive','other'],
['inclusive', null],
['exclusive', null],
[null, 'inclusive'],
[null, 'exclusive'],
];
}

/**
* @dataProvider failingTypeDataprovider
* @param string $type
*/
public function testConstructorFailsWrongType($type)
public function testConstructorFailsWrongType($startType, $endType)
{
$this->expectException(\InvalidArgumentException::class);
new Range('[a TO b]', 0, '', 'a', 'b', $type);
new Range('[a TO b]', 0, '', 'a', 'b', $startType, $endType);
}
}

0 comments on commit 771b486

Please sign in to comment.