Skip to content

Commit

Permalink
Add support for range [a TO b]
Browse files Browse the repository at this point in the history
  • Loading branch information
thePanz committed Apr 5, 2018
1 parent a06fe70 commit 6af5e8b
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 0 deletions.
9 changes: 9 additions & 0 deletions lib/Languages/Galach/TokenExtractor/Full.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use QueryTranslator\Languages\Galach\TokenExtractor;
use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Languages\Galach\Values\Token\Phrase;
use QueryTranslator\Languages\Galach\Values\Token\Range;
use QueryTranslator\Languages\Galach\Values\Token\Tag;
use QueryTranslator\Languages\Galach\Values\Token\User;
use QueryTranslator\Languages\Galach\Values\Token\Word;
Expand Down Expand Up @@ -35,6 +36,7 @@ final class Full extends TokenExtractor
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?\[(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)\])/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
];

Expand All @@ -48,6 +50,13 @@ protected function createTermToken($position, array $data)
$lexeme = $data['lexeme'];

switch (true) {
case isset($data['rangeFrom']) && isset($data['rangeTo']):
return new Range(
$lexeme,
$position,
$data['domain'],
$data['rangeFrom'], $data['rangeTo']
);
case isset($data['word']):
return new Word(
$lexeme,
Expand Down
1 change: 1 addition & 0 deletions lib/Languages/Galach/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ final class Tokenizer implements Tokenizing
* @see \QueryTranslator\Languages\Galach\Values\Token\Tag
* @see \QueryTranslator\Languages\Galach\Values\Token\User
* @see \QueryTranslator\Languages\Galach\Values\Token\Word
* @see \QueryTranslator\Languages\Galach\Values\Token\Range
*/
const TOKEN_TERM = 512;

Expand Down
47 changes: 47 additions & 0 deletions lib/Languages/Galach/Values/Token/Range.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

namespace QueryTranslator\Languages\Galach\Values\Token;

use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Values\Token;

/**
 * Term token describing a range expression such as "[a TO b]",
 * optionally qualified by a domain.
 *
 * Instances are always typed as Tokenizer::TOKEN_TERM.
 *
 * @see \QueryTranslator\Languages\Galach\Tokenizer::TOKEN_TERM
 */
final class Range extends Token
{
    /**
     * Domain string the range was given for.
     *
     * @var string
     */
    public $domain;

    /**
     * Start value of the range (the part given before "TO").
     *
     * @var string
     */
    public $rangeFrom;

    /**
     * End value of the range (the part given after "TO").
     *
     * @var string
     */
    public $rangeTo;

    /**
     * Builds a range token and registers it as a term token with the parent.
     *
     * @param string $lexeme    Matched source text of the whole token
     * @param int    $position  Position of the lexeme in the input string
     * @param string $domain    Domain string
     * @param string $rangeFrom Start value of the range
     * @param string $rangeTo   End value of the range
     */
    public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo)
    {
        // The token type is fixed: a range is always reported as a term.
        parent::__construct(
            Tokenizer::TOKEN_TERM,
            $lexeme,
            $position
        );

        $this->rangeTo = $rangeTo;
        $this->rangeFrom = $rangeFrom;
        $this->domain = $domain;
    }
}
7 changes: 7 additions & 0 deletions tests/Galach/Tokenizer/FullTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin as GroupBeginToken;
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin;
use QueryTranslator\Languages\Galach\Values\Token\Phrase as PhraseToken;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Languages\Galach\Values\Token\Tag as TagToken;
use QueryTranslator\Languages\Galach\Values\Token\User as UserToken;
use QueryTranslator\Languages\Galach\Values\Token\Word as WordToken;
Expand Down Expand Up @@ -112,6 +113,12 @@ public function providerForTestTokenize()
new WordToken('word\\ word', 0, '', 'word word'),
],
],
[
'[a TO b]',
[
new RangeToken('[a TO b]', 0, '', 'a', 'b'),
],
],
[
'"phrase"',
[
Expand Down
7 changes: 7 additions & 0 deletions tests/Galach/Tokenizer/TextTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ public static function setUpBeforeClass()
new WordToken('@user', 0, '', '@user'),
new Token(Tokenizer::TOKEN_GROUP_END, ')', 5),
],
'[a TO b]' => [
new WordToken('[a', 0, '', '[a'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
new WordToken('TO', 3, '', 'TO'),
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
new WordToken('b]', 6, '', 'b]'),
],
'domain:domain:' => [
new WordToken('domain:domain:', 0, '', 'domain:domain:'),
],
Expand Down

0 comments on commit 6af5e8b

Please sign in to comment.