diff --git a/CHANGELOG.md b/CHANGELOG.md index 9862bea..05d2146 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Cleaned up the selector logic. - Fixed issue with greedy regex for charset detection. - Fixed bug causing infinite loops in some cases. +- Refactored the way we handle options. Removed the magical option array. ### Removed - Curl interface and curl implementation has been removed. diff --git a/composer.json b/composer.json index 79258c5..5549a5e 100755 --- a/composer.json +++ b/composer.json @@ -20,7 +20,8 @@ "paquettg/string-encode": "~1.0.0", "php-http/httplug": "^2.1", "php-http/guzzle6-adapter": "^2.0", - "guzzlehttp/psr7": "^1.6" + "guzzlehttp/psr7": "^1.6", + "myclabs/php-enum": "^1.7" }, "require-dev": { "phpunit/phpunit": "^7.5.1", diff --git a/src/PHPHtmlParser/Content.php b/src/PHPHtmlParser/Content.php index 66bc779..fdb741c 100755 --- a/src/PHPHtmlParser/Content.php +++ b/src/PHPHtmlParser/Content.php @@ -4,6 +4,7 @@ namespace PHPHtmlParser; +use PHPHtmlParser\Enum\StringToken; use PHPHtmlParser\Exceptions\ContentLengthException; use PHPHtmlParser\Exceptions\LogicalException; @@ -75,11 +76,12 @@ public function char(?int $char = null): string * Moves the current position forward. * * @chainable + * * @throws ContentLengthException */ public function fastForward(int $count): Content { - if (!$this->canFastForward()) { + if (!$this->canFastForward($count)) { // trying to go over the content length, throw exception throw new ContentLengthException('Attempt to fastForward pass the length of the content.'); } @@ -91,9 +93,9 @@ public function fastForward(int $count): Content /** * Checks if we can move the position forward. */ - public function canFastForward(): bool + public function canFastForward(int $count): bool { - return \strlen($this->content) > $this->pos; + return \strlen($this->content) >= $this->pos + $count; } /** @@ -175,8 +177,6 @@ public function copyUntil(string $string, bool $char = false, bool $escape = fal /** * Copies the content until the string is found and return it * unless the 'unless' is found in the substring. - * - * @return string */ public function copyUntilUnless(string $string, string $unless): string { @@ -197,13 +197,11 @@ public function copyUntilUnless(string $string, string $unless): string /** * Copies the content until it reaches the token string.,. * - * @return string - * * @uses $this->copyUntil() */ - public function copyByToken(string $token, bool $char = false, bool $escape = false) + public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string { - $string = $this->$token; + $string = $stringToken->getValue(); return $this->copyUntil($string, $char, $escape); } @@ -236,13 +234,11 @@ public function skip(string $string, bool $copy = false): string /** * Skip a given token of pre-defined characters. * - * @return Content|string - * * @uses $this->skip() */ - public function skipByToken(string $token, bool $copy = false) + public function skipByToken(StringToken $skipToken, bool $copy = false): string { - $string = $this->$token; + $string = $skipToken->getValue(); return $this->skip($string, $copy); } diff --git a/src/PHPHtmlParser/Dom.php b/src/PHPHtmlParser/Dom.php index d23110d..d2db15e 100755 --- a/src/PHPHtmlParser/Dom.php +++ b/src/PHPHtmlParser/Dom.php @@ -10,10 +10,10 @@ use PHPHtmlParser\Dom\Collection; use PHPHtmlParser\Dom\HtmlNode; use PHPHtmlParser\Dom\TextNode; +use PHPHtmlParser\Enum\StringToken; use PHPHtmlParser\Exceptions\ChildNotFoundException; use PHPHtmlParser\Exceptions\CircularException; use PHPHtmlParser\Exceptions\ContentLengthException; -use PHPHtmlParser\Exceptions\CurlException; use PHPHtmlParser\Exceptions\LogicalException; use PHPHtmlParser\Exceptions\NotLoadedException; use PHPHtmlParser\Exceptions\StrictException; @@ -72,9 +72,9 @@ class Dom /** * A global options array to be used by all load calls. * - * @var array + * @var ?Options */ - private $globalOptions = []; + private $globalOptions; /** * A persistent option object to be used for all options in the @@ -147,7 +147,7 @@ public function __get($name) * @throws StrictException * @throws LogicalException */ - public function loadFromFile(string $file, array $options = []): Dom + public function loadFromFile(string $file, ?Options $options = null): Dom { $content = @\file_get_contents($file); if ($content === false) { @@ -168,7 +168,7 @@ public function loadFromFile(string $file, array $options = []): Dom * @throws StrictException * @throws \Psr\Http\Client\ClientExceptionInterface */ - public function loadFromUrl(string $url, array $options = [], ?ClientInterface $client = null, ?RequestInterface $request = null): Dom + public function loadFromUrl(string $url, ?Options $options, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom { if ($client === null) { $client = new Client(); @@ -191,11 +191,15 @@ public function loadFromUrl(string $url, array $options = [], ?ClientInterface $ * @throws CircularException * @throws StrictException */ - public function loadStr(string $str, array $option = []): Dom + public function loadStr(string $str, ?Options $options = null): Dom { $this->options = new Options(); - $this->options->setOptions($this->globalOptions) - ->setOptions($option); + if ($this->globalOptions !== null) { + $this->options->setFromOptions($this->globalOptions); + } + if ($options !== null) { + $this->options->setFromOptions($options); + } $this->rawSize = \strlen($str); $this->raw = $str; @@ -216,7 +220,7 @@ public function loadStr(string $str, array $option = []): Dom * * @chainable */ - public function setOptions(array $options): Dom + public function setOptions(Options $options): Dom { $this->globalOptions = $options; @@ -235,9 +239,7 @@ public function find(string $selector, int $nth = null) { $this->isLoaded(); - $result = $this->root->find($selector, $nth); - - return $result; + return $this->root->find($selector, $nth); } /** @@ -463,7 +465,7 @@ private function isLoaded(): void */ private function clean(string $str): string { - if ($this->options->get('cleanupInput') != true) { + if ($this->options->isCleanupInput() != true) { // skip entire cleanup step return $str; } @@ -488,7 +490,7 @@ private function clean(string $str): string // clean out the \n\r $replace = ' '; - if ($this->options->get('preserveLineBreaks')) { + if ($this->options->isPreserveLineBreaks()) { $replace = ' '; } $str = \str_replace(["\r\n", "\r", "\n"], $replace, $str); @@ -515,7 +517,7 @@ private function clean(string $str): string } // strip out "; $dom = new Dom(); - $dom->setOptions(['cleanupInput' => false]); + $dom->setOptions((new Options())->setCleanupInput(false)); $dom->loadStr($html); $this->assertSame($html, $dom->root->outerHtml()); } @@ -213,7 +214,7 @@ public function testLoadUtf8() public function testLoadFileWhitespace() { $dom = new Dom(); - $dom->setOptions(['cleanupInput' => false]); + $dom->setOptions((new Options())->setCleanupInput(false)); $dom->loadFromFile('tests/data/files/whitespace.html'); $this->assertEquals(1, \count($dom->find('.class'))); $this->assertEquals('', (string) $dom); @@ -237,7 +238,8 @@ public function testLoadFileBigTwice() public function testLoadFileBigTwicePreserveOption() { $dom = new Dom(); - $dom->loadFromFile('tests/data/files/big.html', ['preserveLineBreaks' => true]); + $dom->loadFromFile('tests/data/files/big.html', + (new Options)->setPreserveLineBreaks(true)); $post = $dom->find('.post-row', 0); $this->assertEquals( "

Журчанье воды
\nЧерно-белые тени
\nВновь на фонтане

", @@ -261,7 +263,7 @@ public function testLoadFromUrl() ->andReturn($responseMock); $dom = new Dom(); - $dom->loadFromUrl('http://google.com', [], $clientMock); + $dom->loadFromUrl('http://google.com', null, $clientMock); $this->assertEquals('VonBurgermeister', $dom->find('.post-row div .post-user font', 0)->text); } @@ -397,9 +399,7 @@ public function testHasChildren() public function testWhitespaceInText() { $dom = new Dom(); - $dom->setOptions([ - 'removeDoubleSpace' => false, - ]); + $dom->setOptions((new Options())->setRemoveDoubleSpace(false)); $dom->loadStr('
    Hello world
'); $this->assertEquals('
    Hello world
', (string) $dom); } @@ -415,7 +415,7 @@ public function testGetComplexAttribute() public function testGetComplexAttributeHtmlSpecialCharsDecode() { $dom = new Dom(); - $dom->setOptions(['htmlSpecialCharsDecode' => true]); + $dom->setOptions((new Options())->setHtmlSpecialCharsDecode(true)); $dom->loadStr('Next >'); $a = $dom->find('a', 0); $this->assertEquals('Next >', $a->innerHtml); @@ -563,7 +563,7 @@ public function testLoadGetAttributeWithBackslash() public function test25ChildrenFound() { $dom = new Dom(); - $dom->setOptions(['whitespaceTextNode' => false]); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); $dom->loadFromFile('tests/data/files/51children.html'); $children = $dom->find('#red-line-g *'); $this->assertEquals(25, \count($children)); @@ -596,22 +596,21 @@ public function testLessThanCharacterInJavascript() $results = (new Dom())->loadStr('
', - [ - 'cleanupInput' => false, - 'removeScripts' => false - ])->find('body'); + (new Options())->setCleanupInput(false) + ->setRemoveScripts(false) + )->find('body'); $this->assertCount(1, $results); } public function testUniqueIdForAllObjects() { // Create a dom which will be used as a parent/container for a paragraph - $dom1 = new \PHPHtmlParser\Dom; + $dom1 = new \PHPHtmlParser\Dom(); $dom1->loadStr('
A container div
'); // Resets the counter (doesn't matter here as the counter was 0 even without resetting) $div = $dom1->firstChild(); // Create a paragraph outside of the first dom - $dom2 = new \PHPHtmlParser\Dom; + $dom2 = new \PHPHtmlParser\Dom(); $dom2->loadStr('

Our new paragraph.

'); // Resets the counter $paragraph = $dom2->firstChild(); @@ -647,7 +646,6 @@ public function testCompatibleWithWordPressShortcode() $node = $dom->find('p', 0); $this->assertEquals(' [wprs_alert type="success" content="this is a short code" /] ', $node->innerHtml); - } public function testBrokenHtml() diff --git a/tests/Options/CleanupTest.php b/tests/Options/CleanupTest.php index b7e5325..914078a 100755 --- a/tests/Options/CleanupTest.php +++ b/tests/Options/CleanupTest.php @@ -3,6 +3,7 @@ declare(strict_types=1); use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; class CleanupTest extends TestCase @@ -10,9 +11,7 @@ class CleanupTest extends TestCase public function testCleanupInputTrue() { $dom = new Dom(); - $dom->setOptions([ - 'cleanupInput' => true, - ]); + $dom->setOptions((new Options())->setCleanupInput(true)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(0, \count($dom->find('style'))); $this->assertEquals(0, \count($dom->find('script'))); @@ -21,9 +20,7 @@ public function testCleanupInputTrue() public function testCleanupInputFalse() { $dom = new Dom(); - $dom->setOptions([ - 'cleanupInput' => false, - ]); + $dom->setOptions((new Options())->setCleanupInput(false)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(1, \count($dom->find('style'))); $this->assertEquals(22, \count($dom->find('script'))); @@ -32,9 +29,7 @@ public function testCleanupInputFalse() public function testRemoveStylesTrue() { $dom = new Dom(); - $dom->setOptions([ - 'removeStyles' => true, - ]); + $dom->setOptions((new Options())->setRemoveStyles(true)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(0, \count($dom->find('style'))); } @@ -42,9 +37,7 @@ public function testRemoveStylesTrue() public function testRemoveStylesFalse() { $dom = new Dom(); - $dom->setOptions([ - 'removeStyles' => false, - ]); + $dom->setOptions((new Options())->setRemoveStyles(false)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(1, \count($dom->find('style'))); $this->assertEquals('text/css', @@ -54,9 +47,7 @@ public function testRemoveStylesFalse() public function testRemoveScriptsTrue() { $dom = new Dom(); - $dom->setOptions([ - 'removeScripts' => true, - ]); + $dom->setOptions((new Options())->setRemoveScripts(true)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(0, \count($dom->find('script'))); } @@ -64,9 +55,7 @@ public function testRemoveScriptsTrue() public function testRemoveScriptsFalse() { $dom = new Dom(); - $dom->setOptions([ - 'removeScripts' => false, - ]); + $dom->setOptions((new Options())->setRemoveScripts(false)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(22, \count($dom->find('script'))); $this->assertEquals('text/javascript', @@ -85,9 +74,7 @@ public function testSmartyScripts() public function testSmartyScriptsDisabled() { $dom = new Dom(); - $dom->setOptions([ - 'removeSmartyScripts' => false, - ]); + $dom->setOptions((new Options())->setRemoveSmartyScripts(false)); $dom->loadStr(' aa={123} '); diff --git a/tests/Options/PreserveLineBreaks.php b/tests/Options/PreserveLineBreaks.php index ad095a3..be39649 100755 --- a/tests/Options/PreserveLineBreaks.php +++ b/tests/Options/PreserveLineBreaks.php @@ -3,6 +3,7 @@ declare(strict_types=1); use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; class PreserveLineBreaks extends TestCase @@ -10,9 +11,8 @@ class PreserveLineBreaks extends TestCase public function testPreserveLineBreakTrue() { $dom = new Dom(); - $dom->setOptions([ - 'preserveLineBreaks' => true, - ]); + $dom->setOptions((new Options())->setPreserveLineBreaks(true)); + $dom->loadStr('
'); @@ -22,9 +22,7 @@ public function testPreserveLineBreakTrue() public function testPreserveLineBreakBeforeClosingTag() { $dom = new Dom(); - $dom->setOptions([ - 'preserveLineBreaks' => true, - ]); + $dom->setOptions((new Options())->setPreserveLineBreaks(true)); $dom->loadStr('
'); diff --git a/tests/Options/StrictTest.php b/tests/Options/StrictTest.php index 96d457b..709f292 100755 --- a/tests/Options/StrictTest.php +++ b/tests/Options/StrictTest.php @@ -4,6 +4,7 @@ use PHPHtmlParser\Dom; use PHPHtmlParser\Exceptions\StrictException; +use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; class StrictTest extends TestCase @@ -11,9 +12,7 @@ class StrictTest extends TestCase public function testConfigStrict() { $dom = new Dom(); - $dom->setOptions([ - 'strict' => true, - ]); + $dom->setOptions((new Options())->setStrict(true)); $dom->loadStr('

Hey you

Ya you!

'); $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text); } @@ -21,9 +20,7 @@ public function testConfigStrict() public function testConfigStrictMissingSelfClosing() { $dom = new Dom(); - $dom->setOptions([ - 'strict' => true, - ]); + $dom->setOptions((new Options())->setStrict(true)); try { // should throw an exception $dom->loadStr('

Hey you


Ya you!

'); @@ -37,9 +34,7 @@ public function testConfigStrictMissingSelfClosing() public function testConfigStrictMissingAttribute() { $dom = new Dom(); - $dom->setOptions([ - 'strict' => true, - ]); + $dom->setOptions((new Options())->setStrict(true)); try { // should throw an exception $dom->loadStr('

Hey you

Ya you!

'); @@ -53,9 +48,7 @@ public function testConfigStrictMissingAttribute() public function testConfigStrictBRTag() { $dom = new Dom(); - $dom->setOptions([ - 'strict' => true, - ]); + $dom->setOptions((new Options())->setStrict(true)); $dom->loadStr('
'); $this->assertTrue(true); } diff --git a/tests/Options/WhitespaceTextNodeTest.php b/tests/Options/WhitespaceTextNodeTest.php index 0097f28..245ef7f 100755 --- a/tests/Options/WhitespaceTextNodeTest.php +++ b/tests/Options/WhitespaceTextNodeTest.php @@ -3,6 +3,7 @@ declare(strict_types=1); use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; class WhitespaceTextNodeTest extends TestCase @@ -10,9 +11,7 @@ class WhitespaceTextNodeTest extends TestCase public function testConfigGlobalNoWhitespaceTextNode() { $dom = new Dom(); - $dom->setOptions([ - 'whitespaceTextNode' => false, - ]); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); $dom->loadStr('

Hey you

Ya you!

'); $this->assertEquals('Ya you!', $dom->getElementById('hey')->nextSibling()->text); } @@ -20,12 +19,8 @@ public function testConfigGlobalNoWhitespaceTextNode() public function testConfigLocalOverride() { $dom = new Dom(); - $dom->setOptions([ - 'whitespaceTextNode' => false, - ]); - $dom->loadStr('

Hey you

Ya you!

', [ - 'whitespaceTextNode' => true, - ]); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); + $dom->loadStr('

Hey you

Ya you!

', (new Options())->setWhitespaceTextNode(true)); $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text); } } diff --git a/tests/OptionsTest.php b/tests/OptionsTest.php index a78f508..f7406a1 100755 --- a/tests/OptionsTest.php +++ b/tests/OptionsTest.php @@ -2,8 +2,6 @@ declare(strict_types=1); -use PHPHtmlParser\Dom; -use PHPHtmlParser\Exceptions\UnknownOptionException; use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; @@ -13,137 +11,62 @@ public function testDefaultWhitespaceTextNode() { $options = new Options(); - $this->assertTrue($options->whitespaceTextNode); + $this->assertTrue($options->isWhitespaceTextNode()); } public function testSettingOption() { $options = new Options(); - $options->setOptions([ - 'strict' => true, - ]); - - $this->assertTrue($options->strict); - } - - public function testAddingOption() - { - $this->expectException(UnknownOptionException::class); + $options->setStrict(true); - $options = new Options(); - $options->setOptions([ - 'test' => true, - ]); + $this->assertTrue($options->isStrict()); } public function testOverwritingOption() { $options = new Options(); - $options->setOptions([ - 'strict' => false, - ])->setOptions([ - 'strict' => true, - 'whitespaceTextNode' => false, - ]); - - $this->assertTrue($options->get('strict')); - $this->assertFalse($options->get('whitespaceTextNode')); - } + $options->setStrict(false); + $options2 = new Options(); + $options2->setStrict(true); + $options2->setWhitespaceTextNode(false); + $options->setFromOptions($options2); - public function testGettingNoOption() - { - $options = new Options(); - $this->assertEquals(null, $options->get('doesnotexist')); + $this->assertTrue($options->isStrict()); + $this->assertFalse($options->isWhitespaceTextNode()); } public function testSetters() { $options = new Options(); - $options->setOptions([ - 'whitespaceTextNode' => false, - 'strict' => false, - 'enforceEncoding' => null, - 'cleanupInput' => false, - 'removeScripts' => false, - 'removeStyles' => false, - 'preserveLineBreaks' => false, - 'removeDoubleSpace' => false, - 'removeSmartyScripts' => false, - 'htmlSpecialCharsDecode' => false, - ]); - $options->setWhitespaceTextNode(true); - $this->assertTrue($options->get('whitespaceTextNode')); + $this->assertTrue($options->isWhitespaceTextNode()); $options->setStrict(true); - $this->assertTrue($options->get('strict')); + $this->assertTrue($options->isStrict()); $options->setEnforceEncoding('utf8'); - $this->assertEquals('utf8', $options->get('enforceEncoding')); + $this->assertEquals('utf8', $options->getEnforceEncoding()); $options->setCleanupInput(true); - $this->assertTrue($options->get('cleanupInput')); + $this->assertTrue($options->isCleanupInput()); $options->setRemoveScripts(true); - $this->assertTrue($options->get('removeScripts')); + $this->assertTrue($options->isRemoveScripts()); $options->setRemoveStyles(true); - $this->assertTrue($options->get('removeStyles')); + $this->assertTrue($options->isRemoveStyles()); $options->setPreserveLineBreaks(true); - $this->assertTrue($options->get('preserveLineBreaks')); + $this->assertTrue($options->isPreserveLineBreaks()); $options->setRemoveDoubleSpace(true); - $this->assertTrue($options->get('removeDoubleSpace')); + $this->assertTrue($options->isRemoveDoubleSpace()); $options->setRemoveSmartyScripts(true); - $this->assertTrue($options->get('removeSmartyScripts')); + $this->assertTrue($options->isRemoveSmartyScripts()); $options->setHtmlSpecialCharsDecode(true); - $this->assertTrue($options->get('htmlSpecialCharsDecode')); - - // now reset to false - - $options->setWhitespaceTextNode(false); - $this->assertFalse($options->get('whitespaceTextNode')); - - $options->setStrict(false); - $this->assertFalse($options->get('strict')); - - $options->setEnforceEncoding(null); - $this->assertNull($options->get('enforceEncoding')); - - $options->setCleanupInput(false); - $this->assertFalse($options->get('cleanupInput')); - - $options->setRemoveScripts(false); - $this->assertFalse($options->get('removeScripts')); - - $options->setRemoveStyles(false); - $this->assertFalse($options->get('removeStyles')); - - $options->setPreserveLineBreaks(false); - $this->assertFalse($options->get('preserveLineBreaks')); - - $options->setRemoveDoubleSpace(false); - $this->assertFalse($options->get('removeDoubleSpace')); - - $options->setRemoveSmartyScripts(false); - $this->assertFalse($options->get('removeSmartyScripts')); - - $options->setHtmlSpecialCharsDecode(false); - $this->assertFalse($options->get('htmlSpecialCharsDecode')); - } - - public function testUnknownOptionDom() - { - $dom = new Dom(); - $dom->setOptions([ - 'unknown_option' => true, - ]); - - $this->expectException(UnknownOptionException::class); - $dom->loadStr('
'); + $this->assertTrue($options->isHtmlSpecialCharsDecode()); } } diff --git a/tests/StaticDomTest.php b/tests/StaticDomTest.php index fbc1a5b..7345363 100755 --- a/tests/StaticDomTest.php +++ b/tests/StaticDomTest.php @@ -73,7 +73,7 @@ public function testLoadFromUrl() ->once() ->andReturn($responseMock); - Dom::loadFromUrl('http://google.com', [], $clientMock); + Dom::loadFromUrl('http://google.com', null, $clientMock); $this->assertEquals('VonBurgermeister', Dom::find('.post-row div .post-user font', 0)->text); } }