diff --git a/CHANGELOG.md b/CHANGELOG.md index 9862bea..05d2146 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Cleaned up the selector logic. - Fixed issue with greedy regex for charset detection. - Fixed bug causing infinite loops in some cases. +- Refactored the way we handle options. Removed the magical option array. ### Removed - Curl interface and curl implementation has been removed. diff --git a/composer.json b/composer.json index 79258c5..5549a5e 100755 --- a/composer.json +++ b/composer.json @@ -20,7 +20,8 @@ "paquettg/string-encode": "~1.0.0", "php-http/httplug": "^2.1", "php-http/guzzle6-adapter": "^2.0", - "guzzlehttp/psr7": "^1.6" + "guzzlehttp/psr7": "^1.6", + "myclabs/php-enum": "^1.7" }, "require-dev": { "phpunit/phpunit": "^7.5.1", diff --git a/src/PHPHtmlParser/Content.php b/src/PHPHtmlParser/Content.php index 66bc779..fdb741c 100755 --- a/src/PHPHtmlParser/Content.php +++ b/src/PHPHtmlParser/Content.php @@ -4,6 +4,7 @@ namespace PHPHtmlParser; +use PHPHtmlParser\Enum\StringToken; use PHPHtmlParser\Exceptions\ContentLengthException; use PHPHtmlParser\Exceptions\LogicalException; @@ -75,11 +76,12 @@ public function char(?int $char = null): string * Moves the current position forward. * * @chainable + * * @throws ContentLengthException */ public function fastForward(int $count): Content { - if (!$this->canFastForward()) { + if (!$this->canFastForward($count)) { // trying to go over the content length, throw exception throw new ContentLengthException('Attempt to fastForward pass the length of the content.'); } @@ -91,9 +93,9 @@ public function fastForward(int $count): Content /** * Checks if we can move the position forward. */ - public function canFastForward(): bool + public function canFastForward(int $count): bool { - return \strlen($this->content) > $this->pos; + return \strlen($this->content) >= $this->pos + $count; } /** @@ -175,8 +177,6 @@ public function copyUntil(string $string, bool $char = false, bool $escape = fal /** * Copies the content until the string is found and return it * unless the 'unless' is found in the substring. - * - * @return string */ public function copyUntilUnless(string $string, string $unless): string { @@ -197,13 +197,11 @@ public function copyUntilUnless(string $string, string $unless): string /** * Copies the content until it reaches the token string.,. * - * @return string - * * @uses $this->copyUntil() */ - public function copyByToken(string $token, bool $char = false, bool $escape = false) + public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string { - $string = $this->$token; + $string = $stringToken->getValue(); return $this->copyUntil($string, $char, $escape); } @@ -236,13 +234,11 @@ public function skip(string $string, bool $copy = false): string /** * Skip a given token of pre-defined characters. * - * @return Content|string - * * @uses $this->skip() */ - public function skipByToken(string $token, bool $copy = false) + public function skipByToken(StringToken $skipToken, bool $copy = false): string { - $string = $this->$token; + $string = $skipToken->getValue(); return $this->skip($string, $copy); } diff --git a/src/PHPHtmlParser/Dom.php b/src/PHPHtmlParser/Dom.php index d23110d..d2db15e 100755 --- a/src/PHPHtmlParser/Dom.php +++ b/src/PHPHtmlParser/Dom.php @@ -10,10 +10,10 @@ use PHPHtmlParser\Dom\Collection; use PHPHtmlParser\Dom\HtmlNode; use PHPHtmlParser\Dom\TextNode; +use PHPHtmlParser\Enum\StringToken; use PHPHtmlParser\Exceptions\ChildNotFoundException; use PHPHtmlParser\Exceptions\CircularException; use PHPHtmlParser\Exceptions\ContentLengthException; -use PHPHtmlParser\Exceptions\CurlException; use PHPHtmlParser\Exceptions\LogicalException; use PHPHtmlParser\Exceptions\NotLoadedException; use PHPHtmlParser\Exceptions\StrictException; @@ -72,9 +72,9 @@ class Dom /** * A global options array to be used by all load calls. * - * @var array + * @var ?Options */ - private $globalOptions = []; + private $globalOptions; /** * A persistent option object to be used for all options in the @@ -147,7 +147,7 @@ public function __get($name) * @throws StrictException * @throws LogicalException */ - public function loadFromFile(string $file, array $options = []): Dom + public function loadFromFile(string $file, ?Options $options = null): Dom { $content = @\file_get_contents($file); if ($content === false) { @@ -168,7 +168,7 @@ public function loadFromFile(string $file, array $options = []): Dom * @throws StrictException * @throws \Psr\Http\Client\ClientExceptionInterface */ - public function loadFromUrl(string $url, array $options = [], ?ClientInterface $client = null, ?RequestInterface $request = null): Dom + public function loadFromUrl(string $url, ?Options $options, ?ClientInterface $client = null, ?RequestInterface $request = null): Dom { if ($client === null) { $client = new Client(); @@ -191,11 +191,15 @@ public function loadFromUrl(string $url, array $options = [], ?ClientInterface $ * @throws CircularException * @throws StrictException */ - public function loadStr(string $str, array $option = []): Dom + public function loadStr(string $str, ?Options $options = null): Dom { $this->options = new Options(); - $this->options->setOptions($this->globalOptions) - ->setOptions($option); + if ($this->globalOptions !== null) { + $this->options->setFromOptions($this->globalOptions); + } + if ($options !== null) { + $this->options->setFromOptions($options); + } $this->rawSize = \strlen($str); $this->raw = $str; @@ -216,7 +220,7 @@ public function loadStr(string $str, array $option = []): Dom * * @chainable */ - public function setOptions(array $options): Dom + public function setOptions(Options $options): Dom { $this->globalOptions = $options; @@ -235,9 +239,7 @@ public function find(string $selector, int $nth = null) { $this->isLoaded(); - $result = $this->root->find($selector, $nth); - - return $result; + return $this->root->find($selector, $nth); } /** @@ -463,7 +465,7 @@ private function isLoaded(): void */ private function clean(string $str): string { - if ($this->options->get('cleanupInput') != true) { + if ($this->options->isCleanupInput() != true) { // skip entire cleanup step return $str; } @@ -488,7 +490,7 @@ private function clean(string $str): string // clean out the \n\r $replace = ' '; - if ($this->options->get('preserveLineBreaks')) { + if ($this->options->isPreserveLineBreaks()) { $replace = ' '; } $str = \str_replace(["\r\n", "\r", "\n"], $replace, $str); @@ -515,7 +517,7 @@ private function clean(string $str): string } // strip out "; $dom = new Dom(); - $dom->setOptions(['cleanupInput' => false]); + $dom->setOptions((new Options())->setCleanupInput(false)); $dom->loadStr($html); $this->assertSame($html, $dom->root->outerHtml()); } @@ -213,7 +214,7 @@ public function testLoadUtf8() public function testLoadFileWhitespace() { $dom = new Dom(); - $dom->setOptions(['cleanupInput' => false]); + $dom->setOptions((new Options())->setCleanupInput(false)); $dom->loadFromFile('tests/data/files/whitespace.html'); $this->assertEquals(1, \count($dom->find('.class'))); $this->assertEquals('', (string) $dom); @@ -237,7 +238,8 @@ public function testLoadFileBigTwice() public function testLoadFileBigTwicePreserveOption() { $dom = new Dom(); - $dom->loadFromFile('tests/data/files/big.html', ['preserveLineBreaks' => true]); + $dom->loadFromFile('tests/data/files/big.html', + (new Options)->setPreserveLineBreaks(true)); $post = $dom->find('.post-row', 0); $this->assertEquals( "
Журчанье воды
\nЧерно-белые тени
\nВновь на фонтане
Hello world'); $this->assertEquals('
Hello world', (string) $dom); } @@ -415,7 +415,7 @@ public function testGetComplexAttribute() public function testGetComplexAttributeHtmlSpecialCharsDecode() { $dom = new Dom(); - $dom->setOptions(['htmlSpecialCharsDecode' => true]); + $dom->setOptions((new Options())->setHtmlSpecialCharsDecode(true)); $dom->loadStr('Next >'); $a = $dom->find('a', 0); $this->assertEquals('Next >', $a->innerHtml); @@ -563,7 +563,7 @@ public function testLoadGetAttributeWithBackslash() public function test25ChildrenFound() { $dom = new Dom(); - $dom->setOptions(['whitespaceTextNode' => false]); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); $dom->loadFromFile('tests/data/files/51children.html'); $children = $dom->find('#red-line-g *'); $this->assertEquals(25, \count($children)); @@ -596,22 +596,21 @@ public function testLessThanCharacterInJavascript() $results = (new Dom())->loadStr('', - [ - 'cleanupInput' => false, - 'removeScripts' => false - ])->find('body'); + (new Options())->setCleanupInput(false) + ->setRemoveScripts(false) + )->find('body'); $this->assertCount(1, $results); } public function testUniqueIdForAllObjects() { // Create a dom which will be used as a parent/container for a paragraph - $dom1 = new \PHPHtmlParser\Dom; + $dom1 = new \PHPHtmlParser\Dom(); $dom1->loadStr('
Our new paragraph.
'); // Resets the counter $paragraph = $dom2->firstChild(); @@ -647,7 +646,6 @@ public function testCompatibleWithWordPressShortcode() $node = $dom->find('p', 0); $this->assertEquals(' [wprs_alert type="success" content="this is a short code" /] ', $node->innerHtml); - } public function testBrokenHtml() diff --git a/tests/Options/CleanupTest.php b/tests/Options/CleanupTest.php index b7e5325..914078a 100755 --- a/tests/Options/CleanupTest.php +++ b/tests/Options/CleanupTest.php @@ -3,6 +3,7 @@ declare(strict_types=1); use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; class CleanupTest extends TestCase @@ -10,9 +11,7 @@ class CleanupTest extends TestCase public function testCleanupInputTrue() { $dom = new Dom(); - $dom->setOptions([ - 'cleanupInput' => true, - ]); + $dom->setOptions((new Options())->setCleanupInput(true)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(0, \count($dom->find('style'))); $this->assertEquals(0, \count($dom->find('script'))); @@ -21,9 +20,7 @@ public function testCleanupInputTrue() public function testCleanupInputFalse() { $dom = new Dom(); - $dom->setOptions([ - 'cleanupInput' => false, - ]); + $dom->setOptions((new Options())->setCleanupInput(false)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(1, \count($dom->find('style'))); $this->assertEquals(22, \count($dom->find('script'))); @@ -32,9 +29,7 @@ public function testCleanupInputFalse() public function testRemoveStylesTrue() { $dom = new Dom(); - $dom->setOptions([ - 'removeStyles' => true, - ]); + $dom->setOptions((new Options())->setRemoveStyles(true)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(0, \count($dom->find('style'))); } @@ -42,9 +37,7 @@ public function testRemoveStylesTrue() public function testRemoveStylesFalse() { $dom = new Dom(); - $dom->setOptions([ - 'removeStyles' => false, - ]); + $dom->setOptions((new Options())->setRemoveStyles(false)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(1, \count($dom->find('style'))); $this->assertEquals('text/css', @@ -54,9 +47,7 @@ public function testRemoveStylesFalse() public function testRemoveScriptsTrue() { $dom = new Dom(); - $dom->setOptions([ - 'removeScripts' => true, - ]); + $dom->setOptions((new Options())->setRemoveScripts(true)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(0, \count($dom->find('script'))); } @@ -64,9 +55,7 @@ public function testRemoveScriptsTrue() public function testRemoveScriptsFalse() { $dom = new Dom(); - $dom->setOptions([ - 'removeScripts' => false, - ]); + $dom->setOptions((new Options())->setRemoveScripts(false)); $dom->loadFromFile('tests/data/files/big.html'); $this->assertEquals(22, \count($dom->find('script'))); $this->assertEquals('text/javascript', @@ -85,9 +74,7 @@ public function testSmartyScripts() public function testSmartyScriptsDisabled() { $dom = new Dom(); - $dom->setOptions([ - 'removeSmartyScripts' => false, - ]); + $dom->setOptions((new Options())->setRemoveSmartyScripts(false)); $dom->loadStr(' aa={123} '); diff --git a/tests/Options/PreserveLineBreaks.php b/tests/Options/PreserveLineBreaks.php index ad095a3..be39649 100755 --- a/tests/Options/PreserveLineBreaks.php +++ b/tests/Options/PreserveLineBreaks.php @@ -3,6 +3,7 @@ declare(strict_types=1); use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; class PreserveLineBreaks extends TestCase @@ -10,9 +11,8 @@ class PreserveLineBreaks extends TestCase public function testPreserveLineBreakTrue() { $dom = new Dom(); - $dom->setOptions([ - 'preserveLineBreaks' => true, - ]); + $dom->setOptions((new Options())->setPreserveLineBreaks(true)); + $dom->loadStr('Hey you
Ya you!
Hey you
Ya you!
Hey you
Ya you!
Hey you
Ya you!
Hey you
Ya you!
Hey you
Ya you!