From a430293a6ca3fd661c7608338c7b5aff4c66ce20 Mon Sep 17 00:00:00 2001 From: Albert Date: Fri, 17 Jul 2020 14:38:26 +0800 Subject: [PATCH] feature: innerText property support related: #222 related: !225 ```html <div>123 <a>456789 </a><span>101112</span></div> ``` ```javascript document.body.innerText; ``` output ``` 123 456789 101112 ``` ```PHP $html = <<<EOF <div>123 <a>456789 </a><span>101112</span></div> EOF; $dom = new Dom(); $dom->load($html); echo $dom->innerText; ``` expected output ``` 123 456789 101112 ``` --- src/PHPHtmlParser/Dom/Node/AbstractNode.php | 2 ++ src/PHPHtmlParser/Dom/Node/HtmlNode.php | 22 +++++++++++++++++++++ tests/DomTest.php | 10 ++++++++++ tests/Node/HtmlTest.php | 14 +++++++++++++ 4 files changed, 48 insertions(+) diff --git a/src/PHPHtmlParser/Dom/Node/AbstractNode.php b/src/PHPHtmlParser/Dom/Node/AbstractNode.php index 79beda5..97353c0 100644 --- a/src/PHPHtmlParser/Dom/Node/AbstractNode.php +++ b/src/PHPHtmlParser/Dom/Node/AbstractNode.php @@ -114,6 +114,8 @@ public function __get(string $key) return $this->outerHtml(); case 'innerhtml': return $this->innerHtml(); + case 'innertext': + return $this->innerText(); case 'text': return $this->text(); case 'tag': diff --git a/src/PHPHtmlParser/Dom/Node/HtmlNode.php b/src/PHPHtmlParser/Dom/Node/HtmlNode.php index 95b369f..d60d4b8 100644 --- a/src/PHPHtmlParser/Dom/Node/HtmlNode.php +++ b/src/PHPHtmlParser/Dom/Node/HtmlNode.php @@ -27,6 +27,13 @@ class HtmlNode extends InnerNode */ protected $outerHtml; + /** + * Remembers what the innerText was if it was scanned previously. + * + * @var ?string + */ + protected $innerText = null; + /** * Remembers what the text was if it was scanned previously. * @@ -111,6 +118,21 @@ public function innerHtml(): string return $string; } + /** + * Gets the inner text of this node. 
+ * @return string + * @throws ChildNotFoundException + * @throws UnknownChildTypeException + */ + public function innerText(): string + { + if (is_null($this->innerText)) { + $this->innerText = strip_tags($this->innerHtml()); + } + + return $this->innerText; + } + /** * Gets the html of this node, including it's own * tag. diff --git a/tests/DomTest.php b/tests/DomTest.php index 1183128..6fd7d95 100755 --- a/tests/DomTest.php +++ b/tests/DomTest.php @@ -330,6 +330,16 @@ public function testEmptyAttribute() $this->assertEquals(1, \count($items)); } + public function testInnerText() + { + $html = <<123456789101112 +EOF; + $dom = new Dom(); + $dom->loadStr($html); + $this->assertEquals($dom->innerText, "123456789101112"); + } + public function testMultipleSquareSelector() { $dom = new Dom(); diff --git a/tests/Node/HtmlTest.php b/tests/Node/HtmlTest.php index 55e2ad6..a81c167 100755 --- a/tests/Node/HtmlTest.php +++ b/tests/Node/HtmlTest.php @@ -323,6 +323,20 @@ public function testTextLookInChildren() $this->assertEquals('Please click me!', $node->text(true)); } + public function testInnerText() + { + $node = new HtmlNode('div'); + $node->addChild(new TextNode('123 ')); + $anode = new HtmlNode('a'); + $anode->addChild(new TextNode('456789 ')); + $span_node = new HtmlNode('span'); + $span_node->addChild(new TextNode('101112')); + + $node->addChild($anode); + $node->addChild($span_node); + $this->assertEquals($node->innerText(), '123 456789 101112'); + } + public function testTextLookInChildrenAndNoChildren() { $p = new HtmlNode('p');