Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Install PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}

- name: Cache PHP dependencies
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: vendor
key: ${{ runner.os }}-php-${{ matrix.php }}-composer-${{ hashFiles('**/composer.json') }}
Expand Down
55 changes: 40 additions & 15 deletions src/Document.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,56 @@ public function __construct(Extractor $extractor)
preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $contentType, $match);
if (!empty($match[1])) {
$encoding = trim($match[1], ',');
try {
$ret = mb_encoding_aliases($encoding ?? '');
if ($ret === false) {
$encoding = null;
}
} catch (\ValueError $exception) {
$encoding = null;
}
$encoding = $this->getValidEncoding($encoding);
}
if (is_null($encoding) && !empty($html)) {
preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $html, $match);
if (!empty($match[1])) {
$encoding = trim($match[1], ',');
$encoding = $this->getValidEncoding($encoding);
}
}
$this->document = !empty($html) ? Parser::parse($html, $encoding) : new DOMDocument();
$this->initXPath();
}

/**
* Get valid encoding name if it exists, otherwise return null
*
* Uses mb_encoding_aliases() to verify the encoding is valid.
*
* TODO: When dropping PHP 7.4 support, remove the PHP_VERSION_ID < 80000 branch.
* PHP version differences:
* - PHP 7.4: mb_encoding_aliases() returns false for invalid encoding and throws Warning for empty string
* - PHP 8.0+: mb_encoding_aliases() throws ValueError for invalid/empty encoding
*
* @see https://www.php.net/manual/en/function.mb-encoding-aliases.php
*/
private function getValidEncoding(?string $encoding): ?string
{
if (PHP_VERSION_ID < 80000) {
// PHP 7.4: Check return value (false = invalid encoding)
// Need to check empty() first to avoid Warning
// TODO: Remove this entire branch when PHP 7.4 support is dropped
if (empty($encoding)) {
return null;
}
$ret = mb_encoding_aliases($encoding);
if ($ret === false) {
return null;
} else {
return $encoding;
}
} else {
// PHP 8.0+: ValueError exception is thrown for invalid/empty encoding
try {
$ret = mb_encoding_aliases($encoding ?? '');
if ($ret === false) {
$encoding = null;
}
$aliases = mb_encoding_aliases($encoding ?? '');
// Check if aliases array is not empty (valid encoding should have at least one alias)
return !empty($aliases) ? $encoding : null;
} catch (\ValueError $exception) {
$encoding = null;
return null;
}
}
$this->document = !empty($html) ? Parser::parse($html, $encoding) : new DOMDocument();
$this->initXPath();
}
Comment on lines +59 to 84
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

PHP 8.0+ ブランチのエンコーディング検証ロジックに潜在的な問題があります。

Line 79 の !empty($aliases) チェックは、エイリアスを持たない有効なエンコーディングを誤って拒否する可能性があります。PHP 8.0+ では、mb_encoding_aliases() が ValueError をスローしない場合、そのエンコーディングは有効です。エイリアス配列が空でも、エンコーディング自体は有効である可能性があります。

以下の修正を適用してください:

         } else {
             // PHP 8.0+: ValueError exception is thrown for invalid/empty encoding
             try {
                 $aliases = mb_encoding_aliases($encoding ?? '');
-                // Check if aliases array is not empty (valid encoding should have at least one alias)
-                return !empty($aliases) ? $encoding : null;
+                // If no ValueError thrown, encoding is valid (even if aliases array is empty)
+                return $encoding;
             } catch (\ValueError $exception) {
                 return null;
             }
         }

任意の改善として、PHP 7.4 ブランチの else 句を簡略化できます:

             $ret = mb_encoding_aliases($encoding);
-            if ($ret === false) {
-                return null;
-            } else {
-                return $encoding;
-            }
+            return $ret === false ? null : $encoding;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
private function getValidEncoding(?string $encoding): ?string
{
if (PHP_VERSION_ID < 80000) {
// PHP 7.4: Check return value (false = invalid encoding)
// Need to check empty() first to avoid Warning
// TODO: Remove this entire branch when PHP 7.4 support is dropped
if (empty($encoding)) {
return null;
}
$ret = mb_encoding_aliases($encoding);
if ($ret === false) {
return null;
} else {
return $encoding;
}
} else {
// PHP 8.0+: ValueError exception is thrown for invalid/empty encoding
try {
$ret = mb_encoding_aliases($encoding ?? '');
if ($ret === false) {
$encoding = null;
}
$aliases = mb_encoding_aliases($encoding ?? '');
// Check if aliases array is not empty (valid encoding should have at least one alias)
return !empty($aliases) ? $encoding : null;
} catch (\ValueError $exception) {
$encoding = null;
return null;
}
}
$this->document = !empty($html) ? Parser::parse($html, $encoding) : new DOMDocument();
$this->initXPath();
}
/**
 * Return the given encoding name if mbstring accepts it, otherwise null.
 *
 * Validity is probed with mb_encoding_aliases(). Only the success or failure
 * of that call is used; the returned alias list itself is ignored, so an
 * encoding with no aliases is still treated as valid.
 *
 * @param string|null $encoding Candidate encoding name; may be null or empty.
 *
 * @return string|null The unchanged $encoding when it is valid, null otherwise.
 */
private function getValidEncoding(?string $encoding): ?string
{
    if (PHP_VERSION_ID >= 80000) {
        // PHP 8.0+: ValueError exception is thrown for invalid/empty encoding
        try {
            mb_encoding_aliases($encoding ?? '');
        } catch (\ValueError $exception) {
            return null;
        }

        // If no ValueError thrown, encoding is valid (even if aliases array is empty)
        return $encoding;
    }

    // PHP 7.4: Check return value (false = invalid encoding)
    // Need to check empty() first to avoid Warning
    // TODO: Remove this entire branch when PHP 7.4 support is dropped
    if (empty($encoding)) {
        return null;
    }

    return mb_encoding_aliases($encoding) === false ? null : $encoding;
}
🧰 Tools
🪛 PHPMD (2.15.0)

71-73: The method getValidEncoding uses an else expression. Else clauses are basically not necessary and you can simplify the code by not using them. (undefined)

(ElseExpression)


74-83: The method getValidEncoding uses an else expression. Else clauses are basically not necessary and you can simplify the code by not using them. (undefined)

(ElseExpression)

🤖 Prompt for AI Agents
In src/Document.php around lines 59 to 84, the PHP 8.0+ branch incorrectly
rejects encodings whose mb_encoding_aliases() returns an empty array; instead of
checking !empty($aliases) you should treat any non-exception return as a valid
encoding and simply return $encoding (or null on ValueError). Update the
try/catch to return $encoding when no exception is thrown and only return null
inside the catch; optionally simplify the PHP 7.4 branch else clause to just
return $encoding when $ret !== false.


private function initXPath()
Expand Down
2 changes: 1 addition & 1 deletion tests/DocumentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public function testSelectors()
$extractor = self::getEmbed()->get('http://www.wired.com/?p=2064839');
$document = $extractor->getDocument();

$expected = 23;
$expected = 3;

$this->assertCount($expected, $document->select('.//p')->nodes());
$this->assertCount($expected, $document->selectCss('p')->nodes());
Expand Down
3,832 changes: 1,999 additions & 1,833 deletions tests/cache/www.wired.com..1202600986b37d2c6a30336f82c671f8.php

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
'feeds' => [],
'icon' => 'https://4pda.to/s/as6ywymaTWM6wnea1mxojxCz0Yet7IeumfOBnaxb.png',
'image' => 'https://i.4pda.ws/s/as6yueQrUwnKt0LgJ5m26uBjbZsccTet21FqwJkADfGw.jpg?v=1669981373',
'keywords' => ['состоялся релиз clown of duty — пародии на call of duty'],
'keywords' => [
'состоялся релиз clown of duty — пародии на call of duty'
],
'language' => 'ru-RU',
'languages' => [],
'license' => null,
Expand All @@ -23,5 +25,5 @@
'url' => 'https://4pda.to/2022/12/04/406834/sostoyalsya_reliz_clown_of_duty_parodii_na_call_of_duty/',
'linkedData' => [],
'oEmbed' => [],
'allLinkedData' => [],
'allLinkedData' => []
];
73 changes: 39 additions & 34 deletions tests/fixtures/animoto.com.play-gjsj1gu0wdrfr4pgw12xzq.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,55 @@
declare(strict_types = 1);

return [
'authorName' => null,
'authorUrl' => null,
'authorName' => '@animoto',
'authorUrl' => 'https://twitter.com/animoto',
'cms' => null,
'code' => [
'html' => '<iframe id="vp1GjsJ1" title="Video Player" width="640" height="360" frameborder="0" src="https://s3.amazonaws.com/embed.animoto.com/play.html?w=swf/production/vp1&e=1617549702&f=GjsJ1gu0WDRfr4pGw12xZQ&d=0&m=p&r=360p&i=m&asset_domain=s3-p.animoto.com&animoto_domain=animoto.com&options=start_hq" allowfullscreen></iframe>',
'width' => 640,
'height' => 360,
'ratio' => 56.25
'code' => null,
'description' => 'Create, edit, and share videos with our free video maker. Combine your photos, video clips, and music to make quality videos in minutes. Get started free!',
'favicon' => 'https://animoto.com/favicon-32x32.png?v=f7bad0df2a4af8688773dca5ee0b1ed6',
'feeds' => [
'https://animoto.com/rss.xml'
],
'description' => 'Animoto makes video creation easy! Animoto\'s video maker turns your photos and video clips into professional videos in minutes. Fast and shockingly simple!',
'favicon' => 'https://d14pr3cu5atb0x.cloudfront.net/images/icons/favicon-fbb19e53d0.ico',
'feeds' => [],
'icon' => 'https://d14pr3cu5atb0x.cloudfront.net/images/icons/touchicon-144-4a42d97241.png',
'image' => 'https://d2m23yiuv18ohn.cloudfront.net/Video/GjsJ1gu0WDRfr4pGw12xZQ/cover_648x360.jpg',
'icon' => 'https://animoto.com/icons/icon-48x48.png?v=f7bad0df2a4af8688773dca5ee0b1ed6',
'image' => null,
'keywords' => [],
'language' => null,
'languages' => [],
'license' => null,
'providerName' => 'Animoto',
'providerUrl' => 'https://animoto.com/',
'providerUrl' => 'https://animoto.com',
'publishedTime' => null,
'redirect' => null,
'title' => 'taco bell',
'title' => 'Free Video Maker | Create & Edit Your Videos Easily',
'url' => 'https://animoto.com/play/GjsJ1gu0WDRfr4pGw12xZQ',
'linkedData' => [],
'linkedData' => [
'@context' => 'http://schema.org',
'@type' => 'WebSite',
'url' => 'https://animoto.com',
'name' => 'Animoto video maker - Stand out on social media. Easily.',
'alternateName' => ''
],
'oEmbed' => [
'version' => 1.0,
'provider_name' => 'Animoto',
'provider_url' => 'https://animoto.com/',
'type' => 'video',
'author_name' => null,
'title' => 'taco bell',
'description' => '',
'thumbnail_url' => 'https://d2m23yiuv18ohn.cloudfront.net/Video/GjsJ1gu0WDRfr4pGw12xZQ/cover_648x360.jpg',
'thumbnail_height' => 360,
'thumbnail_width' => 648,
'icon_url' => 'https://d2m23yiuv18ohn.cloudfront.net/Video/GjsJ1gu0WDRfr4pGw12xZQ/cover_224x126.jpg',
'icon_height' => 54,
'icon_width' => 54,
'width' => 640,
'height' => 360,
'cache_age' => 604800,
'video_url' => 'https://d150hyw1dtprld.cloudfront.net/swf/w.swf?w=swf/production/vp1&e=1617549702&f=GjsJ1gu0WDRfr4pGw12xZQ&d=0&m=p&r=360p&i=m&asset_domain=s3-p.animoto.com&animoto_domain=animoto.com&options=start_hq',
'html' => '<iframe id="vp1GjsJ1" title="Video Player" width="640" height="360" frameborder="0" src="https://s3.amazonaws.com/embed.animoto.com/play.html?w=swf/production/vp1&e=1617549702&f=GjsJ1gu0WDRfr4pGw12xZQ&d=0&m=p&r=360p&i=m&asset_domain=s3-p.animoto.com&animoto_domain=animoto.com&options=start_hq" allowfullscreen></iframe>'
'error' => 'URL not supported or not found',
'url' => 'https://animoto.com/play/GjsJ1gu0WDRfr4pGw12xZQ'
],
'allLinkedData' => []
'allLinkedData' => [
[
'@context' => 'http://schema.org',
'@type' => 'WebSite',
'url' => 'https://animoto.com',
'name' => 'Animoto video maker - Stand out on social media. Easily.',
'alternateName' => ''
],
[
'@context' => 'http://schema.org',
'@type' => 'VideoObject',
'name' => 'Animoto: Free Online Video Maker',
'contentUrl' => 'https://d2of6bhnpl91ni.cloudfront.net/cms/animoto-free-online-video-maker-e8d6870030.mp4',
'description' => 'With Animoto, you\'ll have everything you need to create your own professional videos in minutes. No experience required. All it takes is an idea. ',
'thumbnailUrl' => '//images.ctfassets.net/00i767ygo3tc/010bacg5wwIhMMx6xYS3qj/18e39c5d16f5614a3b477a284faea1a2/free-online-video-maker.webp',
'transcript' => 'We all have a story to tell, and video is the best way to make yours stand out. With Animoto, you\'ll have everything you need to create your own professional videos in minutes. No experience required. All it takes is an idea. We\'ve made it easy to get started with customizable templates for everything from saying Happy Birthday to selling your product. From there, it\'s as simple as dragging and dropping your photos and video clips, choosing from our millions of Getty stock images or recording with our built-in screen and webcam recorder. Customize your videos with transitions, music, voiceovers, and more. Then bring your videos on brand with a single click. Make sure your story is heard with Animoto. Everything you need to create your own videos is right at your fingertips. Start creating for free.',
'uploadDate' => '2020-09-15T21:58:55.636Z'
]
]
];
15 changes: 8 additions & 7 deletions tests/fixtures/archive.org.details-dn2015-0220_vid.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
],
'oEmbed' => [],
'api' => [
'server' => 'ia802600.us.archive.org',
'dir' => '/20/items/dn2015-0220_vid',
'server' => 'ia801600.us.archive.org',
'dir' => '/34/items/dn2015-0220_vid',
'metadata' => [
'identifier' => [
'dn2015-0220_vid'
Expand Down Expand Up @@ -786,11 +786,12 @@
'/dn2015-0220_vid_files.xml' => [
'source' => 'original',
'format' => 'Metadata',
'md5' => '745ea2f6dde93e4b70b1c0b238d4c0e2'
'md5' => 'c8085d21bd5d528af0697f7d1cfff599',
'summation' => 'md5'
],
'/dn2015-0220_vid_meta.xml' => [
'source' => 'original',
'mtime' => '1542757137',
'mtime' => '1675274129',
'size' => '1973',
'format' => 'Metadata',
'md5' => '6a144c80a58ab5f08c0ecffdb580954a',
Expand All @@ -799,12 +800,12 @@
]
],
'misc' => [
'image' => 'https://ia802600.us.archive.org/20/items/dn2015-0220_vid/dn2015-0220.gif',
'image' => 'https://ia801600.us.archive.org/34/items/dn2015-0220_vid/dn2015-0220.gif',
'collection-title' => 'Democracy Now!'
],
'item' => [
'downloads' => 132,
'month' => 2,
'downloads' => 156,
'month' => 0,
'item_size' => 3667677269,
'files_count' => 68,
'item_count' => null,
Expand Down
12 changes: 6 additions & 6 deletions tests/fixtures/codepen.io.zhouzi-pen-jorazp.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

return [
'authorName' => 'Gabin Aureche',
'authorUrl' => 'https://codepen.io/Zhouzi/',
'authorUrl' => 'https://codepen.io/Zhouzi',
'cms' => null,
'code' => [
'html' => '<iframe id="cp_embed_JoRazP" src="https://codepen.io/Zhouzi/embed/preview/JoRazP?height=300&amp;slug-hash=JoRazP&amp;default-tabs=js,result&amp;host=https://codepen.io" title="TheaterJS" scrolling="no" frameborder="0" height="300" allowtransparency="true" class="cp_embed_iframe" style="width: 100%; overflow: hidden;"></iframe>',
'html' => '<iframe id="cp_embed_JoRazP" src="https://codepen.io/Zhouzi/embed/preview/JoRazP?default-tabs=js%2Cresult&amp;height=300&amp;host=https%3A%2F%2Fcodepen.io&amp;slug-hash=JoRazP" title="TheaterJS" scrolling="no" frameborder="0" height="300" allowtransparency="true" class="cp_embed_iframe" style="width: 100%; overflow: hidden;"></iframe>',
'width' => 800,
'height' => 300,
'ratio' => 37.5
Expand All @@ -15,7 +15,7 @@
'favicon' => 'https://codepen.io/favicon.ico',
'feeds' => [],
'icon' => null,
'image' => 'https://assets.codepen.io/99102/internal/screenshots/pens/JoRazP.default.png?fit=cover&format=auto&ha=true&height=360&quality=75&v=2&version=1467971314&width=640',
'image' => 'https://shots.codepen.io/username/pen/JoRazP-512.jpg?version=1467971314',
'keywords' => [],
'language' => 'en-US',
'languages' => [],
Expand All @@ -35,13 +35,13 @@
'provider_url' => 'https://codepen.io',
'title' => 'TheaterJS',
'author_name' => 'Gabin Aureche',
'author_url' => 'https://codepen.io/Zhouzi/',
'author_url' => 'https://codepen.io/Zhouzi',
'height' => '300',
'width' => '800',
'thumbnail_width' => '384',
'thumbnail_height' => '225',
'thumbnail_url' => 'https://assets.codepen.io/99102/internal/screenshots/pens/JoRazP.default.png?fit=cover&amp;format=auto&amp;ha=true&amp;height=360&amp;quality=75&amp;v=2&amp;version=1467971314&amp;width=640',
'html' => '<iframe id="cp_embed_JoRazP" src="https://codepen.io/Zhouzi/embed/preview/JoRazP?height=300&amp;slug-hash=JoRazP&amp;default-tabs=js,result&amp;host=https://codepen.io" title="TheaterJS" scrolling="no" frameborder="0" height="300" allowtransparency="true" class="cp_embed_iframe" style="width: 100%; overflow: hidden;"></iframe>'
'thumbnail_url' => 'https://shots.codepen.io/username/pen/JoRazP-512.jpg?version=1467971314',
'html' => '<iframe id="cp_embed_JoRazP" src="https://codepen.io/Zhouzi/embed/preview/JoRazP?default-tabs=js%2Cresult&amp;height=300&amp;host=https%3A%2F%2Fcodepen.io&amp;slug-hash=JoRazP" title="TheaterJS" scrolling="no" frameborder="0" height="300" allowtransparency="true" class="cp_embed_iframe" style="width: 100%; overflow: hidden;"></iframe>'
],
'allLinkedData' => []
];
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@
'cms' => null,
'code' => null,
'description' => null,
'favicon' => 'https://ssl.gstatic.com/images/branding/product/1x/drive_2020q4_32dp.png',
'favicon' => 'https://drive.google.com/favicon.ico',
'feeds' => [],
'icon' => null,
'image' => null,
'keywords' => [],
'language' => null,
'languages' => [],
'license' => null,
'providerName' => 'Google Docs',
'providerName' => 'Google',
'providerUrl' => 'https://drive.google.com',
'publishedTime' => null,
'redirect' => null,
'title' => 'Entrevista_Rianxo_RadioFusion_150724.mp3',
'title' => null,
'url' => 'https://drive.google.com/file/d/0B2rwN8wAbVSWbmFJdUdnV2VSTTg/view',
'linkedData' => [],
'oEmbed' => [],
Expand Down
Loading