Skip to content

Commit 04129ff

Browse files
authored
[CybernewsBridge] add LIBXML_NONET and fix lint (#4727)
1 parent d9f0ef1 commit 04129ff

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

bridges/CybernewsBridge.php

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -13,40 +13,39 @@ class CybernewsBridge extends BridgeAbstract
1313

1414
public function collectData()
1515
{
16-
$sitemapXml = getContents(self::URI . '/news-sitemap.xml');
16+
$sitemapUrl = self::URI . '/news-sitemap.xml';
1717

18+
$sitemapXml = getContents($sitemapUrl);
1819
if (!$sitemapXml) {
1920
throwServerException('Unable to retrieve Cybernews sitemap');
2021
}
2122

22-
$sitemap = simplexml_load_string($sitemapXml, null, LIBXML_NOCDATA);
23-
23+
$sitemap = simplexml_load_string($sitemapXml, null, LIBXML_NOCDATA | LIBXML_NONET);
2424
if (!$sitemap) {
2525
throwServerException('Unable to parse Cybernews sitemap');
2626
}
2727

2828
foreach ($sitemap->url as $entry) {
29-
$url = trim((string) $entry->loc);
30-
$lastmod = trim((string) $entry->lastmod);
29+
$url = trim((string) $entry->loc);
30+
$lastmod = trim((string) $entry->lastmod);
3131

3232
if (!$url) {
3333
continue;
3434
}
3535

36-
$pathParts = explode('/', trim(parse_url($url, PHP_URL_PATH), '/'));
37-
$category = isset($pathParts[0]) && $pathParts[0] !== '' ? $pathParts[0] : '';
36+
$pathParts = explode('/', trim(parse_url($url, PHP_URL_PATH), '/'));
37+
$category = isset($pathParts[0]) && $pathParts[0] !== '' ? $pathParts[0] : '';
3838

3939
// Skip non-English versions
40-
if (in_array($category, ['nl', 'de'], true)) {
41-
continue;
42-
}
40+
// if (in_array($category, ['nl', 'de', 'es', 'it'], true)) {
41+
// continue;
42+
// }
4343

4444
$namespaces = $entry->getNamespaces(true);
4545
$title = '';
4646

4747
if (isset($namespaces['news'])) {
4848
$news = $entry->children($namespaces['news'])->news;
49-
5049
if ($news) {
5150
$title = trim((string) $news->title);
5251
}
@@ -74,18 +73,15 @@ public function collectData()
7473
private function fetchFullArticle(string $url): string
7574
{
7675
$html = getSimpleHTMLDOMCached($url);
77-
7876
if (!$html) {
7977
return 'Unable to fetch article content';
8078
}
8179

8280
$article = $html->find('article', 0);
83-
8481
if (!$article) {
8582
return 'Unable to parse article content';
8683
}
8784

88-
// Remove unnecessary elements
8985
$removeSelectors = [
9086
'script',
9187
'style',

0 commit comments

Comments
 (0)