diff --git a/bridges/CybernewsBridge.php b/bridges/CybernewsBridge.php index 64146f6f913..1a01a8912d2 100644 --- a/bridges/CybernewsBridge.php +++ b/bridges/CybernewsBridge.php @@ -13,40 +13,39 @@ class CybernewsBridge extends BridgeAbstract public function collectData() { - $sitemapXml = getContents(self::URI . '/news-sitemap.xml'); + $sitemapUrl = self::URI . '/news-sitemap.xml'; + $sitemapXml = getContents($sitemapUrl); if (!$sitemapXml) { throwServerException('Unable to retrieve Cybernews sitemap'); } - $sitemap = simplexml_load_string($sitemapXml, null, LIBXML_NOCDATA); - + $sitemap = simplexml_load_string($sitemapXml, null, LIBXML_NOCDATA | LIBXML_NONET); if (!$sitemap) { throwServerException('Unable to parse Cybernews sitemap'); } foreach ($sitemap->url as $entry) { - $url = trim((string) $entry->loc); - $lastmod = trim((string) $entry->lastmod); + $url = trim((string) $entry->loc); + $lastmod = trim((string) $entry->lastmod); if (!$url) { continue; } - $pathParts = explode('/', trim(parse_url($url, PHP_URL_PATH), '/')); - $category = isset($pathParts[0]) && $pathParts[0] !== '' ? $pathParts[0] : ''; + $pathParts = explode('/', trim(parse_url($url, PHP_URL_PATH), '/')); + $category = isset($pathParts[0]) && $pathParts[0] !== '' ? $pathParts[0] : ''; // Skip non-English versions - if (in_array($category, ['nl', 'de'], true)) { - continue; - } + // if (in_array($category, ['nl', 'de', 'es', 'it'], true)) { + // continue; + // } $namespaces = $entry->getNamespaces(true); $title = ''; if (isset($namespaces['news'])) { $news = $entry->children($namespaces['news'])->news; - if ($news) { $title = trim((string) $news->title); } @@ -74,18 +73,15 @@ public function collectData() private function fetchFullArticle(string $url): string { $html = getSimpleHTMLDOMCached($url); - if (!$html) { return 'Unable to fetch article content'; } $article = $html->find('article', 0); - if (!$article) { return 'Unable to parse article content'; } - // Remove unnecessary elements $removeSelectors = [ 'script', 'style',