diff --git a/res/templates/page.html b/res/templates/page.html deleted file mode 100644 index 3035a0430..000000000 --- a/res/templates/page.html +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - __ARTICLE_CANONICAL_LINK__ - __ARTICLE_CSS_LIST__ - __CSS_LINKS__ - __JS_SCRIPTS__ - - - -
-
-
- -
-

-
-
-
-
-
-
- __ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ - - - diff --git a/res/templates/pageWikimediaDesktop.html b/res/templates/pageWikimediaDesktop.html new file mode 100644 index 000000000..b2141299a --- /dev/null +++ b/res/templates/pageWikimediaDesktop.html @@ -0,0 +1,24 @@ + + + + + + __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ + __CSS_LINKS__ __JS_SCRIPTS__ + + +
+
+
+ +
+

+
+
+
+
+
+ __ARTICLE_CONFIGVARS_LIST__ + __ARTICLE_JS_LIST__ + + diff --git a/res/templates/pageWikimediaMobile.html b/res/templates/pageWikimediaMobile.html new file mode 100644 index 000000000..79d43efc0 --- /dev/null +++ b/res/templates/pageWikimediaMobile.html @@ -0,0 +1,24 @@ + + + + + + __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ + __CSS_LINKS__ __ARTICLE_JS_LIST__ + + +
+
+
+ +
+

+
+
+
+
+
+ __ARTICLE_CONFIGVARS_LIST__ + __JS_SCRIPTS__ + + diff --git a/res/wm_mobile_override_script.js b/res/wm_mobile_override_script.js new file mode 100644 index 000000000..ce316eff0 --- /dev/null +++ b/res/wm_mobile_override_script.js @@ -0,0 +1,15 @@ +function importScript() { return 1 } // this is to avoid the error from site.js + +window.onload = function () { + // Check if there is a Wikimedia mobile output page + if (document.querySelector('#pcs')) { + const supElements = document.querySelectorAll('sup'); + const linkElements = document.querySelectorAll('a'); + const disabledElems = Array.from(supElements).concat(Array.from(linkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + } +} diff --git a/res/wm_mobile_override_style.css b/res/wm_mobile_override_style.css new file mode 100644 index 000000000..ab31918bf --- /dev/null +++ b/res/wm_mobile_override_style.css @@ -0,0 +1,20 @@ +body { + margin: 0 auto !important; +} +p#pcs-edit-section-add-title-description { + display: none !important; +} +span.noviewer { + display: none !important; +} +.reference-link::after { + content: none !important; +} +.mw-body h3, .mw-body h2 { + width: auto; +} + +.thumbinner img.pcs-widen-image-override { + width: auto !important; + max-width: 100% !important; +} diff --git a/src/Downloader.ts b/src/Downloader.ts index 8a8ca7122..afb053573 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -87,6 +87,8 @@ class Downloader { public arrayBufferRequestOptions: AxiosRequestConfig public jsonRequestOptions: AxiosRequestConfig public streamRequestOptions: AxiosRequestConfig + public wikimediaMobileJsDependenciesList: string[] = [] + public wikimediaMobileStyleDependenciesList: string[] = [] private readonly uaString: string private activeRequests = 0 @@ -171,21 +173,23 @@ class Downloader { if (!forceRender) { //* Objects order in array matters! 
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, ]) //* Objects order in array matters! this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, ]) } else { switch (forceRender) { case 'WikimediaDesktop': - if (MediaWiki.hasWikimediaDesktopRestApi()) { - this.baseUrl = MediaWiki.desktopRestApiUrl.href - this.baseUrlForMainPage = MediaWiki.desktopRestApiUrl.href + if (MediaWiki.hasWikimediaDesktopApi()) { + this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href + this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href break } break @@ -196,6 +200,13 @@ class Downloader { break } break + case 'WikimediaMobile': + if (MediaWiki.hasWikimediaMobileApi()) { + this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href + this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href + break + } + break default: throw new Error('Unable to find specific API end-point to retrieve article HTML') } @@ -685,7 +696,27 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view' - return { jsConfigVars, jsDependenciesList, styleDependenciesList } + // 
Download mobile page dependencies only once + if ((await MediaWiki.hasWikimediaMobileApi()) && this.wikimediaMobileJsDependenciesList.length === 0 && this.wikimediaMobileStyleDependenciesList.length === 0) { + try { + // TODO: An arbitrary title can be placed since all Wikimedia wikis have the same mobile offline resources + const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}Test`) + mobileModulesData.forEach((module: string) => { + if (module.includes('javascript')) { + this.wikimediaMobileJsDependenciesList.push(module) + } else if (module.includes('css')) { + this.wikimediaMobileStyleDependenciesList.push(module) + } + }) + } catch (err) { + throw new Error(`Error getting mobile modules ${err.message}`) + } + } + return { + jsConfigVars, + jsDependenciesList: jsDependenciesList.concat(this.wikimediaMobileJsDependenciesList), + styleDependenciesList: styleDependenciesList.concat(this.wikimediaMobileStyleDependenciesList), + } } // Solution to handle aws js sdk v3 from https://github.com/aws/aws-sdk-js-v3/issues/1877 diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 65ca1055d..60faa856d 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -9,7 +9,8 @@ import semver from 'semver' import basicURLDirector from './util/builders/url/basic.director.js' import BaseURLDirector from './util/builders/url/base.director.js' import ApiURLDirector from './util/builders/url/api.director.js' -import DesktopURLDirector from './util/builders/url/desktop.director.js' +import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js' +import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js' import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' import { checkApiAvailability } from './util/mw-api.js' import { BLACKLISTED_NS } from './util/const.js' @@ -43,23 +44,27 @@ class MediaWiki { public queryOpts: QueryOpts #wikiPath: string - #restApiPath: string + #apiPath: string #username: 
string #password: string - #apiPath: string + #apiActionPath: string #domain: string private apiUrlDirector: ApiURLDirector - private wikimediaDesktopUrlDirector: DesktopURLDirector - private visualEditorURLDirector: VisualEditorURLDirector + private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector + private wikimediaMobileUrlDirector: WikimediaMobileURLDirector + private VisualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL public apiUrl: URL public modulePath: string // only for reading public _modulePathOpt: string // only for whiting to generate modulePath + public mobileModulePath: string public webUrl: URL - public desktopRestApiUrl: URL + public WikimediaDesktopApiUrl: URL + public WikimediaMobileApiUrl: URL - #hasWikimediaDesktopRestApi: boolean | null + #hasWikimediaDesktopApi: boolean | null + #hasWikimediaMobileApi: boolean | null #hasVisualEditorApi: boolean | null #hasCoordinates: boolean | null @@ -71,12 +76,12 @@ class MediaWiki { this.#password = value } - set apiPath(value: string) { - this.#apiPath = value + set apiActionPath(value: string) { + this.#apiActionPath = value } - set restApiPath(value: string) { - this.#restApiPath = value + set apiPath(value: string) { + this.#apiPath = value } set domain(value: string) { @@ -105,7 +110,7 @@ class MediaWiki { this.namespaces = {} this.namespacesToMirror = [] - this.#apiPath = 'w/api.php' + this.#apiActionPath = 'w/api.php' this.#wikiPath = 'wiki/' this.apiCheckArticleId = 'MediaWiki:Sidebar' @@ -119,7 +124,8 @@ class MediaWiki { formatversion: '2', } - this.#hasWikimediaDesktopRestApi = null + this.#hasWikimediaDesktopApi = null + this.#hasWikimediaMobileApi = null this.#hasVisualEditorApi = null this.#hasCoordinates = null } @@ -128,17 +134,25 @@ class MediaWiki { this.initializeMediaWikiDefaults() } - public async hasWikimediaDesktopRestApi(): Promise { - if (this.#hasWikimediaDesktopRestApi === null) { - this.#hasWikimediaDesktopRestApi = await 
checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) - return this.#hasWikimediaDesktopRestApi + public async hasWikimediaDesktopApi(): Promise { + if (this.#hasWikimediaDesktopApi === null) { + this.#hasWikimediaDesktopApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaDesktopApi + } + return this.#hasWikimediaDesktopApi + } + + public async hasWikimediaMobileApi(): Promise { + if (this.#hasWikimediaMobileApi === null) { + this.#hasWikimediaMobileApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaMobileApi } - return this.#hasWikimediaDesktopRestApi + return this.#hasWikimediaMobileApi } public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { - this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) + this.#hasVisualEditorApi = await checkApiAvailability(this.VisualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasVisualEditorApi } return this.#hasVisualEditorApi @@ -166,13 +180,16 @@ class MediaWiki { private initMWApis() { const baseUrlDirector = new BaseURLDirector(this.baseUrl.href) this.webUrl = baseUrlDirector.buildURL(this.#wikiPath) - this.apiUrl = baseUrlDirector.buildURL(this.#apiPath) + this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.#restApiPath) + this.WikimediaDesktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) + this.WikimediaMobileApiUrl = baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) - 
this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopRestApiUrl.href) - this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) + this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() + this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.WikimediaDesktopApiUrl.href) + this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.WikimediaMobileApiUrl.href) + this.VisualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } public async login(downloader: Downloader) { @@ -398,10 +415,11 @@ class MediaWiki { webUrl: this.webUrl.href, apiUrl: this.apiUrl.href, modulePath: this.modulePath, + mobileModulePath: this.mobileModulePath, webUrlPath: this.webUrl.pathname, wikiPath: this.#wikiPath, baseUrl: this.baseUrl.href, - apiPath: this.#apiPath, + apiActionPath: this.#apiActionPath, domain: this.#domain, textDir: textDir as TextDirection, diff --git a/src/Templates.ts b/src/Templates.ts index e947c6ac0..ff083684a 100644 --- a/src/Templates.ts +++ b/src/Templates.ts @@ -2,7 +2,6 @@ import swig from 'swig-templates' import pathParser from 'path' import { config } from './config.js' import { readFileSync } from 'fs' -import { genHeaderCSSLink, genHeaderScript } from './util/index.js' import * as path from 'path' import { fileURLToPath } from 'url' @@ -22,21 +21,12 @@ const categoriesTemplate = swig.compile(readTemplate(config.output.templates.cat const subCategoriesTemplate = swig.compile(readTemplate(config.output.templates.subCategories)) const subPagesTemplate = swig.compile(readTemplate(config.output.templates.subPages)) -const htmlTemplateCode = (articleId: string) => { - const cssLinks = config.output.cssResources.reduce((buf, css) => { - return buf + genHeaderCSSLink(config, css, articleId) - }, '') - - const jsScripts = config.output.jsResources.reduce((buf, js) => { - return ( - buf + - (js === 'script' - ? 
genHeaderScript(config, js, articleId, '', `data-article-id="${articleId.replace(/"/g, '\\\\"')}" id="script-js"`) - : genHeaderScript(config, js, articleId)) - ) - }, '') +const htmlWikimediaMobileTemplateCode = () => { + return readTemplate(config.output.templates.pageWikimediaMobile) +} - return readTemplate(config.output.templates.page).replace('__CSS_LINKS__', cssLinks).replace('__JS_SCRIPTS__', jsScripts) +const htmlWikimediaDesktopTemplateCode = () => { + return readTemplate(config.output.templates.pageWikimediaDesktop) } const articleListHomeTemplate = readTemplate(config.output.templates.articleListHomeTemplate) @@ -46,7 +36,8 @@ export { leadSectionTemplate, sectionTemplate, subSectionTemplate, - htmlTemplateCode, + htmlWikimediaMobileTemplateCode, + htmlWikimediaDesktopTemplateCode, articleListHomeTemplate, categoriesTemplate, subCategoriesTemplate, diff --git a/src/config.ts b/src/config.ts index d30ae9252..1bc3da663 100644 --- a/src/config.ts +++ b/src/config.ts @@ -53,11 +53,15 @@ const config = { }, output: { - // CSS resources added by Kiwix - cssResources: ['style', 'content.parsoid', 'inserted_style'], - mainPageCssResources: ['mobile_main_page'], + // CSS and JS resources added by Kiwix + cssResourcesCommon: ['style', 'mobile_main_page'], + jsResourcesCommon: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min'], - jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], + cssResources: ['content.parsoid', 'inserted_style'], + jsResources: ['../node_modules/details-element-polyfill/dist/details-element-polyfill'], + + wikimediaMobileCssResources: ['wm_mobile_override_style'], + mwMobileJsResources: ['wm_mobile_override_script'], // JS/CSS resources to be imported from MediaWiki mw: { @@ -101,7 +105,8 @@ const config = { * __ARTICLE_CSS_LIST__ ==> list of link tags linking to all the css modules dependencies * __CSS_LINKS__ ==> list of 
link tags for config.output.cssResources */ - page: './templates/page.html', + pageWikimediaDesktop: './templates/pageWikimediaDesktop.html', + pageWikimediaMobile: './templates/pageWikimediaMobile.html', categories: './templates/categories.html', diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 91e3a8f22..53f214639 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -75,8 +75,8 @@ async function execute(argv: any) { keepEmptyParagraphs, mwUrl, mwWikiPath, + mwActionApiPath, mwApiPath, - mwRestApiPath, mwModulePath, mwDomain, mwUsername, @@ -158,8 +158,8 @@ async function execute(argv: any) { /* Wikipedia/... URL; Normalize by adding trailing / as necessary */ MediaWiki.base = mwUrl MediaWiki.getCategories = !!argv.getCategories + MediaWiki.apiActionPath = mwActionApiPath MediaWiki.apiPath = mwApiPath - MediaWiki.restApiPath = mwRestApiPath MediaWiki.modulePathOpt = mwModulePath MediaWiki.domain = mwDomain MediaWiki.password = mwPassword @@ -211,14 +211,14 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) RedisStore.setOptions(argv.redis || config.defaults.redisPath) await RedisStore.connect() const { articleDetailXId, filesToDownloadXPath, filesToRetryXPath, redirectsXId } = RedisStore - await downloader.setBaseUrls(forceRender) // Output directory const outputDirectory = path.isAbsolute(_outputDirectory || '') ? 
_outputDirectory : path.join(process.cwd(), _outputDirectory || 'out') await mkdirPromise(outputDirectory) @@ -398,9 +398,6 @@ async function execute(argv: any) { }) zimCreator.addArticle(scraperArticle) - logger.info('Copying Static Resource Files') - await saveStaticFiles(config, zimCreator) - logger.info('Finding stylesheets to download') const stylesheetsToGet = await dump.getRelevantStylesheetUrls(downloader) logger.log(`Found [${stylesheetsToGet.length}] stylesheets to download`) @@ -420,12 +417,15 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, forceRender) + const { jsModuleDependencies, cssModuleDependencies, staticFilesList } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`) + logger.info('Copying Static Resource Files') + await saveStaticFiles(staticFilesList, zimCreator) + const allDependenciesWithType = [ { type: 'js', moduleList: Array.from(jsModuleDependencies) }, { type: 'css', moduleList: Array.from(cssModuleDependencies) }, diff --git a/src/parameterList.ts b/src/parameterList.ts index 6f31e7973..eee869e40 100644 --- a/src/parameterList.ts +++ b/src/parameterList.ts @@ -17,8 +17,8 @@ export const parameterDescriptions = { 'Specify a flavour for the scraping. If missing, scrape all article contents. Each --format argument will cause a new local file to be created but options can be combined. Supported options are:\n * novid: no video & audio content\n * nopic: no pictures (implies "novid")\n * nopdf: no PDF files\n * nodet: only the first/head paragraph (implies "novid")\nFormat names can also be aliased using a ":"\nExample: "... 
--format=nopic:mini --format=novid,nopdf"', keepEmptyParagraphs: 'Keep all paragraphs, even empty ones.', mwWikiPath: 'Mediawiki wiki base path (per default "/wiki/")', - mwApiPath: 'Mediawiki API path (per default "/w/api.php")', - mwRestApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")', + mwActionApiPath: 'Mediawiki action API path (per default "/w/api.php")', + mwApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")', mwModulePath: 'Mediawiki module load path (per default "/w/load.php")', mwDomain: 'Mediawiki user domain (thought for private wikis)', mwUsername: 'Mediawiki username (thought for private wikis)', diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index a6522ca09..c4bbfebee 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -9,7 +9,7 @@ import DU from '../DOMUtils.js' import { config } from '../config.js' import { Dump } from '../Dump.js' import { rewriteUrlsOfDoc } from '../util/rewriteUrls.js' -import { footerTemplate, htmlTemplateCode } from '../Templates.js' +import { footerTemplate } from '../Templates.js' import { getFullUrl, getMediaBase, @@ -17,10 +17,8 @@ import { getRelativeFilePath, isWebpCandidateImageMimeType, interpolateTranslationString, - genCanonicalLink, - genHeaderScript, - genHeaderCSSLink, encodeArticleIdForZimHtmlUrl, + getStaticFiles, } from '../util/misc.js' type renderType = 'auto' | 'desktop' | 'mobile' | 'specific' @@ -58,12 +56,21 @@ export interface RenderSingleOutput { displayTitle: string html: string mediaDependencies: any + moduleDependencies: any + staticFiles: string[] subtitles: any } export type RenderOutput = RenderSingleOutput[] export abstract class Renderer { + public staticFilesListCommon: string[] = [] + constructor() { + if (this.staticFilesListCommon.length === 0) { + this.staticFilesListCommon = getStaticFiles(config.output.jsResourcesCommon, config.output.cssResourcesCommon) + } + } + protected async 
treatVideo( dump: Dump, srcCache: KVS, @@ -386,7 +393,8 @@ export abstract class Renderer { return thumbDiv } - public async processHtml(html: string, dump: Dump, articleId: string, articleDetail: any, _moduleDependencies: any, webp: boolean) { + // TODO: The first part of this method is common for all renders + public async processHtml(html: string, dump: Dump, articleId: string, articleDetail: any, _moduleDependencies: any, webp: boolean, callback) { let mediaDependencies: Array<{ url: string; path: string }> = [] let subtitles: Array<{ url: string; path: string }> = [] let doc = domino.createDocument(html) @@ -430,7 +438,8 @@ export abstract class Renderer { doc = await dump.customProcessor.preProcessArticle(articleId, doc) } - let templatedDoc = await this.templateArticle(doc, _moduleDependencies, dump, articleId, articleDetail, RedisStore.articleDetailXId) + let templatedDoc = callback(_moduleDependencies, articleId) + templatedDoc = await this.mergeTemplateDoc(templatedDoc, doc, dump, articleDetail, RedisStore.articleDetailXId, articleId) if (dump.customProcessor && dump.customProcessor.postProcessArticle) { templatedDoc = await dump.customProcessor.postProcessArticle(articleId, templatedDoc) @@ -458,36 +467,14 @@ export abstract class Renderer { } } - private async templateArticle( + private async mergeTemplateDoc( + htmlTemplateDoc: DominoElement, parsoidDoc: DominoElement, - moduleDependencies: any, dump: Dump, - articleId: string, articleDetail: ArticleDetail, articleDetailXId: RKVS, - ): Promise { - const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { - jsConfigVars: string | RegExpExecArray - jsDependenciesList: string[] - styleDependenciesList: string[] - } - - const htmlTemplateDoc = domino.createDocument( - htmlTemplateCode(articleId) - .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? 
genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') - .replace( - '__ARTICLE_JS_LIST__', - jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', - ) - .replace( - '__ARTICLE_CSS_LIST__', - styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ), - ) - + articleId: string, + ) { /* Create final document by merging template and parsoid documents */ htmlTemplateDoc.getElementById('mw-content-text').style.setProperty('direction', dump.mwMetaData.textDir) htmlTemplateDoc.getElementById('mw-content-text').innerHTML = parsoidDoc.getElementsByTagName('body')[0].innerHTML diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts new file mode 100644 index 000000000..3fac4ffe8 --- /dev/null +++ b/src/renderers/abstractDesktop.render.ts @@ -0,0 +1,73 @@ +import * as domino from 'domino' +import { Renderer } from './abstract.renderer.js' +import { getStaticFiles } from '../util/misc.js' +import { config } from '../config.js' +import MediaWiki from '../MediaWiki.js' + +import { htmlWikimediaDesktopTemplateCode } from '../Templates.js' +import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' + +export abstract class DesktopRenderer extends Renderer { + public staticFilesListDesktop: string[] = [] + constructor() { + super() + this.staticFilesListDesktop = this.staticFilesListCommon.concat(getStaticFiles(config.output.jsResources, config.output.cssResources)) + } + + public filterWikimediaDesktopModules(_moduleDependencies) { + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as { + jsConfigVars: string + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const 
wikimediaDesktopJsModuleDependencies = jsDependenciesList.filter((item) => !item.includes('javascript/mobile')) + const wikimediaDesktopCssModuleDependencies = styleDependenciesList.filter((item) => !item.includes('css/mobile')) + + const wikimediaDesktopModuleDependencies = { + jsConfigVars, + jsDependenciesList: wikimediaDesktopJsModuleDependencies, + styleDependenciesList: wikimediaDesktopCssModuleDependencies, + } + + return wikimediaDesktopModuleDependencies + } + + public templateDesktopArticle(moduleDependencies: any, articleId: string): Document { + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { + jsConfigVars + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const cssLinks = config.output.cssResources.reduce((buf, css) => { + return buf + genHeaderCSSLink(config, css, articleId) + }, '') + + const jsScripts = config.output.jsResources.reduce((buf, js) => { + return ( + buf + + (js === 'script' + ? genHeaderScript(config, js, articleId, '', `data-article-id="${articleId.replace(/"/g, '\\\\"')}" id="script-js"`) + : genHeaderScript(config, js, articleId)) + ) + }, '') + + const htmlTemplateString = htmlWikimediaDesktopTemplateCode() + .replace('__CSS_LINKS__', cssLinks) + .replace('__JS_SCRIPTS__', jsScripts) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') + .replace( + '__ARTICLE_JS_LIST__', + jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', + ) + .replace( + '__ARTICLE_CSS_LIST__', + styleDependenciesList.length !== 0 ? 
styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', + ) + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) + return htmlTemplateDoc + } +} diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts new file mode 100644 index 000000000..b771bd0b7 --- /dev/null +++ b/src/renderers/abstractMobile.render.ts @@ -0,0 +1,64 @@ +import * as domino from 'domino' +import { Renderer } from './abstract.renderer.js' +import { getStaticFiles } from '../util/misc.js' +import { config } from '../config.js' +import MediaWiki from '../MediaWiki.js' + +import { htmlWikimediaMobileTemplateCode } from '../Templates.js' +import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' + +export abstract class MobileRenderer extends Renderer { + public staticFilesListMobile: string[] = [] + constructor() { + super() + this.staticFilesListMobile = this.staticFilesListCommon.concat(getStaticFiles(config.output.mwMobileJsResources, config.output.wikimediaMobileCssResources)) + } + + public filterWikimediaMobileModules(_moduleDependencies) { + const { jsDependenciesList, styleDependenciesList } = _moduleDependencies as { + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const wikimediaMobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) + const wikimediaMobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) + + const wikimediaMobileModuleDependencies = { + jsDependenciesList: wikimediaMobileJsModuleDependencies, + styleDependenciesList: wikimediaMobileCssModuleDependencies, + } + + return wikimediaMobileModuleDependencies + } + + private genWikimediaMobileOverrideCSSLink(css: string) { + return `` + } + + private genWikimediaMobileOverrideScript(js: string) { + return `` + } + + public 
templateMobileArticle(moduleDependencies: any, articleId: string): Document { + const { jsDependenciesList, styleDependenciesList } = moduleDependencies + + const htmlTemplateString = htmlWikimediaMobileTemplateCode() + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', '') + .replace('__JS_SCRIPTS__', this.genWikimediaMobileOverrideScript(config.output.mwMobileJsResources[0])) + .replace('__CSS_LINKS__', this.genWikimediaMobileOverrideCSSLink(config.output.wikimediaMobileCssResources[0])) + .replace( + '__ARTICLE_JS_LIST__', + jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep: string) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', + ) + .replace( + '__ARTICLE_CSS_LIST__', + styleDependenciesList.length !== 0 + ? styleDependenciesList.map((oneCssDep: string) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) + return htmlTemplateDoc + } +} diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index dc6ed04ee..cffb1cabb 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -2,6 +2,7 @@ import MediaWiki from './../MediaWiki.js' import { Renderer } from './abstract.renderer.js' import { VisualEditorRenderer } from './visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' +import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' import * as logger from './../Logger.js' @@ -9,11 +10,15 @@ export class RendererBuilder { public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options - const [hasVisualEditorApi, hasWikimediaDesktopRestApi] = await 
Promise.all([MediaWiki.hasVisualEditorApi(), MediaWiki.hasWikimediaDesktopRestApi()]) + const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ + MediaWiki.hasVisualEditorApi(), + MediaWiki.hasWikimediaDesktopApi(), + MediaWiki.hasWikimediaMobileApi(), + ]) switch (renderType) { case 'desktop': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { @@ -23,14 +28,19 @@ export class RendererBuilder { process.exit(1) } case 'mobile': - // TODO: return WikimediaMobile renderer - break + if (hasWikimediaMobileApi) { + return new WikimediaMobileRenderer() + } + logger.error('No available mobile renderer.') + process.exit(1) case 'auto': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { return new VisualEditorRenderer() + } else if (hasWikimediaMobileApi) { + return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') process.exit(1) @@ -39,7 +49,7 @@ export class RendererBuilder { // renderName argument is required for 'specific' mode switch (renderName) { case 'WikimediaDesktop': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { return new WikimediaDesktopRenderer() } logger.error('Cannot create an instance of WikimediaDesktop renderer.') @@ -51,8 +61,11 @@ export class RendererBuilder { logger.error('Cannot create an instance of VisualEditor renderer.') process.exit(1) case 'WikimediaMobile': - // TODO: return WikimediaMobile renderer - return + if (hasWikimediaMobileApi) { + return new WikimediaMobileRenderer() + } + logger.error('No available mobile renderer.') + process.exit(1) default: throw new Error(`Unknown renderName for 
specific mode: ${renderName}`) } diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index 4fb505d2d..1aa28069e 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -1,6 +1,6 @@ -import { DELETED_ARTICLE_ERROR } from '../util/const.js' import * as logger from '../Logger.js' -import { Renderer } from './abstract.renderer.js' +import { DELETED_ARTICLE_ERROR } from '../util/const.js' +import { DesktopRenderer } from './abstractDesktop.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' @@ -9,7 +9,7 @@ Represent 'https://{wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=h or 'https://{3rd-part-wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page={title}' */ -export class VisualEditorRenderer extends Renderer { +export class VisualEditorRenderer extends DesktopRenderer { constructor() { super() } @@ -55,12 +55,23 @@ export class VisualEditorRenderer extends Renderer { const { articleId, articleDetail, webp, _moduleDependencies, dump } = renderOpts const { html, displayTitle } = await this.retrieveHtml(renderOpts) if (html) { - const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(html, dump, articleId, articleDetail, _moduleDependencies, webp) + const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) + const { finalHTML, mediaDependencies, subtitles } = await super.processHtml( + html, + dump, + articleId, + articleDetail, + moduleDependenciesFiltered, + webp, + super.templateDesktopArticle.bind(this), + ) result.push({ articleId, displayTitle, html: finalHTML, mediaDependencies, + moduleDependencies: moduleDependenciesFiltered, + staticFiles: this.staticFilesListDesktop, subtitles, }) return result diff --git a/src/renderers/wikimedia-desktop.renderer.ts 
b/src/renderers/wikimedia-desktop.renderer.ts index 9bba2c3cb..91113cf88 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -1,10 +1,10 @@ import domino from 'domino' -import { Renderer } from './abstract.renderer.js' +import { DesktopRenderer } from './abstractDesktop.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' -export class WikimediaDesktopRenderer extends Renderer { +export class WikimediaDesktopRenderer extends DesktopRenderer { constructor() { super() } @@ -35,8 +35,15 @@ export class WikimediaDesktopRenderer extends Renderer { public async render(renderOpts: RenderOpts): Promise { const result: RenderOutput = [] const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + + if (!data) { + throw new Error(`Cannot render [${data}] into an article`) + } + const articleDetail = await renderOpts.articleDetailXId.get(articleId) + const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) + // Paginate when there are more than 200 subCategories const numberOfPagesToSplitInto = Math.max(Math.ceil((articleDetail.subCategories || []).length / 200), 1) @@ -46,13 +53,23 @@ export class WikimediaDesktopRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } - const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, mediaDependencies, subtitles } = await super.processHtml( + dataWithHeader || data, + dump, + articleId, + articleDetail, + moduleDependenciesFiltered, + webp, + super.templateDesktopArticle.bind(this), + ) result.push({ articleId: _articleId, displayTitle: (strippedTitle || articleId.replace(/_/g, ' 
')) + (i === 0 ? '' : `/${i}`), html: finalHTML, mediaDependencies, + moduleDependencies: moduleDependenciesFiltered, + staticFiles: this.staticFilesListDesktop, subtitles, }) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts new file mode 100644 index 000000000..1697f4935 --- /dev/null +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -0,0 +1,151 @@ +import * as domino from 'domino' +import * as logger from '../Logger.js' +import { MobileRenderer } from './abstractMobile.render.js' +import { getStrippedTitleFromHtml } from '../util/misc.js' +import { RenderOpts, RenderOutput } from './abstract.renderer.js' + +type PipeFunction = (value: DominoElement) => DominoElement | Promise + +// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' +export class WikimediaMobileRenderer extends MobileRenderer { + constructor() { + super() + } + + private getStrippedTitle(renderOpts: RenderOpts): string { + const { data, articleId } = renderOpts + + const strippedTitle = getStrippedTitleFromHtml(data) + return strippedTitle || articleId.replace('_', ' ') + } + + public async render(renderOpts: RenderOpts): Promise { + try { + const result: RenderOutput = [] + const { data, articleId, webp, _moduleDependencies, dump } = renderOpts + const articleDetail = await renderOpts.articleDetailXId.get(articleId) + + const displayTitle = this.getStrippedTitle(renderOpts) + if (data) { + const moduleDependenciesFiltered = super.filterWikimediaMobileModules(_moduleDependencies) + let mediaDependenciesVal + let subtitlesVal + const mobileHTML = domino.createDocument(data) + const finalHTMLMobile = await this.pipeMobileTransformations( + mobileHTML, + this.convertLazyLoadToImages, + this.removeEditContainer, + this.removeHiddenClass, + async (doc) => { + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml( + doc.documentElement.outerHTML, + dump, + articleId, + articleDetail, + 
moduleDependenciesFiltered, + webp, + super.templateMobileArticle.bind(this), + ) + + mediaDependenciesVal = mediaDependencies + subtitlesVal = subtitles + return domino.createDocument(finalHTML) + }, + this.restoreLinkDefaults, + ) + + result.push({ + articleId, + displayTitle, + html: finalHTMLMobile.documentElement.outerHTML, + mediaDependencies: mediaDependenciesVal, + moduleDependencies: moduleDependenciesFiltered, + staticFiles: this.staticFilesListMobile, + subtitles: subtitlesVal, + }) + return result + } else { + throw new Error(`Cannot render [${data}] into an article`) + } + } catch (err) { + logger.error(err.message) + throw new Error(err.message) + } + } + + private async pipeMobileTransformations(value: DominoElement, ...fns: PipeFunction[]): Promise { + let result: DominoElement | Promise = value + for (const fn of fns) { + result = fn(await result) + } + return result + } + + private removeEditContainer(doc: DominoElement) { + const editContainers = doc.querySelectorAll('.pcs-edit-section-link-container') + + editContainers.forEach((elem: DominoElement) => { + elem.remove() + }) + + return doc + } + + private convertLazyLoadToImages(doc: DominoElement) { + const protocol = 'https://' + const spans = doc.querySelectorAll('.pcs-lazy-load-placeholder') + + spans.forEach((span: DominoElement) => { + // Create a new img element + const img = doc.createElement('img') as DominoElement + + // Set the attributes for the img element based on the data attributes in the span + img.src = protocol + span.getAttribute('data-src') + img.setAttribute('decoding', 'async') + img.width = span.getAttribute('data-width') + img.height = span.getAttribute('data-height') + img.className = span.getAttribute('data-class') + + // Replace the span with the img element + span.parentNode.replaceChild(img, span) + }) + + return doc + } + + private removeHiddenClass(doc: DominoElement) { + const pcsSectionHidden = 'pcs-section-hidden' + const hiddenSections = 
doc.querySelectorAll(`.${pcsSectionHidden}`) + hiddenSections.forEach((section) => { + section.classList.remove(pcsSectionHidden) + }) + return doc + } + + private restoreLinkDefaults(doc: DominoElement) { + const supElements = doc.querySelectorAll('sup') + + Array.from(supElements).forEach((sup: DominoElement) => { + const anchor = doc.createElement('a') + const mwRefLinkTextElement = sup.querySelector('.mw-reflink-text') as DominoElement + + let mwRefLinkText = '' + if (mwRefLinkTextElement) { + mwRefLinkText = mwRefLinkTextElement.textContent || '' + } + + const existedAnchor = sup.querySelector('.reference-link') + + if (existedAnchor?.getAttribute('href')) { + anchor.setAttribute('href', existedAnchor.getAttribute('href')) + } + anchor.className = 'reference-link' + anchor.textContent = mwRefLinkText + + sup.innerHTML = '' + sup.appendChild(anchor) + }) + + return doc + } +} diff --git a/src/types.d.ts b/src/types.d.ts index 9e25f7526..1170a392f 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -159,12 +159,13 @@ interface MWMetaData { baseUrl: string wikiPath: string - apiPath: string + apiActionPath: string domain: string webUrl: string apiUrl: string webUrlPath: string modulePath: string + mobileModulePath: string } interface MWNamespaces { @@ -178,8 +179,8 @@ interface MWNamespaces { interface MWConfig { base: string wikiPath?: string + apiActionPath?: string apiPath?: string - restApiPath?: string domain?: string username?: string password?: string diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 3aa7ba3a9..6006441b9 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -14,24 +14,38 @@ export default class BaseURLDirector { return urlBuilder.setDomain(this.baseDomain).setPath(path).build(true) } - buildRestApiURL(path?: string) { + buildWikimediaApiURL(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 
'api/rest_v1') .build(true, '/') } - buildDesktopRestApiURL(path?: string) { + buildWikimediaDesktopApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/html') .build(true, '/') } + buildWikimediaMobileApiUrl(path?: string) { + return urlBuilder + .setDomain(this.baseDomain) + .setPath(path ?? 'api/rest_v1/page/mobile-html') + .build(true, '/') + } + buildModuleURL(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'w/load.php') .build(false, '?') } + + buildMobileModuleURL(path?: string) { + return urlBuilder + .setDomain(this.baseDomain) + .setPath(path ?? 'api/rest_v1/page/mobile-html-offline-resources') + .build(false, '/') + } } diff --git a/src/util/builders/url/desktop.director.ts b/src/util/builders/url/desktop.director.ts index 100163aed..2f157debf 100644 --- a/src/util/builders/url/desktop.director.ts +++ b/src/util/builders/url/desktop.director.ts @@ -3,7 +3,7 @@ import urlBuilder from './url.builder.js' /** * Interface to build URLs based on Downloader desktop URL */ -export default class DesktopURLDirector { +export default class WikimediaDesktopURLDirector { baseDomain: string constructor(baseDomain: string) { diff --git a/src/util/builders/url/mobile.director.ts b/src/util/builders/url/mobile.director.ts new file mode 100644 index 000000000..d33dcf9e7 --- /dev/null +++ b/src/util/builders/url/mobile.director.ts @@ -0,0 +1,16 @@ +import urlBuilder from './url.builder.js' + +/** + * Interface to build URLs based on MediaWiki mobile URL + */ +export default class WikimediaMobileURLDirector { + baseDomain: string + + constructor(baseDomain: string) { + this.baseDomain = baseDomain + } + + buildArticleURL(articleId: string) { + return urlBuilder.setDomain(this.baseDomain).setPath(encodeURIComponent(articleId)).build() + } +} diff --git a/src/util/const.ts b/src/util/const.ts index 6c511f56f..b5985ec7f 100644 --- a/src/util/const.ts +++ b/src/util/const.ts @@ -20,4 +20,4 @@ 
export const RULE_TO_REDIRECT = /window\.top !== window\.self/ export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js' export const MAX_FILE_DOWNLOAD_RETRIES = 5 export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not indgestable by Parsoid https://github.com/openzim/mwoffliner/issues/1853 -export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor'] +export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile'] diff --git a/src/util/dump.ts b/src/util/dump.ts index 2d6e63c57..0a3963189 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -117,13 +117,19 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: } let apiParameterOnly + let moduleApiUrl: string if (type === 'js') { apiParameterOnly = 'scripts' } else if (type === 'css') { apiParameterOnly = 'styles' } - const moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) + if (!module.includes('javascript/mobile') && !module.includes('css/mobile')) { + moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) + } else { + moduleApiUrl = encodeURI(`https:${module}`) + } + logger.info(`Getting [${type}] module [${moduleApiUrl}]`) const { content } = await downloader.downloadContent(moduleApiUrl) @@ -141,7 +147,16 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: } try { - const articleId = type === 'js' ? 
jsPath(module, config.output.dirs.mediawiki) : cssPath(module, config.output.dirs.mediawiki) + let articleId + const pathFunctions = { + js: jsPath, + css: cssPath, + } + + const pathFunction = pathFunctions[type] + if (pathFunction) { + articleId = pathFunction(module, config.output.dirs.mediawiki) + } const article = new ZimArticle({ url: articleId, data: text, ns: '-' }) zimCreator.addArticle(article) logger.info(`Saved module [${module}]`) diff --git a/src/util/misc.ts b/src/util/misc.ts index ee31a5f8e..e7d6fb2eb 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -162,27 +162,22 @@ export function interpolateTranslationString(str: string, parameters: { [key: st return newString } -export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { - const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map(async (css) => { - try { - const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`)) - const article = new ZimArticle({ url: cssPath(css), data: cssCont, ns: '-' }) +export async function saveStaticFiles(staticFiles: Set, zimCreator: ZimCreator) { + try { + staticFiles.forEach(async (file) => { + const staticFilesContent = await readFilePromise(pathParser.resolve(__dirname, `../../res/${file}`)) + const article = new ZimArticle({ url: file.endsWith('.css') ? 
cssPath(file) : jsPath(file), data: staticFilesContent, ns: '-' }) zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create ${css} file : ${error}`) - } - }) + }) + } catch (err) { + logger.error(err) + } +} - const jsPromises = config.output.jsResources.map(async (js) => { - try { - const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`)) - const article = new ZimArticle({ url: jsPath(js), data: jsCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create ${js} file : ${error}`) - } - }) - return Promise.all([...cssPromises, ...jsPromises]) +export function getStaticFiles(jsStaticFiles: string[], cssStaticFiles: string[]): string[] { + jsStaticFiles = jsStaticFiles.map((jsFile) => jsFile.concat('.js')) + cssStaticFiles = cssStaticFiles.map((cssFile) => cssFile.concat('.css')) + return jsStaticFiles.concat(cssStaticFiles) } export function cssPath(css: string, subDirectory = '') { diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index cf60d2bb0..847ffae82 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -12,7 +12,7 @@ import { config } from '../config.js' import { getSizeFromUrl, cleanupAxiosError } from './misc.js' import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } from './const.js' import urlHelper from './url.helper.js' -import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' +import { Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { @@ -129,33 +129,13 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK await articleDetailXId.iterateItems(downloader.speed, async (articleKeyValuePairs) => { for (const [articleId, 
articleDetail] of Object.entries(articleKeyValuePairs)) { const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) + let rets: any try { const articleUrl = getArticleUrl(downloader, dump, articleId) - let rets: any - if (dump.isMainPage) { - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - mainPageRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - } - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - articlesRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) + const isMainPage = dump.isMainPage(articleId) + const renderer = isMainPage ? mainPageRenderer : articlesRenderer + + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, html } of rets) { if (!html) { continue @@ -202,11 +182,13 @@ async function saveArticle( try { const filesToDownload: KVS = {} - subtitles.forEach((s) => { - filesToDownload[s.path] = { url: s.url, namespace: '-' } - }) + if (subtitles?.length > 0) { + subtitles.forEach((s) => { + filesToDownload[s.path] = { url: s.url, namespace: '-' } + }) + } - if (mediaDependencies.length) { + if (mediaDependencies && mediaDependencies.length) { const existingVals = await RedisStore.filesToDownloadXPath.getMany(mediaDependencies.map((dep) => dep.path)) for (const dep of mediaDependencies) { @@ -249,9 +231,10 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str /* * Fetch Articles */ -export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, forceRender = null) { +export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new 
Set() const cssModuleDependencies = new Set() + const staticFilesList = new Set() let jsConfigVars = '' let prevPercentProgress: string const { articleDetailXId } = RedisStore @@ -259,22 +242,23 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade const rendererBuilder = new RendererBuilder() - let rendererBuilderOptions: RendererBuilderOptions + let mainPageRenderer + let articlesRenderer if (forceRender) { - rendererBuilderOptions = { + // All articles and main page will use the same renderer if 'forceRender' is specified + const renderer = await rendererBuilder.createRenderer({ renderType: 'specific', renderName: forceRender, - } + }) + mainPageRenderer = renderer + articlesRenderer = renderer } else { - rendererBuilderOptions = { - renderType: 'auto', - } + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) + articlesRenderer = await rendererBuilder.createRenderer({ + renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', + }) } - const mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - // TODO: article renderer will be switched to the mobile mode later - const articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - if (dump.customProcessor?.shouldKeepArticle) { await getAllArticlesToKeep(downloader, articleDetailXId, dump, mainPageRenderer, articlesRenderer) } @@ -310,45 +294,30 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let rets: any try { const articleUrl = getArticleUrl(downloader, dump, articleId) - if (dump.isMainPage) { - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - mainPageRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - } - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - articlesRenderer, - articleUrl, - dump, - articleDetail, - 
dump.isMainPage(articleId), - ) - - for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, subtitles } of rets) { + const isMainPage = dump.isMainPage(articleId) + const renderer = isMainPage ? mainPageRenderer : articlesRenderer + + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) + + for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, staticFiles, subtitles } of rets) { if (!finalHTML) { logger.warn(`No HTML returned for article [${articleId}], skipping`) continue } curStage += 1 - for (const dep of _moduleDependencies.jsDependenciesList) { + for (const dep of moduleDependencies.jsDependenciesList) { jsModuleDependencies.add(dep) } - for (const dep of _moduleDependencies.styleDependenciesList) { + for (const dep of moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } - jsConfigVars = jsConfigVars || _moduleDependencies.jsConfigVars + + for (const file of staticFiles) { + staticFilesList.add(file) + } + + jsConfigVars = moduleDependencies.jsConfigVars || '' /* * getModuleDependencies and downloader.getArticle are @@ -423,10 +392,13 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade logger.log(`Done with downloading a total of [${articlesTotal}] articles`) - const jsConfigVarArticle = new ZimArticle({ url: jsPath('jsConfigVars', config.output.dirs.mediawiki), data: jsConfigVars, ns: '-' }) - zimCreator.addArticle(jsConfigVarArticle) + if (jsConfigVars) { + const jsConfigVarArticle = new ZimArticle({ url: jsPath('jsConfigVars', config.output.dirs.mediawiki), data: jsConfigVars, ns: '-' }) + zimCreator.addArticle(jsConfigVarArticle) + } return { + staticFilesList, jsModuleDependencies, cssModuleDependencies, } diff --git a/test/e2e/articleLists.test.ts b/test/e2e/articleLists.test.ts index 845476812..ad85cc1ce 100644 
--- a/test/e2e/articleLists.test.ts +++ b/test/e2e/articleLists.test.ts @@ -22,6 +22,7 @@ describe('articleList', () => { outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', } test('articleList and articleListIgnore check', async () => { diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index e957a3330..e1198ec88 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -18,6 +18,7 @@ describe('bm', () => { outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', } test('Simple articleList', async () => { diff --git a/test/e2e/downloadImage.e2e.test.ts b/test/e2e/downloadImage.e2e.test.ts index cc3c9078d..774d67b05 100644 --- a/test/e2e/downloadImage.e2e.test.ts +++ b/test/e2e/downloadImage.e2e.test.ts @@ -20,6 +20,7 @@ describeIf('Check image downloading from S3 using optimisationCacheUrl parameter articleList: 'Paris', format: ['nodet'], optimisationCacheUrl: process.env.S3_URL, + forceRender: 'WikimediaDesktop', } test('right scrapping from fr.wikipedia.org with optimisationCacheUrl parameter', async () => { diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index 19f669289..fb333e796 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -1,6 +1,6 @@ import { testAllRenders } from '../testAllRenders.js' import domino from 'domino' -import { zimdump } from '../util.js' +import { zimdump, zimcheck } from '../util.js' import 'dotenv/config.js' import { jest } from '@jest/globals' import rimraf from 'rimraf' @@ -20,15 +20,18 @@ const verifyImgElements = (imgFilesArr, imgElements) => { } const mwUrl = 'https://en.wikipedia.org' -const articleList = 'User:Kelson/MWoffliner_CI_reference' +const articleList = 'BMW' const format = '' await testAllRenders(mwUrl, articleList, format, async (outFiles) => { const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) describe('e2e test for 
en.wikipedia.org', () => { const articleDoc = domino.createDocument(articleFromDump) + test(`test zim integrity for ${outFiles[0]?.renderer} renderer`, async () => { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + }) test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => { - expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + expect(articleDoc.querySelector('h1.article-header, h1.pcs-edit-section-title')).toBeTruthy() }) test(`test article image integrity for ${outFiles[0]?.renderer} renderer`, async () => { const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) diff --git a/test/e2e/en10.e2e.test.ts b/test/e2e/en10.e2e.test.ts index 543fe9017..f62e66774 100644 --- a/test/e2e/en10.e2e.test.ts +++ b/test/e2e/en10.e2e.test.ts @@ -21,6 +21,7 @@ describe('en10', () => { redis: process.env.REDIS, // format: ['nopic', 'novid', 'nopdf', 'nodet'], format: ['nopic', 'nopdf'], + forceRender: 'WikimediaDesktop', } test('Simple articleList', async () => { diff --git a/test/e2e/extra.e2e.test.ts b/test/e2e/extra.e2e.test.ts index 78562f804..6ab70a06b 100644 --- a/test/e2e/extra.e2e.test.ts +++ b/test/e2e/extra.e2e.test.ts @@ -36,6 +36,7 @@ AC/DC` outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', }) // Created 1 outputs diff --git a/test/e2e/mobileRenderFormatParams.test.ts b/test/e2e/mobileRenderFormatParams.test.ts new file mode 100644 index 000000000..826f2273e --- /dev/null +++ b/test/e2e/mobileRenderFormatParams.test.ts @@ -0,0 +1,125 @@ +import 'dotenv/config.js' +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import * as logger from '../../src/Logger.js' +import domino from 'domino' +import rimraf from 'rimraf' +import { execa } from 'execa' +import { jest } from '@jest/globals' +import { zimdumpAvailable, zimdump } from '../util.js' + +jest.setTimeout(200000) + +let zimdumpIsAvailable + +beforeAll(async () => { + 
zimdumpIsAvailable = await zimdumpAvailable() + if (!zimdumpIsAvailable) { + logger.error('Zimdump not installed, exiting test') + process.exit(1) + } +}) + +async function getOutFiles(testId: string, articleList: string, mwUrl: string, format?: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'mail@mail.com', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: 'WikimediaMobile', + format, + } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +// TODO: articulate this test with /pull/1898 once merged +describe('Mobile render with multiple format params', () => { + const mwUrl = 'https://en.wikipedia.org' + + test('Test WikimediaMobile with en.wikipedia.org using format:nopic param', async () => { + const articleList = 'BMW' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nopic') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const imgElements = Array.from(articleDoc.querySelectorAll('img')) + + expect(imgElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:nodet param', async () => { + const articleList = 'BMW' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nodet') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const sectionsElements = Array.from(articleDoc.querySelectorAll('section')) + + expect(sectionsElements).toHaveLength(1) + expect(sectionsElements[0].getAttribute('data-mw-section-id')).toEqual('0') + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile 
render with en.wikipedia.org using format:novid param to check no video tags', async () => { + const articleList = 'Animation' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'novid') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const videoElements = Array.from(articleDoc.querySelectorAll('video')) + + expect(videoElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:novid param to check no audio tags', async () => { + const articleList = 'English_alphabet' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'novid') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const audioElements = Array.from(articleDoc.querySelectorAll('audio')) + + expect(audioElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test.skip('Test WikimediaMobile render with en.wikipedia.org using format:nopdf', async () => { + const articleList = 'PDF' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nopdf') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const anchorElements = Array.from(articleDoc.querySelectorAll('a')) + + anchorElements.forEach(() => { + // TODO: Check valid links to pdf source + }) + + rimraf.sync(`./${testId}`) + }) +}) diff --git a/test/e2e/multimediaContent.test.ts b/test/e2e/multimediaContent.test.ts index 511a3280f..f16d5808a 100644 --- a/test/e2e/multimediaContent.test.ts +++ b/test/e2e/multimediaContent.test.ts @@ 
-18,6 +18,7 @@ describe('Multimedia', () => { outputDirectory: testId, redis: process.env.REDIS, customZimDescription: 'Example of the description', + forceRender: 'WikimediaDesktop', } test('check multimedia content from wikipedia test page', async () => { diff --git a/test/e2e/treatMedia.e2e.test.ts b/test/e2e/treatMedia.e2e.test.ts index 22e045ec8..f5ac7a13c 100644 --- a/test/e2e/treatMedia.e2e.test.ts +++ b/test/e2e/treatMedia.e2e.test.ts @@ -18,6 +18,7 @@ describe('treatment test', () => { articleList, outputDirectory: testId, redis: process.env.REDIS, + forceRender: 'WikimediaDesktop', } test('media file from hidden element should not be downloaded', async () => { diff --git a/test/e2e/wikisource.e2e.test.ts b/test/e2e/wikisource.e2e.test.ts index 3c8def915..d8c4e11ac 100644 --- a/test/e2e/wikisource.e2e.test.ts +++ b/test/e2e/wikisource.e2e.test.ts @@ -18,6 +18,7 @@ describe('wikisource', () => { redis: process.env.REDIS, format: ['nopic'], noLocalParserFallback: true, + forceRender: 'WikimediaDesktop', } test('Wikisource List', async () => { diff --git a/test/e2e/zimMetadata.e2e.test.ts b/test/e2e/zimMetadata.e2e.test.ts index cc60ee594..75161c150 100644 --- a/test/e2e/zimMetadata.e2e.test.ts +++ b/test/e2e/zimMetadata.e2e.test.ts @@ -23,6 +23,7 @@ describe('zimMetadata', () => { customZimLongDescription: 'Example of the long description', customZimTitle: 'Example of the title', publisher: 'Example of the publisher', + forceRender: 'WikimediaDesktop', } test('check all zim metadata using zimdump', async () => { diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 9282ff8c7..f679d2f9e 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -11,29 +11,43 @@ describe('BaseURLDirector', () => { }) }) - describe('buildRestApiURL', () => { + describe('buildWikimediaApiURL', () => { it('should return rest URL with provided path and trailing char at the 
end', () => { - const url = baseUrlDirector.buildRestApiURL('api/rest_v2') + const url = baseUrlDirector.buildWikimediaApiURL('api/rest_v2') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/') }) it('should return rest URL with default path and trailing char at the end', () => { - const url = baseUrlDirector.buildRestApiURL() + const url = baseUrlDirector.buildWikimediaApiURL() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/') }) }) - describe('buildDesktopRestApiURL', () => { + describe('buildWikimediaMobileApiUrl', () => { + it('should return mobile rest URL with provided path and trailing char', () => { + const url = baseUrlDirector.buildWikimediaMobileApiUrl('api/rest_v2/page/mobile-html') + + expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') + }) + + it('should return mobile rest URL with default path and trailing char', () => { + const url = baseUrlDirector.buildWikimediaMobileApiUrl() + + expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') + }) + }) + + describe('buildWikimediaDesktopApiUrl', () => { it('should return a desktop URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildDesktopRestApiURL('api/rest_v2/page/html') + const url = baseUrlDirector.buildWikimediaDesktopApiUrl('api/rest_v2/page/html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/html/') }) it('should return a desktop URL with default path and trailing char', () => { - const url = baseUrlDirector.buildDesktopRestApiURL() + const url = baseUrlDirector.buildWikimediaDesktopApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/html/') }) diff --git a/test/unit/builders/url/desktop.director.test.ts b/test/unit/builders/url/desktop.director.test.ts index 642a07356..5ca6b5d89 100644 --- a/test/unit/builders/url/desktop.director.test.ts +++ b/test/unit/builders/url/desktop.director.test.ts @@ -1,7 +1,7 @@ -import DesktopURLDirector 
from '../../../../src/util/builders/url/desktop.director.js' +import WikimediaDesktopURLDirector from '../../../../src/util/builders/url/desktop.director.js' -describe('DesktopURLDirector', () => { - const wikimediaDesktopUrlDirector = new DesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/') +describe('WikimediaDesktopURLDirector', () => { + const wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/') describe('buildArticleURL', () => { it('should return the URL to retrieve a desktop article', () => { diff --git a/test/unit/builders/url/mobile.director.test.ts b/test/unit/builders/url/mobile.director.test.ts new file mode 100644 index 000000000..07dd29ae7 --- /dev/null +++ b/test/unit/builders/url/mobile.director.test.ts @@ -0,0 +1,13 @@ +import WikimediaMobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' + +describe('WikimediaMobileURLDirector', () => { + const mobileUrlDirector = new WikimediaMobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') + + describe('buildArticleURL', () => { + it('should return a URL for retrieving mobile article', () => { + const url = mobileUrlDirector.buildArticleURL('article-123') + + expect(url).toBe('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/article-123') + }) + }) +}) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 153666e05..f4a208f73 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -32,7 +32,8 @@ describe('Downloader class', () => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() }) diff --git a/test/unit/mwApi.test.ts b/test/unit/mwApi.test.ts index 9b73fadd7..4c44f9b2d 100644 --- 
a/test/unit/mwApi.test.ts +++ b/test/unit/mwApi.test.ts @@ -18,7 +18,7 @@ afterAll(stopRedis) const initMW = async (downloader: Downloader) => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasVisualEditorApi() await MediaWiki.getNamespaces([], downloader) diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index c071eff07..9a6687ee0 100644 --- a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -65,7 +65,7 @@ describe('RendererBuilder', () => { const { MediaWiki } = await setupScrapeClasses() // en wikipedia // Force MediaWiki to have capability for the WikimediaDesktop for test purpose - jest.spyOn(MediaWiki, 'hasWikimediaDesktopRestApi').mockResolvedValue(true) + jest.spyOn(MediaWiki, 'hasWikimediaDesktopApi').mockResolvedValue(true) const rendererBuilderOptions = { MediaWiki, @@ -81,7 +81,8 @@ describe('RendererBuilder', () => { it('should throw an error for unknown RendererAPI in specific mode', async () => { const { downloader, MediaWiki } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index d1644e4be..e539f1fc0 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -21,9 +21,10 @@ describe('saveArticles', () => { test('Article html processing', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + 
await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') const _articlesDetail = await downloader.getArticleDetailsIds(['London']) const articlesDetail = mwRetToArticleDetail(_articlesDetail) const { articleDetailXId } = RedisStore @@ -44,6 +45,8 @@ describe('saveArticles', () => { } as any, downloader, dump, + true, + 'WikimediaDesktop', ) // Successfully scrapped existent articles @@ -90,6 +93,9 @@ describe('saveArticles', () => { case 'WikimediaDesktop': rendererInstance = new WikimediaDesktopRenderer() break + case 'WikimediaMobile': + rendererInstance = new WikimediaDesktopRenderer() + break default: throw new Error(`Unknown renderer: ${renderer}`) } @@ -127,7 +133,7 @@ describe('saveArticles', () => { test('Load main page and check that it is without header', async () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') const articleId = 'Main_Page' const articleUrl = getArticleUrl(downloader, dump, articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) @@ -223,7 +229,8 @@ describe('saveArticles', () => { test('--customFlavour', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() class CustomFlavour implements CustomProcessor { @@ -271,6 +278,8 @@ describe('saveArticles', () => { } as any, downloader, dump, + true, + 'WikimediaDesktop', ) const ParisDocument = domino.createDocument(writtenArticles.Paris.bufferData) 
diff --git a/test/unit/saveStaticFiles.test.ts b/test/unit/saveStaticFiles.test.ts new file mode 100644 index 000000000..ba63f5195 --- /dev/null +++ b/test/unit/saveStaticFiles.test.ts @@ -0,0 +1,48 @@ +import { startRedis, stopRedis } from './bootstrap.js' +import { jest } from '@jest/globals' +import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' +import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js' + +jest.setTimeout(10000) + +describe('saveStaticFiles', () => { + beforeAll(startRedis) + afterAll(stopRedis) + + test('Compare desktop static files list', async () => { + const desktopAndCommonStaticFiles = [ + 'script.js', + 'masonry.min.js', + 'article_list_home.js', + 'images_loaded.min.js', + 'style.css', + 'mobile_main_page.css', + '../node_modules/details-element-polyfill/dist/details-element-polyfill.js', + 'content.parsoid.css', + 'inserted_style.css', + ] + + const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() + const staticFilesFromRenderer = wikimediaDesktopRenderer.staticFilesListDesktop + + expect(desktopAndCommonStaticFiles).toEqual(staticFilesFromRenderer) + }) + + test('Compare mobile static files list', async () => { + const mobileAndCommonStaticFiles = [ + 'script.js', + 'masonry.min.js', + 'article_list_home.js', + 'images_loaded.min.js', + 'style.css', + 'mobile_main_page.css', + 'wm_mobile_override_script.js', + 'wm_mobile_override_style.css', + ] + + const wikimediaMobileRenderer = new WikimediaMobileRenderer() + const staticFilesFromRenderer = wikimediaMobileRenderer.staticFilesListMobile + + expect(mobileAndCommonStaticFiles).toEqual(staticFilesFromRenderer) + }) +}) diff --git a/test/unit/treatments/article.treatment.test.ts b/test/unit/treatments/article.treatment.test.ts index ab9a129ba..cbe4d1502 100644 --- a/test/unit/treatments/article.treatment.test.ts +++ b/test/unit/treatments/article.treatment.test.ts @@ -56,6 +56,7 @@ describe('ArticleTreatment', 
() => { } as any, downloader, dump, + true, ) // Successfully scrapped existent articles diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 4e76d0c7c..03b17b5c5 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -140,9 +140,10 @@ describe('Styles', () => { await RedisStore.redirectsXId.flush() const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') await getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area']) @@ -159,6 +160,7 @@ describe('Styles', () => { } as any, downloader, dump, + true, ) const html = LondonArticle.bufferData.toString() diff --git a/test/unit/webpAndRedirection.test.ts b/test/unit/webpAndRedirection.test.ts index 4a094a767..33f931504 100644 --- a/test/unit/webpAndRedirection.test.ts +++ b/test/unit/webpAndRedirection.test.ts @@ -35,6 +35,7 @@ Real-time computer graphics` outputDirectory: testId, redis: process.env.REDIS, webp: true, + forceRender: 'WikimediaDesktop', }) const zimFile = new ZimReader(outFiles[0].outFile) diff --git a/test/util.ts b/test/util.ts index c4d6ebc4d..7625cb789 100644 --- a/test/util.ts +++ b/test/util.ts @@ -37,7 +37,8 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() const dump = new Dump(format, {} as any, MediaWiki.metaData)