Skip to content

Commit 0e33df9

Browse files
Implement Mediawiki REST API render
1 parent cb4f7a2 commit 0e33df9

14 files changed

+163
-11
lines changed

src/Downloader.ts

+9
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,15 @@ class Downloader {
177177
{ condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href },
178178
{ condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href },
179179
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
180+
{ condition: await MediaWiki.hasMediawikiRestApi(), value: MediaWiki.mediawikiRestApiUrl.href },
180181
])
181182

182183
//* Objects order in array matters!
183184
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
184185
{ condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href },
185186
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
186187
{ condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href },
188+
{ condition: await MediaWiki.hasMediawikiRestApi(), value: MediaWiki.mediawikiRestApiUrl.href },
187189
])
188190
} else {
189191
switch (forceRender) {
@@ -208,6 +210,13 @@ class Downloader {
208210
break
209211
}
210212
break
213+
case 'MediawikiRestApi':
  // hasMediawikiRestApi() is async and returns Promise<boolean>. Without `await`
  // the condition would test the Promise object itself, which is always truthy,
  // so the base URLs would be set even when the API is unavailable.
  if (await MediaWiki.hasMediawikiRestApi()) {
    this.baseUrl = MediaWiki.mediawikiRestApiUrl.href
    this.baseUrlForMainPage = MediaWiki.mediawikiRestApiUrl.href
  }
  break
211220
default:
212221
throw new Error('Unable to find specific API end-point to retrieve article HTML')
213222
}

src/MediaWiki.ts

+20
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import ApiURLDirector from './util/builders/url/api.director.js'
1212
import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js'
1313
import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js'
1414
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
15+
import MediawikiRestApiDirector from './util/builders/url/mediawiki-rest-api.director.js'
1516
import { checkApiAvailability } from './util/mw-api.js'
1617
import { BLACKLISTED_NS } from './util/const.js'
1718

@@ -47,6 +48,7 @@ class MediaWiki {
4748
#actionApiPath: string
4849
#restApiPath: string
4950
#modulePathOpt: string
51+
#mediawikiRestApiPath: string
5052
#username: string
5153
#password: string
5254
#domain: string
@@ -55,6 +57,7 @@ class MediaWiki {
5557
private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector
5658
private wikimediaMobileUrlDirector: WikimediaMobileURLDirector
5759
private visualEditorURLDirector: VisualEditorURLDirector
60+
private mediawikiRestApiDirector: MediawikiRestApiDirector
5861

5962
public visualEditorApiUrl: URL
6063
public actionApiUrl: URL
@@ -63,10 +66,12 @@ class MediaWiki {
6366
public webUrl: URL
6467
public WikimediaDesktopApiUrl: URL
6568
public WikimediaMobileApiUrl: URL
69+
public mediawikiRestApiUrl: URL
6670

6771
#hasWikimediaDesktopApi: boolean | null
6872
#hasWikimediaMobileApi: boolean | null
6973
#hasVisualEditorApi: boolean | null
74+
#hasMediawikiRestApi: boolean | null
7075
#hasCoordinates: boolean | null
7176

7277
set username(value: string) {
@@ -134,6 +139,8 @@ class MediaWiki {
134139

135140
this.#actionApiPath = 'w/api.php'
136141
this.#restApiPath = 'api/rest_v1'
142+
// TODO: there is no CLI param for this api yet
143+
this.#mediawikiRestApiPath = 'w/rest.php/v1/page/'
137144
this.#wikiPath = 'wiki/'
138145
this.#modulePathOpt = 'w/load.php'
139146

@@ -154,6 +161,7 @@ class MediaWiki {
154161
this.#hasWikimediaDesktopApi = null
155162
this.#hasWikimediaMobileApi = null
156163
this.#hasVisualEditorApi = null
164+
this.#hasMediawikiRestApi = null
157165
this.#hasCoordinates = null
158166
}
159167

@@ -185,6 +193,14 @@ class MediaWiki {
185193
return this.#hasVisualEditorApi
186194
}
187195

196+
/**
 * Reports whether the MediaWiki REST API answered the availability check.
 *
 * The check is performed the first time this is called while the cached flag
 * is still `null`; afterwards the cached flag is returned as-is.
 */
public async hasMediawikiRestApi(): Promise<boolean> {
  if (this.#hasMediawikiRestApi === null) {
    const probeUrl = this.mediawikiRestApiDirector.buildArticleURL(this.apiCheckArticleId)
    this.#hasMediawikiRestApi = await checkApiAvailability(probeUrl)
  }

  return this.#hasMediawikiRestApi
}
203+
188204
public async hasCoordinates(downloader: Downloader): Promise<boolean> {
189205
if (this.#hasCoordinates === null) {
190206
const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num)
@@ -215,9 +231,12 @@ class MediaWiki {
215231
private initApiURLDirector() {
216232
this.webUrl = this.baseUrlDirector.buildURL(this.#wikiPath)
217233
this.actionApiUrl = this.baseUrlDirector.buildURL(this.#actionApiPath)
234+
// TODO: refactor this workaround once PR#1929 is merged
235+
this.mediawikiRestApiUrl = this.baseUrlDirector.buildURL(this.#mediawikiRestApiPath)
218236
this.apiUrlDirector = new ApiURLDirector(this.actionApiUrl.href)
219237
this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL()
220238
this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
239+
this.mediawikiRestApiDirector = new MediawikiRestApiDirector(this.baseUrl.href)
221240
}
222241

223242
public async login(downloader: Downloader) {
@@ -442,6 +461,7 @@ class MediaWiki {
442461
const mwMetaData: MWMetaData = {
443462
webUrl: this.webUrl.href,
444463
actionApiUrl: this.actionApiUrl.href,
464+
mediawikiRestApiPath: this.mediawikiRestApiUrl.href,
445465
modulePathOpt: this.#modulePathOpt,
446466
modulePath: this.modulePath,
447467
mobileModulePath: this.mobileModulePath,

src/mwoffliner.lib.ts

+1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ async function execute(argv: any) {
214214
await MediaWiki.hasWikimediaDesktopApi()
215215
const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi()
216216
await MediaWiki.hasVisualEditorApi()
217+
await MediaWiki.hasMediawikiRestApi()
217218
await downloader.setBaseUrls(forceRender)
218219

219220
RedisStore.setOptions(argv.redis || config.defaults.redisPath)

src/renderers/abstract.renderer.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import {
2222
} from '../util/misc.js'
2323

2424
type renderType = 'auto' | 'desktop' | 'mobile' | 'specific'
25-
type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile'
25+
type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile' | 'MediawikiRestApi'
2626

2727
interface RendererBuilderOptionsBase {
2828
renderType: renderType
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js'
2+
3+
/**
 * Renderer used for the MediaWiki REST API.
 *
 * It declares no members of its own: everything, including construction, is
 * inherited from WikimediaDesktopRenderer.
 */
export class MediawikiRestApiRenderer extends WikimediaDesktopRenderer {}

src/renderers/renderer.builder.ts

+15-3
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@ import { Renderer } from './abstract.renderer.js'
33
import { VisualEditorRenderer } from './visual-editor.renderer.js'
44
import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js'
55
import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js'
6+
import { MediawikiRestApiRenderer } from './mediawiki-rest-api.renderer.js'
67
import { RendererBuilderOptions } from './abstract.renderer.js'
78
import * as logger from './../Logger.js'
89

910
export class RendererBuilder {
1011
public async createRenderer(options: RendererBuilderOptions): Promise<Renderer> {
1112
const { renderType, renderName } = options
1213

13-
const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([
14+
const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi, hasMediawikiRestApi] = await Promise.all([
1415
MediaWiki.hasVisualEditorApi(),
1516
MediaWiki.hasWikimediaDesktopApi(),
1617
MediaWiki.hasWikimediaMobileApi(),
18+
MediaWiki.hasMediawikiRestApi(),
1719
])
1820

1921
switch (renderType) {
@@ -23,6 +25,8 @@ export class RendererBuilder {
2325
return new WikimediaDesktopRenderer()
2426
} else if (hasVisualEditorApi) {
2527
return new VisualEditorRenderer()
28+
} else if (hasMediawikiRestApi) {
29+
return new MediawikiRestApiRenderer()
2630
} else {
2731
logger.error('No available desktop renderer.')
2832
process.exit(1)
@@ -39,6 +43,8 @@ export class RendererBuilder {
3943
return new WikimediaDesktopRenderer()
4044
} else if (hasVisualEditorApi) {
4145
return new VisualEditorRenderer()
46+
} else if (hasMediawikiRestApi) {
47+
return new MediawikiRestApiRenderer()
4248
} else if (hasWikimediaMobileApi) {
4349
return new WikimediaMobileRenderer()
4450
} else {
@@ -47,13 +53,13 @@ export class RendererBuilder {
4753
}
4854
case 'specific':
4955
// renderName argument is required for 'specific' mode
50-
return this.handleSpecificRender(renderName, hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi)
56+
return this.handleSpecificRender(renderName, hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi, hasMediawikiRestApi)
5157
default:
5258
throw new Error(`Unknown render: ${renderType}`)
5359
}
5460
}
5561

56-
private handleSpecificRender(renderName: string, hasVisualEditorApi: boolean, hasWikimediaDesktopApi: boolean, hasWikimediaMobileApi: boolean) {
62+
private handleSpecificRender(renderName: string, hasVisualEditorApi: boolean, hasWikimediaDesktopApi: boolean, hasWikimediaMobileApi: boolean, hasMediawikiRestApi: boolean) {
5763
// renderName argument is required for 'specific' mode
5864
switch (renderName) {
5965
case 'WikimediaDesktop':
@@ -74,6 +80,12 @@ export class RendererBuilder {
7480
}
7581
logger.error('No available mobile renderer.')
7682
process.exit(1)
83+
case 'MediawikiRestApi':
84+
if (hasMediawikiRestApi) {
85+
return new MediawikiRestApiRenderer()
86+
}
87+
logger.error('Cannot create an instance of MediawikiRestApi renderer.')
88+
process.exit(1)
7789
default:
7890
throw new Error(`Unknown renderName for specific mode: ${renderName}`)
7991
}

src/types.d.ts

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ interface MWMetaData {
160160
baseUrl: string
161161
wikiPath: string
162162
actionApiPath: string
163+
mediawikiRestApiPath: string
163164
restApiPath: string
164165
domain: string
165166
webUrl: string
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import urlBuilder from './url.builder.js'
2+
3+
/**
 * Builds MediaWiki REST API URLs on top of a base domain.
 */
export default class MediawikiRestApiURL {
  baseDomain: string

  constructor(baseDomain: string) {
    this.baseDomain = baseDomain
  }

  /**
   * Returns the REST API URL for an article's HTML, or the bare page endpoint.
   *
   * @param articleId - Optional article identifier; it is inserted into the URL as given (no encoding is applied here).
   * @returns `<base>/w/rest.php/v1/page/<articleId>/html` when `articleId` is truthy,
   *   otherwise `<base>/w/rest.php/v1/page/`.
   */
  buildArticleURL(articleId?: string) {
    const base = urlBuilder.setDomain(this.baseDomain).build()
    // TODO: refactor this workaround once PR#1929 is merged
    const pageEndpoint = `${base}/w/rest.php/v1/page/`

    if (!articleId) {
      return pageEndpoint
    }

    return `${pageEndpoint}${articleId}/html`
  }
}
}

src/util/const.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/
2020
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
2121
export const MAX_FILE_DOWNLOAD_RETRIES = 5
2222
export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not ingestible by Parsoid https://github.com/openzim/mwoffliner/issues/1853
23-
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile']
23+
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile', 'MediawikiRestApi']

src/util/saveArticles.ts

+8-4
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,9 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK
131131
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
132132
let rets: any
133133
try {
134-
const articleUrl = getArticleUrl(downloader, dump, articleId)
135134
const isMainPage = dump.isMainPage(articleId)
136135
const renderer = isMainPage ? mainPageRenderer : articlesRenderer
136+
const articleUrl = getArticleUrl(downloader, dump, articleId, renderer)
137137

138138
rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)
139139
for (const { articleId, html } of rets) {
@@ -224,8 +224,12 @@ async function saveArticle(
224224
}
225225
}
226226

227-
export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: string): string {
228-
return `${dump.isMainPage(articleId) ? downloader.baseUrlForMainPage : downloader.baseUrl}${encodeURIComponent(articleId)}`
227+
// TODO: remove this workaround once PR#1929 is merged
/**
 * Builds the URL from which an article's HTML is fetched.
 *
 * @param downloader - Supplies `baseUrl` and `baseUrlForMainPage`.
 * @param dump - Used to decide whether `articleId` is the main page.
 * @param articleId - Article identifier; it is passed through `encodeURIComponent` before being appended.
 * @param renderer - Optional renderer instance. When its constructor is named
 *   `MediawikiRestApiRenderer`, `/html` is appended to the URL.
 * @returns The base URL followed by the encoded article id, plus `/html` for the MediaWiki REST API renderer.
 */
export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: string, renderer?: object): string {
  const baseUrl = dump.isMainPage(articleId) ? downloader.baseUrlForMainPage : downloader.baseUrl
  const articleUrl = `${baseUrl}${encodeURIComponent(articleId)}`

  // NOTE(review): matching on the constructor name is an exact string comparison, so it
  // will not match subclasses or a renamed/minified class — confirm this is acceptable
  // until the workaround is removed.
  const isMediawikiRestApiRenderer = renderer?.constructor?.name === 'MediawikiRestApiRenderer'

  return isMediawikiRestApiRenderer ? `${articleUrl}/html` : articleUrl
}
230234

231235
/*
@@ -293,9 +297,9 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade
293297

294298
let rets: any
295299
try {
296-
const articleUrl = getArticleUrl(downloader, dump, articleId)
297300
const isMainPage = dump.isMainPage(articleId)
298301
const renderer = isMainPage ? mainPageRenderer : articlesRenderer
302+
const articleUrl = getArticleUrl(downloader, dump, articleId, renderer)
299303

300304
rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)
301305

test/e2e/openstreetmap.e2e.test.ts

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { testRenders } from '../testRenders.js'
2+
import domino from 'domino'
3+
import { zimdump } from '../util.js'
4+
import 'dotenv/config.js'
5+
import { jest } from '@jest/globals'
6+
import rimraf from 'rimraf'
7+
8+
jest.setTimeout(60000)
9+
10+
/**
 * Check the integrity of img elements between zim file and article html taken from it.
 *
 * @param imgFilesArr - Image file names listed from the zim file.
 * @param imgElements - `<img>` elements taken from the article document.
 * @returns `true` as soon as one element's `src` contains one of the file names, `false` otherwise
 *   (including when either input is empty).
 */
const verifyImgElements = (imgFilesArr: readonly string[], imgElements: Iterable<{ getAttribute(name: string): string | null }>): boolean => {
  for (const img of imgElements) {
    const src = img.getAttribute('src')
    // An <img> without a src attribute yields null; skip it instead of throwing on `.includes`.
    if (src === null) {
      continue
    }
    if (imgFilesArr.some((imgFile) => src.includes(imgFile))) {
      return true
    }
  }
  return false
}
21+
22+
const parameters = {
23+
mwUrl: 'https://wiki.openstreetmap.org',
24+
articleList: 'London',
25+
adminEmail: '[email protected]',
26+
}
27+
28+
await testRenders(
29+
parameters,
30+
async (outFiles) => {
31+
const articleFromDump = await zimdump(`show --url A/${parameters.articleList} ${outFiles[0].outFile}`)
32+
describe('e2e test for wiki.openstreetmap.org', () => {
33+
const articleDoc = domino.createDocument(articleFromDump)
34+
// TODO: blocked by issues/1931
35+
/*
36+
test(`test zim integrity for ${outFiles[0]?.renderer} renderer`, async () => {
37+
await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
38+
})
39+
*/
40+
test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => {
41+
expect(articleDoc.querySelector('h1.article-header, h1.pcs-edit-section-title')).toBeTruthy()
42+
})
43+
test(`test article image integrity for ${outFiles[0]?.renderer} renderer`, async () => {
44+
const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`)
45+
const mediaFilesArr = mediaFiles.split('\n')
46+
const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg'))
47+
const imgElements = Array.from(articleDoc.querySelectorAll('img'))
48+
expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true)
49+
})
50+
51+
afterAll(() => {
52+
rimraf.sync(`./${outFiles[0].testId}`)
53+
})
54+
})
55+
},
56+
['MediawikiRestApi'],
57+
)

test/unit/downloader.test.ts

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import domino from 'domino'
1717
import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js'
1818
import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js'
1919
import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js'
20+
import { MediawikiRestApiRenderer } from '../../src/renderers/mediawiki-rest-api.renderer.js'
2021
import { RENDERERS_LIST } from '../../src/util/const.js'
2122

2223
jest.setTimeout(200000)
@@ -38,6 +39,7 @@ describe('Downloader class', () => {
3839
await MediaWiki.hasWikimediaDesktopApi()
3940
await MediaWiki.hasWikimediaMobileApi()
4041
await MediaWiki.hasVisualEditorApi()
42+
await MediaWiki.hasMediawikiRestApi()
4143
await downloader.setBaseUrls()
4244
})
4345

@@ -225,6 +227,9 @@ describe('Downloader class', () => {
225227
case 'WikimediaMobile':
226228
rendererInstance = new WikimediaMobileRenderer()
227229
break
230+
case 'MediawikiRestApi':
231+
rendererInstance = new MediawikiRestApiRenderer()
232+
break
228233
default:
229234
throw new Error(`Unknown renderer: ${renderer}`)
230235
}

0 commit comments

Comments
 (0)