Skip to content

Commit ad5dc1d

Browse files
authored
Merge pull request #1947 from openzim/mediawiki-rest-api
Implement Mediawiki REST API render
2 parents 23f7084 + 900f58d commit ad5dc1d

22 files changed

+291
-43
lines changed

src/Downloader.ts

+10-5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import urlHelper from './util/url.helper.js'
2727
import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js'
2828
import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js'
2929
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
30+
import MediawikiRestApiURLDirector from './util/builders/url/mediawiki-rest-api.director.js'
3031

3132
const imageminOptions = new Map()
3233
imageminOptions.set('default', new Map())
@@ -78,6 +79,7 @@ export const defaultStreamRequestOptions: AxiosRequestConfig = {
7879
method: 'GET',
7980
}
8081

82+
/** Union of the URL director classes that Downloader stores in `articleUrlDirector` and `mainPageUrlDirector`. */
type URLDirector = WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector | MediawikiRestApiURLDirector
8183
/**
8284
* Downloader is a class providing content retrieval functionalities for both Mediawiki and S3 remote instances.
8385
*/
@@ -100,8 +102,9 @@ class Downloader {
100102
private readonly optimisationCacheUrl: string
101103
private s3: S3
102104
private apiUrlDirector: ApiURLDirector
103-
private articleUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector
104-
private mainPageUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector
105+
106+
private articleUrlDirector: URLDirector
107+
private mainPageUrlDirector: URLDirector
105108

106109
constructor({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions }: DownloaderOpts) {
107110
this.uaString = uaString
@@ -177,11 +180,13 @@ class Downloader {
177180
/**
 * Selects the URL director that matches a renderer, keyed on the renderer's
 * constructor name.
 *
 * In this diff the `-` lines built a fresh director on every call, while the
 * `+` lines return the instance stored on the MediaWiki object. In the
 * MediaWiki.ts hunks shown on this page those instances are assigned inside
 * the has*Api() methods.
 *
 * The switch has no default branch, so an unrecognised renderer name yields
 * `undefined`.
 *
 * @param renderer - Renderer instance; only `renderer.constructor.name` is read.
 */
private getUrlDirector(renderer: object) {
178181
switch (renderer.constructor.name) {
179182
case 'WikimediaDesktopRenderer':
180-
return new WikimediaDesktopURLDirector(MediaWiki.wikimediaDesktopApiUrl.href)
183+
return MediaWiki.wikimediaDesktopUrlDirector
181184
case 'VisualEditorRenderer':
182-
return new VisualEditorURLDirector(MediaWiki.visualEditorApiUrl.href)
185+
return MediaWiki.visualEditorUrlDirector
183186
case 'WikimediaMobileRenderer':
184-
return new WikimediaMobileURLDirector(MediaWiki.wikimediaMobileApiUrl.href)
187+
return MediaWiki.wikimediaMobileUrlDirector
188+
case 'MediawikiRestApiRenderer':
189+
return MediaWiki.mediawikiRestApiUrlDirector
185190
}
186191
}
187192

src/MediaWiki.ts

+37-10
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import ApiURLDirector from './util/builders/url/api.director.js'
1212
import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js'
1313
import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js'
1414
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
15+
import MediawikiRestApiURLDirector from './util/builders/url/mediawiki-rest-api.director.js'
1516
import { checkApiAvailability } from './util/mw-api.js'
1617
import { BLACKLISTED_NS } from './util/const.js'
1718

@@ -48,16 +49,19 @@ class MediaWiki {
4849
#actionApiPath: string
4950
#restApiPath: string
5051
#modulePathOpt: string
52+
#mediawikiRestApiPath: string
5153
#username: string
5254
#password: string
5355
#domain: string
5456

5557
public wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector
5658
public wikimediaMobileUrlDirector: WikimediaMobileURLDirector
57-
public visualEditorURLDirector: VisualEditorURLDirector
59+
public visualEditorUrlDirector: VisualEditorURLDirector
60+
public mediawikiRestApiUrlDirector: MediawikiRestApiURLDirector
5861

5962
public visualEditorApiUrl: URL
6063
public actionApiUrl: URL
64+
public mediawikiRestApiUrl: URL
6165
public webUrl: URL
6266
public wikimediaDesktopApiUrl: URL
6367
public wikimediaMobileApiUrl: URL
@@ -66,12 +70,10 @@ class MediaWiki {
6670
public mobileModulePath: string
6771

6872
#apiUrlDirector: ApiURLDirector
69-
#wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector
70-
#wikimediaMobileUrlDirector: WikimediaMobileURLDirector
71-
#visualEditorURLDirector: VisualEditorURLDirector
7273
#hasWikimediaDesktopApi: boolean | null
7374
#hasWikimediaMobileApi: boolean | null
7475
#hasVisualEditorApi: boolean | null
76+
#hasMediawikiRestApi: boolean | null
7577
#hasCoordinates: boolean | null
7678

7779
set username(value: string) {
@@ -98,6 +100,13 @@ class MediaWiki {
98100
}
99101
}
100102

103+
/**
 * Overrides the MediaWiki REST API path and rebuilds `mediawikiRestApiUrl`.
 * Falsy values (empty string, undefined) are ignored, so the current path stays in effect.
 */
set mediawikiRestApiPath(value: string) {
  if (!value) {
    return
  }

  this.#mediawikiRestApiPath = value
  this.setMediawikiRestApiURL()
}
109+
101110
set domain(value: string) {
102111
this.#domain = value
103112
}
@@ -117,6 +126,7 @@ class MediaWiki {
117126
this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath)
118127
this.setWikimediaDesktopApiUrl()
119128
this.setWikimediaMobileApiUrl()
129+
this.setMediawikiRestApiURL()
120130
this.setVisualEditorURL()
121131
this.setModuleURL()
122132
this.setMobileModuleUrl()
@@ -143,6 +153,7 @@ class MediaWiki {
143153

144154
this.#actionApiPath = 'w/api.php'
145155
this.#restApiPath = 'api/rest_v1'
156+
this.#mediawikiRestApiPath = 'w/rest.php/v1/page/'
146157
this.#wikiPath = 'wiki/'
147158
this.#modulePathOpt = 'w/load.php'
148159

@@ -163,6 +174,7 @@ class MediaWiki {
163174
this.#hasWikimediaDesktopApi = null
164175
this.#hasWikimediaMobileApi = null
165176
this.#hasVisualEditorApi = null
177+
this.#hasMediawikiRestApi = null
166178
this.#hasCoordinates = null
167179
}
168180

@@ -172,31 +184,40 @@ class MediaWiki {
172184

173185
/**
 * Reports whether the Wikimedia desktop API is usable for this wiki.
 *
 * The first call (while the cached flag is still `null`) builds a
 * WikimediaDesktopURLDirector from `wikimediaDesktopApiUrl`, probes the URL of
 * `apiCheckArticleId` with checkApiAvailability and caches the outcome. Later
 * calls return the cached flag.
 *
 * In this diff the director moves from a private `#` field (`-` lines) to the
 * public `wikimediaDesktopUrlDirector` property (`+` lines).
 *
 * @returns The cached availability flag.
 */
public async hasWikimediaDesktopApi(): Promise<boolean> {
174186
if (this.#hasWikimediaDesktopApi === null) {
175-
this.#wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.wikimediaDesktopApiUrl.href)
176-
this.#hasWikimediaDesktopApi = await checkApiAvailability(this.#wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId))
187+
this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.wikimediaDesktopApiUrl.href)
188+
this.#hasWikimediaDesktopApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId))
177189
return this.#hasWikimediaDesktopApi
178190
}
179191
return this.#hasWikimediaDesktopApi
180192
}
181193

182194
/**
 * Reports whether the Wikimedia mobile API is usable for this wiki.
 *
 * The first call (while the cached flag is still `null`) builds a
 * WikimediaMobileURLDirector from `wikimediaMobileApiUrl`, probes the URL of
 * `apiCheckArticleId` with checkApiAvailability and caches the outcome. Later
 * calls return the cached flag.
 *
 * In this diff the director moves from a private `#` field (`-` lines) to the
 * public `wikimediaMobileUrlDirector` property (`+` lines).
 *
 * @returns The cached availability flag.
 */
public async hasWikimediaMobileApi(): Promise<boolean> {
183195
if (this.#hasWikimediaMobileApi === null) {
184-
this.#wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.wikimediaMobileApiUrl.href)
185-
this.#hasWikimediaMobileApi = await checkApiAvailability(this.#wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId))
196+
this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.wikimediaMobileApiUrl.href)
197+
this.#hasWikimediaMobileApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId))
186198
return this.#hasWikimediaMobileApi
187199
}
188200
return this.#hasWikimediaMobileApi
189201
}
190202

191203
/**
 * Reports whether the VisualEditor API is usable for this wiki.
 *
 * The first call (while the cached flag is still `null`) builds a
 * VisualEditorURLDirector from `visualEditorApiUrl`, probes the URL of
 * `apiCheckArticleId` with checkApiAvailability and caches the outcome. Later
 * calls return the cached flag.
 *
 * In this diff the director moves from the private `#visualEditorURLDirector`
 * field (`-` lines) to the public `visualEditorUrlDirector` property (`+` lines).
 *
 * @returns The cached availability flag.
 */
public async hasVisualEditorApi(): Promise<boolean> {
192204
if (this.#hasVisualEditorApi === null) {
193-
this.#visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
194-
this.#hasVisualEditorApi = await checkApiAvailability(this.#visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId))
205+
this.visualEditorUrlDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
206+
this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorUrlDirector.buildArticleURL(this.apiCheckArticleId))
195207
return this.#hasVisualEditorApi
196208
}
197209
return this.#hasVisualEditorApi
198210
}
199211

212+
/**
 * Reports whether the MediaWiki REST API is usable for this wiki.
 *
 * The answer is computed once: while the cached flag is `null`, a
 * MediawikiRestApiURLDirector is created from `mediawikiRestApiUrl` and stored
 * on the public `mediawikiRestApiUrlDirector` property, then the URL of
 * `apiCheckArticleId` is probed with checkApiAvailability. Subsequent calls
 * return the cached flag without probing again.
 *
 * @returns The cached availability flag.
 */
public async hasMediawikiRestApi(): Promise<boolean> {
  // Already probed: reuse the cached answer.
  if (this.#hasMediawikiRestApi !== null) {
    return this.#hasMediawikiRestApi
  }

  this.mediawikiRestApiUrlDirector = new MediawikiRestApiURLDirector(this.mediawikiRestApiUrl.href)
  const probeUrl = this.mediawikiRestApiUrlDirector.buildArticleURL(this.apiCheckArticleId)
  this.#hasMediawikiRestApi = await checkApiAvailability(probeUrl)

  return this.#hasMediawikiRestApi
}
220+
200221
public async hasCoordinates(downloader: Downloader): Promise<boolean> {
201222
if (this.#hasCoordinates === null) {
202223
const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num)
@@ -224,6 +245,10 @@ class MediaWiki {
224245
this.wikimediaMobileApiUrl = this.urlDirector.buildWikimediaMobileApiUrl(this.#restApiPath)
225246
}
226247

248+
/** Recomputes `mediawikiRestApiUrl` from the currently configured MediaWiki REST API path. */
private setMediawikiRestApiURL() {
  const restApiUrl = this.urlDirector.buildMediawikiRestApiUrl(this.#mediawikiRestApiPath)
  this.mediawikiRestApiUrl = restApiUrl
}
251+
227252
private setVisualEditorURL() {
228253
this.#apiUrlDirector = new ApiURLDirector(this.actionApiUrl.href)
229254
this.visualEditorApiUrl = this.#apiUrlDirector.buildVisualEditorURL()
@@ -459,13 +484,15 @@ class MediaWiki {
459484
const mwMetaData: MWMetaData = {
460485
webUrl: this.webUrl.href,
461486
actionApiUrl: this.actionApiUrl.href,
487+
mediawikiRestApiUrl: this.mediawikiRestApiUrl.href,
462488
modulePathOpt: this.#modulePathOpt,
463489
modulePath: this.modulePath,
464490
mobileModulePath: this.mobileModulePath,
465491
webUrlPath: this.webUrl.pathname,
466492
wikiPath: this.#wikiPath,
467493
baseUrl: this.baseUrl.href,
468494
actionApiPath: this.#actionApiPath,
495+
mediawikiRestApiPath: this.#mediawikiRestApiPath,
469496
restApiPath: this.#restApiPath,
470497
domain: this.#domain,
471498

src/mwoffliner.lib.ts

+3
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ async function execute(argv: any) {
7777
mwWikiPath,
7878
mwActionApiPath,
7979
mwRestApiPath,
80+
mwMediaWikiRestApiPath,
8081
mwModulePath,
8182
mwDomain,
8283
mwUsername,
@@ -161,6 +162,7 @@ async function execute(argv: any) {
161162
MediaWiki.wikiPath = mwWikiPath
162163
MediaWiki.actionApiPath = mwActionApiPath
163164
MediaWiki.restApiPath = mwRestApiPath
165+
MediaWiki.mediawikiRestApiPath = mwMediaWikiRestApiPath
164166
MediaWiki.modulePathOpt = mwModulePath
165167
MediaWiki.domain = mwDomain
166168
MediaWiki.password = mwPassword
@@ -213,6 +215,7 @@ async function execute(argv: any) {
213215
await MediaWiki.hasCoordinates(downloader)
214216
await MediaWiki.hasWikimediaDesktopApi()
215217
const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi()
218+
await MediaWiki.hasMediawikiRestApi()
216219
await MediaWiki.hasVisualEditorApi()
217220

218221
RedisStore.setOptions(argv.redis || config.defaults.redisPath)

src/parameterList.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ export const parameterDescriptions = {
1818
keepEmptyParagraphs: 'Keep all paragraphs, even empty ones.',
1919
mwWikiPath: 'Mediawiki wiki base path (per default "/wiki/")',
2020
mwActionApiPath: 'Mediawiki API path (per default "/w/api.php")',
21-
mwRestApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")',
21+
mwRestApiPath: 'Wikimedia Rest API path (per default "/api/rest_v1")',
22+
mwMediaWikiRestApiPath: 'Mediawiki Rest API path (per default "w/rest.php/v1/page/")',
2223
mwModulePath: 'Mediawiki module load path (per default "/w/load.php")',
2324
mwDomain: 'Mediawiki user domain (thought for private wikis)',
2425
mwUsername: 'Mediawiki username (thought for private wikis)',

src/renderers/abstract.renderer.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import {
2222
} from '../util/misc.js'
2323

2424
type renderType = 'auto' | 'desktop' | 'mobile' | 'specific'
25-
type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile'
25+
type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile' | 'MediawikiRestApi'
2626

2727
interface RendererBuilderOptionsBase {
2828
renderType: renderType
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js'
2+
3+
/**
 * Renderer used when articles are fetched through the MediaWiki REST API.
 *
 * It adds no behaviour of its own; everything is inherited from
 * WikimediaDesktopRenderer. The class exists so that it has a distinct
 * constructor name: Downloader.getUrlDirector switches on
 * `renderer.constructor.name` and maps 'MediawikiRestApiRenderer' to the
 * MediaWiki REST API URL director.
 *
 * No explicit constructor is declared because one that only calls `super()`
 * is what the language already provides by default.
 */
export class MediawikiRestApiRenderer extends WikimediaDesktopRenderer {}

src/renderers/renderer.builder.ts

+15-3
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@ import { Renderer } from './abstract.renderer.js'
33
import { VisualEditorRenderer } from './visual-editor.renderer.js'
44
import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js'
55
import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js'
6+
import { MediawikiRestApiRenderer } from './mediawiki-rest-api.renderer.js'
67
import { RendererBuilderOptions } from './abstract.renderer.js'
78
import * as logger from './../Logger.js'
89

910
export class RendererBuilder {
1011
public async createRenderer(options: RendererBuilderOptions): Promise<Renderer> {
1112
const { renderType, renderName } = options
1213

13-
const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([
14+
const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi, hasMediawikiRestApi] = await Promise.all([
1415
MediaWiki.hasVisualEditorApi(),
1516
MediaWiki.hasWikimediaDesktopApi(),
1617
MediaWiki.hasWikimediaMobileApi(),
18+
MediaWiki.hasMediawikiRestApi(),
1719
])
1820

1921
switch (renderType) {
@@ -23,6 +25,8 @@ export class RendererBuilder {
2325
return new WikimediaDesktopRenderer()
2426
} else if (hasVisualEditorApi) {
2527
return new VisualEditorRenderer()
28+
} else if (hasMediawikiRestApi) {
29+
return new MediawikiRestApiRenderer()
2630
} else {
2731
logger.error('No available desktop renderer.')
2832
process.exit(1)
@@ -39,6 +43,8 @@ export class RendererBuilder {
3943
return new WikimediaDesktopRenderer()
4044
} else if (hasVisualEditorApi) {
4145
return new VisualEditorRenderer()
46+
} else if (hasMediawikiRestApi) {
47+
return new MediawikiRestApiRenderer()
4248
} else if (hasWikimediaMobileApi) {
4349
return new WikimediaMobileRenderer()
4450
} else {
@@ -47,13 +53,13 @@ export class RendererBuilder {
4753
}
4854
case 'specific':
4955
// renderName argument is required for 'specific' mode
50-
return this.handleSpecificRender(renderName, hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi)
56+
return this.handleSpecificRender(renderName, hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi, hasMediawikiRestApi)
5157
default:
5258
throw new Error(`Unknown render: ${renderType}`)
5359
}
5460
}
5561

56-
private handleSpecificRender(renderName: string, hasVisualEditorApi: boolean, hasWikimediaDesktopApi: boolean, hasWikimediaMobileApi: boolean) {
62+
private handleSpecificRender(renderName: string, hasVisualEditorApi: boolean, hasWikimediaDesktopApi: boolean, hasWikimediaMobileApi: boolean, hasMediawikiRestApi: boolean) {
5763
// renderName argument is required for 'specific' mode
5864
switch (renderName) {
5965
case 'WikimediaDesktop':
@@ -74,6 +80,12 @@ export class RendererBuilder {
7480
}
7581
logger.error('No available mobile renderer.')
7682
process.exit(1)
83+
case 'MediawikiRestApi':
84+
if (hasMediawikiRestApi) {
85+
return new MediawikiRestApiRenderer()
86+
}
87+
logger.error('Cannot create an instance of MediawikiRestApi renderer.')
88+
process.exit(1)
7789
default:
7890
throw new Error(`Unknown renderName for specific mode: ${renderName}`)
7991
}

src/types.d.ts

+2
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,12 @@ interface MWMetaData {
160160
baseUrl: string
161161
wikiPath: string
162162
actionApiPath: string
163+
mediawikiRestApiPath: string
163164
restApiPath: string
164165
domain: string
165166
webUrl: string
166167
actionApiUrl: string
168+
mediawikiRestApiUrl: string
167169
webUrlPath: string
168170
modulePath: string
169171
modulePathOpt: string

src/util/builders/url/base.director.ts

+7
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,11 @@ export default class BaseURLDirector {
4848
.setPath(path ?? 'api/rest_v1/page/mobile-html-offline-resources')
4949
.build(false, '/')
5050
}
51+
52+
/**
 * Builds the MediaWiki REST API base URL for this director's domain.
 *
 * @param path - REST API path; when `null`/`undefined`, 'w/rest.php/v1/page/' is used.
 * @returns The value produced by `urlBuilder.build(true, '/')`.
 *          NOTE(review): callers read `.href` from it, so this is presumably a URL object — confirm against url.builder.
 */
buildMediawikiRestApiUrl(path?: string) {
  const restApiPath = path ?? 'w/rest.php/v1/page/'
  const builder = urlBuilder.setDomain(this.baseDomain).setPath(restApiPath)

  return builder.build(true, '/')
}
5158
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import urlBuilder from './url.builder.js'
2+
3+
/**
 * Builds article URLs on top of a MediaWiki REST API base URL.
 */
export default class MediawikiRestApiURLDirector {
  /**
   * @param baseDomain - Base URL handed to the URL builder; article ids are appended to what it builds.
   */
  constructor(public baseDomain: string) {}

  /**
   * Returns the URL of an article's HTML rendering: the built base URL,
   * followed by the article id and the literal suffix `/html`.
   *
   * NOTE(review): `articleId` is inserted verbatim, without percent-encoding.
   * Confirm that callers only pass ids that are safe inside a URL path
   * (titles containing `/`, `?` or `#` would change the meaning of the URL).
   *
   * @param articleId - Article identifier appended to the base URL.
   */
  buildArticleURL(articleId: string) {
    return `${urlBuilder.setDomain(this.baseDomain).build()}${articleId}/html`
  }
}

src/util/const.ts

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,9 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/
2020
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
2121
export const MAX_FILE_DOWNLOAD_RETRIES = 5
2222
export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not ingestible by Parsoid https://github.com/openzim/mwoffliner/issues/1853
23-
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile']
23+
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile', 'MediawikiRestApi']
24+
/*
25+
Signature found at the start of redirect pages that some 3rd-party wikis return with a 200 response code
26+
Check this link: https://pokemon.fandom.com/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&formatversion=2&page=MediaWiki%3ASidebar
27+
*/
28+
export const REDIRECT_PAGE_SIGNATURE = 'Moved to'

src/util/mw-api.ts

+6-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import Timer from './Timer.js'
66
import axios from 'axios'
77
import RedisStore from '../RedisStore.js'
88
import MediaWiki from '../MediaWiki.js'
9+
import { REDIRECT_PAGE_SIGNATURE } from './const.js'
910

1011
export async function getArticlesByIds(articleIds: string[], downloader: Downloader, log = true): Promise<void> {
1112
let from = 0
@@ -260,7 +261,11 @@ export function mwRetToArticleDetail(obj: QueryMwRet): KVS<ArticleDetail> {
260261
export async function checkApiAvailability(url: string, loginCookie = ''): Promise<boolean> {
261262
try {
262263
const resp = await axios.get(decodeURI(url), { maxRedirects: 0, headers: { cookie: loginCookie } })
263-
return resp.status === 200 && !resp.headers['mediawiki-api-error']
264+
265+
const isRedirectPage = typeof resp.data === 'string' && resp.data.startsWith(REDIRECT_PAGE_SIGNATURE)
266+
const isSuccess = resp.status === 200 && !resp.headers['mediawiki-api-error']
267+
268+
return !isRedirectPage && isSuccess
264269
} catch (err) {
265270
return false
266271
}

0 commit comments

Comments
 (0)