Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logs for API checks #2125

Merged
merged 2 commits into from
Jan 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions src/MediaWiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ class MediaWiki {
public async hasWikimediaDesktopApi(): Promise<boolean> {
if (this.#hasWikimediaDesktopApi === null) {
this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.wikimediaDesktopApiUrl.href)
this.#hasWikimediaDesktopApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId))
const checkUrl = this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)
this.#hasWikimediaDesktopApi = await checkApiAvailability(checkUrl)
logger.log('Checked for WikimediaDesktopApi at', checkUrl, '-- result is: ', this.#hasWikimediaDesktopApi)
return this.#hasWikimediaDesktopApi
}
return this.#hasWikimediaDesktopApi
Expand All @@ -184,7 +186,9 @@ class MediaWiki {
public async hasWikimediaMobileApi(): Promise<boolean> {
if (this.#hasWikimediaMobileApi === null) {
this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.wikimediaMobileApiUrl.href)
this.#hasWikimediaMobileApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId))
const checkUrl = this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)
this.#hasWikimediaMobileApi = await checkApiAvailability(checkUrl)
logger.log('Checked for WikimediaMobileApi at', checkUrl, '-- result is: ', this.#hasWikimediaMobileApi)
return this.#hasWikimediaMobileApi
}
return this.#hasWikimediaMobileApi
Expand All @@ -193,11 +197,9 @@ class MediaWiki {
public async hasVisualEditorApi(): Promise<boolean> {
if (this.#hasVisualEditorApi === null) {
this.visualEditorUrlDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
this.#hasVisualEditorApi = await checkApiAvailability(
this.visualEditorUrlDirector.buildArticleURL(this.apiCheckArticleId),
'' /* empty login cookie */,
this.visualEditorUrlDirector.validMimeTypes,
)
const checkUrl = this.visualEditorUrlDirector.buildArticleURL(this.apiCheckArticleId)
this.#hasVisualEditorApi = await checkApiAvailability(checkUrl, '' /* empty login cookie */, this.visualEditorUrlDirector.validMimeTypes)
logger.log('Checked for VisualEditorApi at', checkUrl, '-- result is: ', this.#hasVisualEditorApi)
return this.#hasVisualEditorApi
}
return this.#hasVisualEditorApi
Expand All @@ -206,7 +208,9 @@ class MediaWiki {
public async hasRestApi(): Promise<boolean> {
if (this.#hasRestApi === null) {
this.restApiUrlDirector = new RestApiURLDirector(this.restApiUrl.href)
this.#hasRestApi = await checkApiAvailability(this.restApiUrlDirector.buildArticleURL(this.apiCheckArticleId))
const checkUrl = this.restApiUrlDirector.buildArticleURL(this.apiCheckArticleId)
this.#hasRestApi = await checkApiAvailability(checkUrl)
logger.log('Checked for RestApi at', checkUrl, '-- result is: ', this.#hasRestApi)
return this.#hasRestApi
}
return this.#hasRestApi
Expand Down
2 changes: 2 additions & 0 deletions src/util/saveArticles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@
renderType: hasWikimediaMobileApi ? 'mobile' : 'auto',
})
}
logger.log(`Using ${mainPageRenderer.constructor.name} for main page renderer`)
logger.log(`Using ${articlesRenderer.constructor.name} for articles renderer`)
downloader.setUrlsDirectors(mainPageRenderer, articlesRenderer)

if (dump.customProcessor?.shouldKeepArticle) {
Expand All @@ -264,127 +266,127 @@
const timeout = Math.max(downloader.requestTimeout * 2, 10 * 60 * 1000)

await articleDetailXId.iterateItems(downloader.speed, (articleKeyValuePairs, workerId) => {
return new Promise(async (resolve, reject) => {
/*
* timer to detect freezes
*/
let curStage = 0
let curArticle = ''
const timer = new Timer(() => {
const errorMessage = `Worker timed out at ${stages[curStage]} ${curArticle}`
logger.error(errorMessage)
reject(new Error(errorMessage))
}, timeout)

logger.info(`Worker [${workerId}] processing batch of article ids [${logger.logifyArray(Object.keys(articleKeyValuePairs))}]`)

const parsePromiseQueue: [string, Promise<Error>][] = []

for (const [articleId, articleDetail] of Object.entries(articleKeyValuePairs)) {
timer.reset()
curStage = 0
curArticle = articleId
const promises: [string, Promise<Error>][] = []

const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)

let rets: any
try {
const isMainPage = dump.isMainPage(articleId)
const renderer = isMainPage ? mainPageRenderer : articlesRenderer
const articleUrl = isMainPage ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId)

rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)

for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, staticFiles, subtitles } of rets) {
if (!finalHTML) {
logger.warn(`No HTML returned for article [${articleId}], skipping`)
continue
}

curStage += 1
for (const dep of moduleDependencies.jsDependenciesList) {
jsModuleDependencies.add(dep)
}
for (const dep of moduleDependencies.styleDependenciesList) {
cssModuleDependencies.add(dep)
}

for (const file of staticFiles) {
staticFilesList.add(file)
}

jsConfigVars = moduleDependencies.jsConfigVars || ''

/*
* getModuleDependencies and downloader.getArticle are
* network heavy while parsing and saving is I/O.
* To parse and download simultaneously, we don't await on save,
* but instead cache the promise in a queue and check it later
*/
promises.push([articleId, saveArticle(zimCreator, finalHTML, mediaDependencies, subtitles, articleId, articleTitle, articleDetail)])
}
} catch (err) {
dump.status.articles.fail += 1
logger.error(`Error downloading article ${articleId}`)
if ((!err.response || err.response.status !== 404) && err.message !== DELETED_ARTICLE_ERROR) {
reject(cleanupAxiosError(err))
return
}
}

if (parsePromiseQueue.length) {
curStage += 1
const [articleId, parsePromise] = parsePromiseQueue.shift()
curArticle = articleId
/*
* in normal circumstances, where downloading is slower than
* saving, this promise will always be resolved here already
*/
const err = await parsePromise
if (err) {
console.log(err)

logger.error(`Error parsing article ${articleId}`)
timer.clear()
reject(err)
return
}
dump.status.articles.success += 1
}

if (promises.length) {
parsePromiseQueue.push(flattenPromises(promises))
}

if ((dump.status.articles.success + dump.status.articles.fail) % 10 === 0) {
const percentProgress = (((dump.status.articles.success + dump.status.articles.fail) / articlesTotal) * 100).toFixed(1)
if (percentProgress !== prevPercentProgress) {
prevPercentProgress = percentProgress
logger.log(`Progress downloading articles [${dump.status.articles.success + dump.status.articles.fail}/${articlesTotal}] [${percentProgress}%]`)
}
}
}

/*
* clear up potentially still pending promises
*/
curStage += 1
if (parsePromiseQueue.length) {
const [articleId, parsePromise] = flattenPromises(parsePromiseQueue)
curArticle = articleId
const err = await parsePromise
if (err) {
timer.clear()
reject(err)
return
}
dump.status.articles.success += parsePromiseQueue.length
}

timer.clear()
resolve()
})

Check notice on line 389 in src/util/saveArticles.ts

View check run for this annotation

codefactor.io / CodeFactor

src/util/saveArticles.ts#L269-L389

Complex Method
})

logger.log(`Done with downloading a total of [${articlesTotal}] articles`)
Expand Down
Loading