diff --git a/plugin/js/parsers/KemonopartyParser.js b/plugin/js/parsers/KemonopartyParser.js index 2f2735ef..a6b4ff32 100644 --- a/plugin/js/parsers/KemonopartyParser.js +++ b/plugin/js/parsers/KemonopartyParser.js @@ -15,6 +15,61 @@ class KemonopartyParser extends Parser{ )).reverse(); }; + async getChapterUrlsFromMultipleTocPages(dom, extractPartialChapterList, getUrlsOfTocPages, chapterUrlsUI) { + let urlsOfTocPages = getUrlsOfTocPages(dom); + return await this.getChaptersFromAllTocPages([], extractPartialChapterList, urlsOfTocPages, chapterUrlsUI); + } + + async fetchChapter(url) { + return (new TextDecoder().decode((await HttpClient.wrapFetch(url)).arrayBuffer)); + } + + async getChaptersFromAllTocPages(chapters, extractPartialChapterList, urlsOfTocPages, chapterUrlsUI) { + if (0 < chapters.length) { + chapterUrlsUI.showTocProgress(chapters); + } + for(let url of urlsOfTocPages) { + await this.rateLimitDelay(); + let json_string = await this.fetchChapter(url); + let partialList = extractPartialChapterList(json_string); + chapterUrlsUI.showTocProgress(partialList); + chapters = chapters.concat(partialList); + } + return chapters; + } + + async fetchWebPageContent(webPage) { + let that = this; + ChapterUrlsUI.showDownloadState(webPage.row, ChapterUrlsUI.DOWNLOAD_STATE_SLEEPING); + await this.rateLimitDelay(); + ChapterUrlsUI.showDownloadState(webPage.row, ChapterUrlsUI.DOWNLOAD_STATE_DOWNLOADING); + let pageParser = webPage.parser; + return pageParser.fetchChapter(webPage.sourceUrl).then(function (json_string) { + delete webPage.error; + webPage.rawDom = Document.parseHTMLUnsafe(``); + let content = pageParser.findContent(webPage.rawDom); + if (content == null) { + let errorMsg = chrome.i18n.getMessage("errorContentNotFound", [webPage.sourceUrl]); + throw new Error(errorMsg); + } + return pageParser.fetchImagesUsedInDocument(content, webPage); + }).catch(function (error) { + if (that.userPreferences.skipChaptersThatFailFetch.value) { + ErrorLog.log(error); + webPage.error = error; + } else { + webPage.isIncludeable = false; + throw error; + } + }); + } + + findCoverImageUrl(dom) { + let cover = dom.querySelector(".user-header__avatar img"); + return cover.src ?? null; + } + + getUrlsOfTocPages(dom) { let urls = []; let paginator = dom.querySelector("div.paginator menu"); @@ -22,31 +77,44 @@ class KemonopartyParser extends Parser{ return urls; } let pages = [...paginator.querySelectorAll("a:not(.next)")]; + // add /api/v1/ right after the domain name + pages[pages.length - 1].href = pages[pages.length - 1].href.replace("https://kemono.su", "https://kemono.su/api/v1"); + // add /posts-legacy right before the query string + pages[pages.length - 1].href = pages[pages.length - 1].href.replace("?", "/posts-legacy?"); let url = new URL(pages[pages.length - 1]); let lastPageOffset = url.searchParams.get("o"); - for(let i = 50; i <= lastPageOffset; i += 50) { + for(let i = 0; i <= lastPageOffset; i += 50) { url.searchParams.set("o", i); urls.push(url.href); } return urls; } - - extractPartialChapterList(dom) { - let links = [...dom.querySelectorAll(".card-list__items a")]; - return links.map(l => ({ - sourceUrl: l.href, - title: l.querySelector("header").textContent.trim() - })); - } - preprocessRawDom(webPageDom) { - util.removeChildElementsMatchingCss(webPageDom, ".ad-container"); - this.copyImagesIntoContent(webPageDom); + extractPartialChapterList(json_string) { + // get href from the dom, not the url of the page + try { + let data = JSON.parse(json_string); + let authorid = data.props.id; + let ids = data.results.map(result => result.id); + let titles = data.results.map(result => result.title); + let urls = ids.map(id => `https://kemono.su/api/v1/patreon/user/${authorid}/post/${id}`); + return urls.map((url, i) => ({ + sourceUrl: url, + title: titles[i] + })); + } catch (e) { + return []; + } } findContent(dom) { //the text of the chapter is always in .post__content, but if there is no chapter(e.g. only files), return .post__body instead of throwing an error - return dom.querySelector(".post__content") ?? dom.querySelector(".post__body"); + // return dom.querySelector(".post__content") ?? dom.querySelector(".post__body"); + let data = JSON.parse(dom.querySelector("script").innerHTML); + // create a dom element from data.post.content; + let content_dom = document.createElement("div"); + content_dom.innerHTML = data.post.content; + return content_dom; } copyImagesIntoContent(dom) { @@ -62,6 +130,6 @@ class KemonopartyParser extends Parser{ } findChapterTitle(dom) { - return dom.querySelector("h1.post__title > span"); + return JSON.parse(dom.querySelector("script").innerHTML).post.title; } }