Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Kemono.su #1589

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 82 additions & 14 deletions plugin/js/parsers/KemonopartyParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,38 +15,106 @@ class KemonopartyParser extends Parser{
)).reverse();
};

async getChapterUrlsFromMultipleTocPages(dom, extractPartialChapterList, getUrlsOfTocPages, chapterUrlsUI) {
let urlsOfTocPages = getUrlsOfTocPages(dom);
return await this.getChaptersFromAllTocPages([], extractPartialChapterList, urlsOfTocPages, chapterUrlsUI);
}

async fetchChapter(url) {
return (new TextDecoder().decode((await HttpClient.wrapFetch(url)).arrayBuffer));
}

async getChaptersFromAllTocPages(chapters, extractPartialChapterList, urlsOfTocPages, chapterUrlsUI) {
if (0 < chapters.length) {
chapterUrlsUI.showTocProgress(chapters);
}
for(let url of urlsOfTocPages) {
await this.rateLimitDelay();
let json_string = await this.fetchChapter(url);
let partialList = extractPartialChapterList(json_string);
chapterUrlsUI.showTocProgress(partialList);
chapters = chapters.concat(partialList);
}
return chapters;
}

async fetchWebPageContent(webPage) {
let that = this;
ChapterUrlsUI.showDownloadState(webPage.row, ChapterUrlsUI.DOWNLOAD_STATE_SLEEPING);
await this.rateLimitDelay();
ChapterUrlsUI.showDownloadState(webPage.row, ChapterUrlsUI.DOWNLOAD_STATE_DOWNLOADING);
let pageParser = webPage.parser;
return pageParser.fetchChapter(webPage.sourceUrl).then(function (json_string) {
delete webPage.error;
webPage.rawDom = Document.parseHTMLUnsafe(`<html><script>${json_string}</script></html>`);
let content = pageParser.findContent(webPage.rawDom);
if (content == null) {
let errorMsg = chrome.i18n.getMessage("errorContentNotFound", [webPage.sourceUrl]);
throw new Error(errorMsg);
}
return pageParser.fetchImagesUsedInDocument(content, webPage);
}).catch(function (error) {
if (that.userPreferences.skipChaptersThatFailFetch.value) {
ErrorLog.log(error);
webPage.error = error;
} else {
webPage.isIncludeable = false;
throw error;
}
});
}

findCoverImageUrl(dom) {
let cover = dom.querySelector(".user-header__avatar img");
return cover.src ?? null;
}


getUrlsOfTocPages(dom) {
let urls = [];
let paginator = dom.querySelector("div.paginator menu");
if (paginator === null) {
return urls;
}
let pages = [...paginator.querySelectorAll("a:not(.next)")];
// add /api/v1/ right after the domain name
pages[pages.length - 1].href = pages[pages.length - 1].href.replace("https://kemono.su", "https://kemono.su/api/v1");
// add /posts-legacy right before the query string
pages[pages.length - 1].href = pages[pages.length - 1].href.replace("?", "/posts-legacy?");
let url = new URL(pages[pages.length - 1]);
let lastPageOffset = url.searchParams.get("o");
for(let i = 50; i <= lastPageOffset; i += 50) {
for(let i = 0; i <= lastPageOffset; i += 50) {
url.searchParams.set("o", i);
urls.push(url.href);
}
return urls;
}

extractPartialChapterList(dom) {
let links = [...dom.querySelectorAll(".card-list__items a")];
return links.map(l => ({
sourceUrl: l.href,
title: l.querySelector("header").textContent.trim()
}));
}

preprocessRawDom(webPageDom) {
util.removeChildElementsMatchingCss(webPageDom, ".ad-container");
this.copyImagesIntoContent(webPageDom);
extractPartialChapterList(json_string) {
// get href from the dom, not the url of the page
try {
let data = JSON.parse(json_string);
let authorid = data.props.id;
let ids = data.results.map(result => result.id);
let titles = data.results.map(result => result.title);
let urls = ids.map(id => `https://kemono.su/api/v1/patreon/user/${authorid}/post/${id}`);
return urls.map((url, i) => ({
sourceUrl: url,
title: titles[i]
}));
} catch (e) {
return [];
}
}

findContent(dom) {
//the text of the chapter is always in .post__content, but if there is no chapter(e.g. only files), return .post__body instead of throwing an error
return dom.querySelector(".post__content") ?? dom.querySelector(".post__body");
// return dom.querySelector(".post__content") ?? dom.querySelector(".post__body");
let data = JSON.parse(dom.querySelector("script").innerHTML);
// create a dom element from data.post.content;
let content_dom = document.createElement("div");
content_dom.innerHTML = data.post.content;
return content_dom;
}

copyImagesIntoContent(dom) {
Expand All @@ -62,6 +130,6 @@ class KemonopartyParser extends Parser{
}

findChapterTitle(dom) {
return dom.querySelector("h1.post__title > span");
return JSON.parse(dom.querySelector("script").innerHTML).post.title;
}
}