Merge pull request #1500 from Darthagnon/MagicWizards

Add parser for mtgstory.com
dteviot · Sep 25, 2024 · 43b4af7 · 43b4af7
2 parents fafcee8 + d79da20
commit 43b4af7
Show file tree

Hide file tree

Showing 5 changed files with 90 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@ eslint/packed.js
 eslint/index.csv
 node_modules
 plugin/jszip/dist/jszip.min.js
+package-lock.json
diff --git a/package.json b/package.json
@@ -69,7 +69,8 @@
     { "name": "ImmortalDreamer"},
     { "name": "ktrin"},
     { "name": "Tyderion"},
-    { "name": "nozwock" }
+    { "name": "nozwock"},
+    { "name": "Darthagnon"}
   ],
   "license": "GPL-3.0-only",
   "bugs": {

diff --git a/plugin/js/parsers/MagicWizardsParser.js b/plugin/js/parsers/MagicWizardsParser.js
@@ -0,0 +1,85 @@
+/*
+  MagicWizardsParser.js v0.72
+  
+  Parser for Magic: The Gathering fiction, found on:
+  - mtgstory.com (redirect)
+  - https://magic.wizards.com/en/story (2023-2024)
+  - https://magic.wizards.com/en/articles/columns/magic-story (2014-2018)
+  - Archive.org versions of the above
+  - TODO: mtglore.com (redirects & mirrors)
+  - TODO: https://magic.wizards.com/en/story (Q4 2018-2022)
+  - TODO: Planeswalkers & Planes Databank
+  - TODO: Featured story slider Q1 2018
+  - UNTESTED: http://www.wizards.com/Magic/Magazine/Article.aspx (2014 and earlier)
+  - WONTFIX: hanweirchronicle.com (Tumblr blog, mostly image posts)
+*/
+"use strict";
+
+// Register this parser for the magic.wizards.com hostname (per the author's note archive.org copies are matched implicitly; that mechanism is not visible here).
+parserFactory.register("magic.wizards.com", () => new MagicWizardsParser());
+
+class MagicWizardsParser extends Parser {
+    constructor() {
+        super();
+    }
+
+    // Extract the list of chapter URLs
+    async getChapterUrls(dom) {
+        let chapterLinks = [];
+        chapterLinks = [...dom.querySelectorAll("article a, .article-content a, window.location.hostname, #content article a, #content .article-content a, .articles-listing .article-item a, .articles-bloc .article .details a")];
+        // Filter out author links using their URL pattern
+        chapterLinks = chapterLinks.filter(link => !this.isAuthorLink(link));
+        return chapterLinks.map(this.linkToChapter);
+    }
+
+    // Helper function to detect if a link is an author link
+    isAuthorLink(link) {
+        const href = link.href;
+        const authorPattern = /\/archive\?author=/;
+
+        // Check if the link matches the author URL pattern or CSS selector
+        return authorPattern.test(href);
+    }
+
+    // Format chapter links into a standardized structure
+    linkToChapter(link) {
+        const titleSelectors = [
+            "h3",                     // First option: <h3> tag
+            ".article-item .title",   // Second option: <p class="title">
+            ".details .title"         // Third option: <p class="title" inside .details>
+        ];
+
+        let titleElement = null;
+
+        // Iterate through the selectors and find the first matching element
+        for (const selector of titleSelectors) {
+            titleElement = link.closest("article")?.querySelector(selector) || 
+                        link.closest(".article-item")?.querySelector(selector) || 
+                        link.closest(".details")?.querySelector(selector);
+
+            if (titleElement) {
+                break; // Exit the loop if a title element is found
+            }
+        }
+
+        // Fallback to the link text itself if no titleElement found (this handles simpler cases)
+        let title = titleElement ? titleElement.textContent.trim() : link.textContent.trim();
+
+        return {
+            sourceUrl: link.href,
+            title: title
+        };
+    }
+
+    // Extract the content of the chapter
+    findContent(dom) {
+        return dom.querySelector("#content article, .article_detail #main-content article, #article-body article, #primary-area section, section article, section, .article_detail #main-content");
+    }
+
+    // Grab cover image
+    findCoverImageUrl(dom) {
+        return util.getFirstImgSrc(dom, ".swiper-slide img, article img");
+    }
+
+
+}
diff --git a/plugin/popup.html b/plugin/popup.html
@@ -637,6 +637,7 @@ <h3>Instructions</h3>
     <script src="js/parsers/LnmtlParser.js"></script>
     <script src="js/parsers/MachineTranslationParser.js"></script>
     <script src="js/parsers/MadnovelParser.js"></script>
+    <script src="js/parsers/MagicWizardsParser.js"></script>
     <script src="js/parsers/MangadexParser.js"></script>
     <script src="js/parsers/MandarinducktalesParser.js"></script>
     <script src="js/parsers/MangakakalotParser.js"></script>

diff --git a/readme.md b/readme.md
@@ -58,6 +58,7 @@ Credits
 * ktrin
 * nozwock
 * Tyderion
+* Darthagnon
 
 ## How to use with Baka-Tsuki:
 * Browse to a Baka-Tsuki web page that has the full text of a story.