From 754f31f1c8ade916c263f6d3ac9ab605ea1a21eb Mon Sep 17 00:00:00 2001 From: bunglegrind Date: Sat, 2 Oct 2021 12:42:17 +0200 Subject: [PATCH 1/6] fixes #52 --- web-extension/extractHtml.js | 59 +++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/web-extension/extractHtml.js b/web-extension/extractHtml.js index ef56046..79f797d 100644 --- a/web-extension/extractHtml.js +++ b/web-extension/extractHtml.js @@ -18,7 +18,7 @@ var allowedTags = [ 'math', 'maction', 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mlabeledtr', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mroot', 'mrow', 'ms', 'mspace', 'msqrt', 'mstyle', 'msub', 'msup', 'msubsup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'msgroup', 'mlongdiv', 'mscarries', 'mscarry', 'mstack', 'semantics' - // TODO ? + // TODO ? // ,'form', 'button' // TODO svg support ? @@ -38,12 +38,12 @@ var tmpIdsToNewCssSTRING = {}; var supportedCss = [ 'background-color', 'border', - 'color', + 'color', 'font', 'line-height', 'list-style', 'padding', - 'text-align', + 'text-align', ]; ////// @@ -73,7 +73,7 @@ function getImageSrc(srcTxt) { filename: newImgFileName, // TODO name data: getBase64ImgData(srcTxt) }); - } else { + } else { allImages.push({ originalUrl: getImgDownloadUrl(srcTxt), filename: newImgFileName, // TODO name @@ -118,28 +118,31 @@ function extractSvgToImg($htmlObject) { // replaces all iframes by divs with the same innerHTML content function extractIFrames() { - let allIframes = document.getElementsByTagName('iframe') - let changeIFrames = [] - let newDivs = [] - for (let iFrame of allIframes) { - if (!iFrame.contentDocument || !iFrame.contentDocument.body) { - continue - } - let bodyContent = iFrame.contentDocument.body.innerHTML - let bbox = iFrame.getBoundingClientRect() - let newDiv = document.createElement('div') - newDiv.style.width = bbox.width - newDiv.style.height = bbox.height - newDiv.innerHTML = bodyContent - changeIFrames.push(iFrame) - newDivs.push(newDiv) - } - for (let i = 0; i < newDivs.length; i++) { - let newDiv = newDivs[i] - let iFrame = changeIFrames[i] - let iframeParent = iFrame.parentNode - iframeParent.replaceChild(newDiv, iFrame) + function editStyle(style, id) { + return style.split("\n").map(function (line) { + if(!/\{/.test(line)) { + return line; + } + return "#" + id + " " + line.replace("body", ""); + }).join("\n"); } + let iframes = Array.from(document.querySelectorAll("iframe")); + const divs = iframes.map(function (iframe, index) { + const div = document.createElement("div"); + div.id = "save-as-ebook-iframe-" + index; + if (!iframe.contentDocument || !iframe.contentDocument.body) { + return div; + } + let bbox = iframe.getBoundingClientRect(); + div.style.width = bbox.width; + div.style.height = bbox.height; + div.innerHTML = iframe.contentDocument.body.innerHTML ?? ""; + Array.from(div.querySelectorAll("style")).forEach(function (style) { + style.innerHTML = editStyle(style.innerHTML, div.id); + }); + return div; + }); + iframes.forEach((iframe, i) => iframe.parentNode.replaceChild(divs[i], iframe)); } function preProcess($htmlObject) { @@ -381,7 +384,7 @@ function extractCss(includeStyle, appliedStyles) { function deferredAddZip(url, filename) { let deferred = $.Deferred(); - JSZipUtils.getBinaryContent(url, function(err, data) { + JSZipUtils.getBinaryContent(url, function(err, data) { if (err) { // deferred.reject(err); TODO console.log('Error:', err); @@ -408,13 +411,13 @@ function deferredAddZip(url, filename) { } tmpGlobalContent = tmpGlobalContent.replace(oldFilename, filename) } - + extractedImages.push({ filename: filename, // TODO - must be JSON serializable data: base64ArrayBuffer(data) }); - + deferred.resolve(); } }); From 4037aa852861ac0a35fee0937ef1009e7757e863 Mon Sep 17 00:00:00 2001 From: bunglegrind Date: Sat, 2 Oct 2021 12:54:17 +0200 Subject: [PATCH 2/6] Update extractHtml.js changed let to const --- web-extension/extractHtml.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web-extension/extractHtml.js b/web-extension/extractHtml.js index 79f797d..8ee9566 100644 --- a/web-extension/extractHtml.js +++ b/web-extension/extractHtml.js @@ -126,17 +126,17 @@ function extractIFrames() { return "#" + id + " " + line.replace("body", ""); }).join("\n"); } - let iframes = Array.from(document.querySelectorAll("iframe")); + const iframes = Array.from(document.querySelectorAll("iframe")); const divs = iframes.map(function (iframe, index) { const div = document.createElement("div"); div.id = "save-as-ebook-iframe-" + index; if (!iframe.contentDocument || !iframe.contentDocument.body) { return div; } - let bbox = iframe.getBoundingClientRect(); + const bbox = iframe.getBoundingClientRect(); div.style.width = bbox.width; div.style.height = bbox.height; - div.innerHTML = iframe.contentDocument.body.innerHTML ?? ""; + div.innerHTML = iframe.contentDocument.body.innerHTML; Array.from(div.querySelectorAll("style")).forEach(function (style) { style.innerHTML = editStyle(style.innerHTML, div.id); }); From c3194d69bd905995b45005d06c53e07999018f6c Mon Sep 17 00:00:00 2001 From: bunglegrind Date: Sun, 26 Dec 2021 14:53:15 +0100 Subject: [PATCH 3/6] improved selector parser in in iframe style tags --- web-extension/extractHtml.js | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/web-extension/extractHtml.js b/web-extension/extractHtml.js index 8ee9566..3190c5a 100644 --- a/web-extension/extractHtml.js +++ b/web-extension/extractHtml.js @@ -118,14 +118,24 @@ function extractSvgToImg($htmlObject) { // replaces all iframes by divs with the same innerHTML content function extractIFrames() { - function editStyle(style, id) { - return style.split("\n").map(function (line) { - if(!/\{/.test(line)) { - return line; - } - return "#" + id + " " + line.replace("body", ""); - }).join("\n"); - } + function addIdInStyle(style, id) { + return style.split("{").map(function (segment) { + const selectors = segment.split("}"); + // if the CSS is well formed, selectors may be 1 element (for the first + // rule) or 2 elements array. Last element is the one which contains the + // actual selectors. + selectors[selectors.length - 1] = selectors[selectors.length - 1] + .split(",") + .map(function (selector) { + return ( + selector.trim().length > 0//check if it's just an empty line + ? "#" + id + " " + selector.replace("body", "") + : selector + ); + }); + return selectors.join("}"); + }).join("{"); + } const iframes = Array.from(document.querySelectorAll("iframe")); const divs = iframes.map(function (iframe, index) { const div = document.createElement("div"); @@ -138,7 +148,7 @@ function extractIFrames() { div.style.height = bbox.height; div.innerHTML = iframe.contentDocument.body.innerHTML; Array.from(div.querySelectorAll("style")).forEach(function (style) { - style.innerHTML = editStyle(style.innerHTML, div.id); + style.innerHTML = addIdInStyle(style.innerHTML, div.id); }); return div; }); From 7da18f3a4ab1ef30999bd2a335be4afddd926a3a Mon Sep 17 00:00:00 2001 From: bunglegrind Date: Sun, 9 Jan 2022 18:25:21 +0100 Subject: [PATCH 4/6] iframe fix --- web-extension/extractHtml.js | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/web-extension/extractHtml.js b/web-extension/extractHtml.js index 3190c5a..bde44cb 100644 --- a/web-extension/extractHtml.js +++ b/web-extension/extractHtml.js @@ -117,13 +117,17 @@ function extractSvgToImg($htmlObject) { } // replaces all iframes by divs with the same innerHTML content -function extractIFrames() { +function extractIFrames(iframes, prefix = "") { + if (!iframes.length) { + return; + } + function addIdInStyle(style, id) { return style.split("{").map(function (segment) { const selectors = segment.split("}"); - // if the CSS is well formed, selectors may be 1 element (for the first - // rule) or 2 elements array. Last element is the one which contains the - // actual selectors. + // if the CSS is well formed, selectors may be 1 element (for the first + // rule) or 2 elements array. Last element is the one which contains the + // actual selectors. selectors[selectors.length - 1] = selectors[selectors.length - 1] .split(",") .map(function (selector) { @@ -136,23 +140,38 @@ function extractIFrames() { return selectors.join("}"); }).join("{"); } - const iframes = Array.from(document.querySelectorAll("iframe")); + + function createResetStyle(id) { + const style = document.createElement("style"); + style.innerHTML = "#" + id + ",#" + id + " * {all:unset;}"; + return style; + } + const divs = iframes.map(function (iframe, index) { const div = document.createElement("div"); - div.id = "save-as-ebook-iframe-" + index; + div.id = prefix + "save-as-ebook-iframe-" + index; if (!iframe.contentDocument || !iframe.contentDocument.body) { + console.log("CORS not enabled or empty iframe. Discarding " + div.id); return div; } const bbox = iframe.getBoundingClientRect(); div.style.width = bbox.width; div.style.height = bbox.height; + console.log(div.id); div.innerHTML = iframe.contentDocument.body.innerHTML; Array.from(div.querySelectorAll("style")).forEach(function (style) { + console.log("style edited!"); style.innerHTML = addIdInStyle(style.innerHTML, div.id); }); + + div.insertBefore(createResetStyle(div.id), div.firstChild); return div; }); iframes.forEach((iframe, i) => iframe.parentNode.replaceChild(divs[i], iframe)); + return divs.forEach((div, i) => extractIFrames( + Array.from(div.querySelectorAll("iframe")), + i + "-" + )); } function preProcess($htmlObject) { @@ -441,7 +460,7 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { let tmpContent = ''; let styleFile = null; - extractIFrames() + extractIFrames(Array.from(document.querySelectorAll("iframe"))); if (request.type === 'extract-page') { styleFile = extractCss(request.includeStyle, request.appliedStyles) From 93b1a7ff8091e53b509bcec7198d0c276325c87c Mon Sep 17 00:00:00 2001 From: bunglegrind Date: Sun, 9 Jan 2022 20:43:26 +0100 Subject: [PATCH 5/6] support for nested iframes --- web-extension/extractHtml.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/web-extension/extractHtml.js b/web-extension/extractHtml.js index bde44cb..d2b99f4 100644 --- a/web-extension/extractHtml.js +++ b/web-extension/extractHtml.js @@ -141,12 +141,6 @@ function extractIFrames(iframes, prefix = "") { }).join("{"); } - function createResetStyle(id) { - const style = document.createElement("style"); - style.innerHTML = "#" + id + ",#" + id + " * {all:unset;}"; - return style; - } - const divs = iframes.map(function (iframe, index) { const div = document.createElement("div"); div.id = prefix + "save-as-ebook-iframe-" + index; @@ -160,14 +154,12 @@ function extractIFrames(iframes, prefix = "") { console.log(div.id); div.innerHTML = iframe.contentDocument.body.innerHTML; Array.from(div.querySelectorAll("style")).forEach(function (style) { - console.log("style edited!"); style.innerHTML = addIdInStyle(style.innerHTML, div.id); }); div.insertBefore(createResetStyle(div.id), div.firstChild); return div; }); - iframes.forEach((iframe, i) => iframe.parentNode.replaceChild(divs[i], iframe)); return divs.forEach((div, i) => extractIFrames( Array.from(div.querySelectorAll("iframe")), i + "-" From f58a49f9c912abdfff0af37deaa7cbce1c327377 Mon Sep 17 00:00:00 2001 From: bunglegrind Date: Mon, 10 Jan 2022 23:19:19 +0100 Subject: [PATCH 6/6] fix error --- web-extension/extractHtml.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-extension/extractHtml.js b/web-extension/extractHtml.js index d2b99f4..27c4497 100644 --- a/web-extension/extractHtml.js +++ b/web-extension/extractHtml.js @@ -157,9 +157,9 @@ function extractIFrames(iframes, prefix = "") { style.innerHTML = addIdInStyle(style.innerHTML, div.id); }); - div.insertBefore(createResetStyle(div.id), div.firstChild); return div; }); + iframes.forEach((iframe, i) => iframe.parentNode.replaceChild(divs[i], iframe)); return divs.forEach((div, i) => extractIFrames( Array.from(div.querySelectorAll("iframe")), i + "-"