diff --git a/build.gradle.kts b/build.gradle.kts index 603f95e..f4e8060 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -21,7 +21,7 @@ plugins { } group = "dev.dediamondpro" -version = "1.2.2" +version = "1.2.3" repositories { mavenCentral() diff --git a/src/main/java/dev/dediamondpro/minemark/MineMarkCore.java b/src/main/java/dev/dediamondpro/minemark/MineMarkCore.java index ed1c600..787ad1b 100644 --- a/src/main/java/dev/dediamondpro/minemark/MineMarkCore.java +++ b/src/main/java/dev/dediamondpro/minemark/MineMarkCore.java @@ -22,7 +22,7 @@ import dev.dediamondpro.minemark.elements.creators.TextElementCreator; import dev.dediamondpro.minemark.elements.formatting.FormattingElement; import dev.dediamondpro.minemark.style.Style; -import dev.dediamondpro.minemark.utils.PrefixedReader; +import dev.dediamondpro.minemark.utils.HtmlWhiteSpaceUtil; import org.commonmark.Extension; import org.commonmark.node.Node; import org.commonmark.parser.Parser; @@ -38,7 +38,6 @@ import java.nio.charset.StandardCharsets; import java.util.List; import java.util.concurrent.locks.ReentrantLock; -import java.util.regex.Pattern; /** * Class responsible for integrating parsing, layout and rendering @@ -47,7 +46,6 @@ * @param The class passed to the rendering implementation at render time */ public class MineMarkCore { - private static final Pattern ACTIVATION_PATTERN = Pattern.compile(".*?", Pattern.DOTALL); private final Parser markdownParser; private final HtmlRenderer htmlRenderer; private final MineMarkHtmlParser htmlParser; @@ -69,8 +67,8 @@ protected MineMarkCore(TextElementCreator textElement, List(textElement, elements, formattingElements); - xmlParser = new org.ccil.cowan.tagsoup.Parser(); - xmlParser.setContentHandler(htmlParser); + this.xmlParser = new org.ccil.cowan.tagsoup.Parser(); + this.xmlParser.setContentHandler(htmlParser); } /** @@ -84,9 +82,6 @@ protected MineMarkCore(TextElementCreator textElement, List parse(@NotNull S style, @NotNull String markdown, @NotNull Charset charSet) throws SAXException, IOException { - // Trick the markdown renderer to activate early, - // this makes it so some problematic whitespaces are handled for us - markdown = "\n\n**MineMark-activation**\n\n" + markdown; Node document = markdownParser.parse(markdown); return parseDocument(style, document, charSet); } @@ -115,9 +110,6 @@ public MineMarkElement parse(@NotNull S style, @NotNull String markdown) t * @throws IOException An IOException during parsing */ public MineMarkElement parse(@NotNull S style, @NotNull Reader markdown, @NotNull Charset charSet) throws SAXException, IOException { - // Trick the markdown renderer to activate early, - // this makes it so some problematic whitespaces are handled for us - markdown = new PrefixedReader("\n\n**MineMark-activation**\n\n", markdown); Node document = markdownParser.parseReader(markdown); return parseDocument(style, document, charSet); } @@ -138,12 +130,12 @@ public MineMarkElement parse(@NotNull S style, @NotNull Reader markdown) t private MineMarkElement parseDocument(@NotNull S style, Node document, @NotNull Charset charSet) throws SAXException, IOException { // Render the document to HTML String html = htmlRenderer.render(document); - // Remove the markdown activation part - html = ACTIVATION_PATTERN.matcher(html).replaceFirst(""); // Get the wrapper to wrap the content with, make sure the html does not include it String wrapper = getMineMarkWrapper(html); // Prepare the HTML for parsing html = "<" + wrapper + ">" + html + ""; + // Remove unnecessary whitespaces from the html + html = HtmlWhiteSpaceUtil.INSTANCE.removeUnnecessaryWhiteSpace(html); // Acquire the lock to make sure this thread is the only one using the parser parsingLock.lock(); try (InputStream stream = new ByteArrayInputStream(html.getBytes(charSet))) { diff --git a/src/main/java/dev/dediamondpro/minemark/MineMarkHtmlParser.java b/src/main/java/dev/dediamondpro/minemark/MineMarkHtmlParser.java index 1d616d6..937b101 100644 --- a/src/main/java/dev/dediamondpro/minemark/MineMarkHtmlParser.java +++ b/src/main/java/dev/dediamondpro/minemark/MineMarkHtmlParser.java @@ -107,31 +107,26 @@ public void endElement(String uri, String localName, String qName) { @Override public void characters(char[] ch, int start, int length) { - // All newlines are ignored unless this element is preformatted - int newLength = length; - if (isPreFormatted) { - if (ch[start] == '\n') { - newLength--; - } - if (ch[start + length - 1] == '\n') { - newLength--; - } - } else { + // Remove leading and trailing newlines + if (length > 0 && ch[start] == '\n') { + start++; + length--; + } + if (length > 0 && ch[start + length - 1] == '\n') { + length--; + } + // Replace all other newlines with a space if the current text isn't preformatted + if (!isPreFormatted) { for (int i = start; i < start + length; i++) { if (ch[i] == '\n') { - newLength--; + ch[i] = ' '; } } } - char[] modifiedCh = new char[newLength]; - int index = 0; - for (int i = start; i < start + length; i++) { - if (ch[i] != '\n' || (isPreFormatted && i != start && i != start + length - 1)) { - modifiedCh[index++] = ch[i]; - } + if (length > 0) { + textBuilder.append(ch, start, length); } - textBuilder.append(modifiedCh); } private void addText() { diff --git a/src/main/java/dev/dediamondpro/minemark/elements/impl/TextElement.java b/src/main/java/dev/dediamondpro/minemark/elements/impl/TextElement.java index 65c9259..b72afd6 100644 --- a/src/main/java/dev/dediamondpro/minemark/elements/impl/TextElement.java +++ b/src/main/java/dev/dediamondpro/minemark/elements/impl/TextElement.java @@ -31,8 +31,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; public abstract class TextElement extends Element implements Inline { + protected final Pattern LEADING_WHITESPACE = Pattern.compile("^ +"); protected final HashMap lines = new HashMap<>(); protected final String text; protected float baseLineHeight; @@ -48,18 +50,10 @@ public TextElement(@NotNull String text, @NotNull S style, @NotNull LayoutStyle public void generateLayout(LayoutData layoutData, R renderData) { lines.clear(); ArrayList allLines = new ArrayList<>(); - String actualText = text; - if (!getLayoutStyle().isPreFormatted()) { - actualText = actualText.replaceAll(" +", " "); - } - String[] predefinedLines = actualText.split("\n", -1); + String[] predefinedLines = text.split("\n", -1); for (int i = 0; i < predefinedLines.length; i++) { String line = predefinedLines[i].replace("\n", ""); - if (layoutStyle.isPreFormatted()) { - allLines.add(line); - } else { - allLines.addAll(wrapText(line, i == 0 ? layoutData.getX() : 0f, layoutData.getMaxWidth(), renderData)); - } + allLines.addAll(wrapText(line, i == 0 ? layoutData.getX() : 0f, layoutData.getMaxWidth(), renderData)); } float codeBlockPadding = layoutStyle.isPartOfCodeBlock() ? style.getCodeBlockStyle().getInlinePaddingTopBottom() : 0f; float padding = Math.max(style.getTextStyle().getPadding(), codeBlockPadding); @@ -115,8 +109,14 @@ protected List wrapText(String text, float startX, float maxWidth, R ren String[] words = text.split("(?= )"); float actualMaxWidth = maxWidth - startX; + boolean firstOfLine = actualMaxWidth == maxWidth; for (String word : words) { word = word.replace('\u00A0', ' '); + // If this is the first word on the line, replace all leading whitespace chars (unless this is preformatted) + if (firstOfLine && !layoutStyle.isPreFormatted()) { + word = LEADING_WHITESPACE.matcher(word).replaceAll(""); + firstOfLine = false; + } if (getAdjustedTextWidth(currentLine + word, fontSize, renderData) <= actualMaxWidth) { currentLine.append(word); } else { @@ -124,7 +124,8 @@ protected List wrapText(String text, float startX, float maxWidth, R ren if (!finishedText.isEmpty() || actualMaxWidth != maxWidth) { lines.add(finishedText); } - String cleanedWord = word.replaceAll("^ ", ""); + // This is the first word after wrapping, replace all leading whitespace chars + String cleanedWord = LEADING_WHITESPACE.matcher(word).replaceAll(""); currentLine = new StringBuilder(); actualMaxWidth = maxWidth; if (getAdjustedTextWidth(cleanedWord, fontSize, renderData) > actualMaxWidth) { diff --git a/src/main/java/dev/dediamondpro/minemark/utils/HtmlWhiteSpaceUtil.java b/src/main/java/dev/dediamondpro/minemark/utils/HtmlWhiteSpaceUtil.java new file mode 100644 index 0000000..12ae441 --- /dev/null +++ b/src/main/java/dev/dediamondpro/minemark/utils/HtmlWhiteSpaceUtil.java @@ -0,0 +1,101 @@ +/* + * This file is part of MineMark + * Copyright (C) 2024 DeDiamondPro + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License Version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + +package dev.dediamondpro.minemark.utils; + +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class HtmlWhiteSpaceUtil { + public static final HtmlWhiteSpaceUtil INSTANCE = new HtmlWhiteSpaceUtil(); + private final Pattern PREFORMATTED_ELEMENT = Pattern.compile("
(?:(?!).)*?
", Pattern.DOTALL); + private final Pattern BEFORE_AFTER_LINEBREAK = Pattern.compile("\\s+$|^\\s+", Pattern.MULTILINE); + private final Pattern HORIZONTAL_WHITESPACE = Pattern.compile("\\h"); + private final Pattern SUBSEQUENT_SPACE = Pattern.compile(" {2,}"); + private final Pattern SPACES_ACROSS_TAGS = Pattern.compile(" +((]+>)+) +"); + private final Pattern SPACES_START = Pattern.compile("^((]+>)+) +"); + private final Pattern SPACES_END = Pattern.compile(" +((]+>)+)$"); + + private HtmlWhiteSpaceUtil() { + } + + public String removeUnnecessaryWhiteSpace(String html) { + // Based on the steps outlined here: https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace + // Start by removing leading and trailing whitespaces + html = html.trim(); + + // Find preformatted elements and replace them with a placeholder, we don't want to change the formatting + LinkedHashMap preformattedElements = null; + Matcher preformattedElementsMatcher = PREFORMATTED_ELEMENT.matcher(html); + int num = 0; + boolean found = false; + while (true) { + while (preformattedElementsMatcher.find()) { + // Initialize here to avoid a wasted initialization + if (preformattedElements == null) { + preformattedElements = new LinkedHashMap<>(); + } + // Find a key to replace the element with, this can absolutely not already be in the string + String key = "%%%preformattedElement-" + num + "%%%%"; + while (html.contains(key)) { + num++; + key = "%%%preformattedElement-" + num + "%%%%"; + } + num++; + // Replace the element in the html with the key + String element = preformattedElementsMatcher.group(); + // Store the element so it can be re-added later + preformattedElements.put(key, element); + // Remove the element from the html + html = html.replace(element, key); + + found = true; + } + // If no more occurrences are found with a new matcher, we are done, break out of the loop + if (!found) { + break; + } + // Recreate the matcher to handle nested elements + preformattedElementsMatcher = PREFORMATTED_ELEMENT.matcher(html); + found = false; + } + + // Ignore whitespace before and after line breaks + html = BEFORE_AFTER_LINEBREAK.matcher(html).replaceAll(""); + // Replace all whitespace characters with normal spaces + html = HORIZONTAL_WHITESPACE.matcher(html).replaceAll(" "); + // Remove subsequent spaces + html = SUBSEQUENT_SPACE.matcher(html).replaceAll(" "); + // Remove spaces across tags + html = SPACES_ACROSS_TAGS.matcher(html).replaceAll(" $1"); + // Remove spaces at the start and end of the string across the first and last tag + html = SPACES_START.matcher(html).replaceAll("$1"); + html = SPACES_END.matcher(html).replaceAll("$1"); + + // Add preformatted elements back in, in reverse order to handle nested elements + if (preformattedElements != null) { + ArrayList keys = new ArrayList<>(preformattedElements.keySet()); + Collections.reverse(keys); + for (String key : keys) { + html = html.replace(key, preformattedElements.get(key)); + } + } + + return html; + } +} \ No newline at end of file diff --git a/src/main/java/dev/dediamondpro/minemark/utils/PrefixedReader.java b/src/main/java/dev/dediamondpro/minemark/utils/PrefixedReader.java deleted file mode 100644 index 27341bd..0000000 --- a/src/main/java/dev/dediamondpro/minemark/utils/PrefixedReader.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * This file is part of MineMark - * Copyright (C) 2024 DeDiamondPro - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License Version 3 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . - */ - -package dev.dediamondpro.minemark.utils; - -import org.jetbrains.annotations.NotNull; - -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; - - -/** - * Utility class to add a prefix to a reader, used by MineMarkCore to trick the Markdown parser to activate early - */ -public class PrefixedReader extends Reader { - private final StringReader prefixReader; - private final Reader mainReader; - private boolean prefixDone; - - public PrefixedReader(String prefix, Reader mainReader) { - this.prefixReader = new StringReader(prefix); - this.mainReader = mainReader; - this.prefixDone = false; - } - - @Override - public int read(char @NotNull [] cbuf, int off, int len) throws IOException { - // Read from the prefixReader first - if (!prefixDone) { - int numRead = prefixReader.read(cbuf, off, len); - if (numRead == -1) { - prefixDone = true; // Prefix is done, switch to mainReader - } else { - return numRead; - } - } - - // Now read from the mainReader - return mainReader.read(cbuf, off, len); - } - - @Override - public void close() throws IOException { - prefixReader.close(); - mainReader.close(); - } -}