Skip to content

Commit

Permalink
breakout latex plugin for delims (#3040)
Browse files Browse the repository at this point in the history
* Breakout LaTeX plugin for modification

* backport regular markdown link
  • Loading branch information
timothycarambat authored Jan 27, 2025
1 parent 55ffc08 commit c56d3b1
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 10 deletions.
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"js-levenshtein": "^1.1.6",
"lodash.debounce": "^4.0.8",
"markdown-it": "^13.0.1",
"markdown-it-katex": "^2.0.3",
"katex": "^0.6.0",
"moment": "^2.30.1",
"onnxruntime-web": "^1.18.0",
"pluralize": "^8.0.0",
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/utils/chat/markdown.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { encode as HTMLEncode } from "he";
import markdownIt from "markdown-it";
import markdownItKatex from "markdown-it-katex";
import markdownItKatexPlugin from "./plugins/markdown-katex";
import hljs from "highlight.js";
import "./themes/github-dark.css";
import "./themes/github.css";
Expand Down Expand Up @@ -66,7 +66,7 @@ markdown.renderer.rules.image = function (tokens, idx) {
return `<div class="w-full max-w-[800px]"><img src="${src}" alt="${alt}" class="w-full h-auto" /></div>`;
};

markdown.use(markdownItKatex);
markdown.use(markdownItKatexPlugin);

export default function renderMarkdown(text = "") {
return markdown.render(text);
Expand Down
277 changes: 277 additions & 0 deletions frontend/src/utils/chat/plugins/markdown-katex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
import katex from "katex";

/**
 * Test whether the "$" at `pos` is a potential opening and/or closing
 * delimiter for inline math.
 *
 * Assumes the caller has already found a delimiter at `state.src[pos]`.
 * The whitespace/digit rules are only applied when that character is "$":
 *  - a "$" preceded by a space/tab, or followed by a digit, cannot close;
 *  - a "$" followed by a space/tab cannot open.
 *
 * @param {{src: string, posMax: number}} state - inline parser state.
 * @param {number} pos - index of the delimiter in `state.src`.
 * @returns {{can_open: boolean, can_close: boolean}}
 */
function isValidDelim(state, pos) {
  const max = state.posMax;

  // Neighbours outside [0, posMax) are treated as "no character" (-1).
  // The upper bound is strict: the character AT posMax is not part of the
  // range being parsed and must not influence the decision.
  const prevChar = pos > 0 ? state.src.charCodeAt(pos - 1) : -1;
  const nextChar = pos + 1 < max ? state.src.charCodeAt(pos + 1) : -1;

  let can_open = true;
  let can_close = true;

  // Only apply whitespace rules if we're dealing with the $ delimiter.
  if (state.src[pos] === "$") {
    const prevIsBlank = prevChar === 0x20 /* " " */ || prevChar === 0x09; /* \t */
    const nextIsBlank = nextChar === 0x20 /* " " */ || nextChar === 0x09; /* \t */
    const nextIsDigit = nextChar >= 0x30 /* "0" */ && nextChar <= 0x39; /* "9" */

    if (prevIsBlank || nextIsDigit) {
      can_close = false;
    }
    if (nextIsBlank) {
      can_open = false;
    }
  }

  return { can_open, can_close };
}

/**
 * Find the first occurrence of `needle` in `src`, at or after `from`, that is
 * not escaped, i.e. that is preceded by an even number of backslashes.
 *
 * @param {string} src
 * @param {string} needle
 * @param {number} from
 * @returns {number} index of the match, or -1 when there is none.
 */
function findUnescapedDelim(src, needle, from) {
  let match = from;
  while ((match = src.indexOf(needle, match)) !== -1) {
    // Walk left over the run of backslashes directly before the match;
    // `pos` ends on the first non-backslash character.
    let pos = match - 1;
    while (src[pos] === "\\") {
      pos -= 1;
    }
    // Even number of escapes: this is a real delimiter.
    if ((match - pos) % 2 === 1) {
      return match;
    }
    match += 1;
  }
  return -1;
}

/**
 * Inline rule recognising `$ ... $` and `\( ... \)` math.
 *
 * On success a `math_inline` token is pushed (unless `silent`) whose
 * `content` is the text between the delimiters and whose `markup` is the
 * opening delimiter. When the opener has no usable closer, the opener is
 * appended to `state.pending` as literal text and consumed.
 *
 * The closing delimiter must lie inside the current inline range
 * (`state.posMax`); a delimiter found further along `state.src` is treated
 * as "not found" so the token can never extend past the range being parsed.
 *
 * @param {object} state - inline parser state (`src`, `pos`, `posMax`,
 *   `pending`, `push`).
 * @param {boolean} silent - when true, only advance `state.pos`.
 * @returns {boolean} true when the rule consumed input at `state.pos`.
 */
function math_inline(state, silent) {
  const src = state.src;
  const max = state.posMax;
  const opensWithDollar = src[state.pos] === "$";
  const opensWithParen =
    src[state.pos] === "\\" &&
    state.pos + 1 < max &&
    src[state.pos + 1] === "(";

  // Only process $ and \( delimiters for inline math.
  if (!opensWithDollar && !opensWithParen) {
    return false;
  }

  // Handle the \( ... \) case separately: no whitespace rules apply.
  if (opensWithParen) {
    const start = state.pos + 2;
    let match = findUnescapedDelim(src, "\\)", start);
    // The two-character closer must fit entirely inside the inline range.
    if (match !== -1 && match + 2 > max) {
      match = -1;
    }

    // No closing delimiter found. Consume \( as text and continue.
    if (match === -1) {
      if (!silent) {
        state.pending += "\\(";
      }
      state.pos = start;
      return true;
    }

    if (!silent) {
      const token = state.push("math_inline", "math", 0);
      token.markup = "\\(";
      token.content = src.slice(start, match);
    }

    state.pos = match + 2;
    return true;
  }

  if (!isValidDelim(state, state.pos).can_open) {
    if (!silent) {
      state.pending += "$";
    }
    state.pos += 1;
    return true;
  }

  // Skip over every properly escaped "$" to reach the first real candidate
  // for a closing delimiter.
  const start = state.pos + 1;
  let match = findUnescapedDelim(src, "$", start);
  if (match >= max) {
    // Closer lies outside the inline range being parsed.
    match = -1;
  }

  // No closing delimiter found. Consume $ and continue.
  if (match === -1) {
    if (!silent) {
      state.pending += "$";
    }
    state.pos = start;
    return true;
  }

  // Empty content, i.e. "$$". Do not parse.
  if (match === start) {
    if (!silent) {
      state.pending += "$$";
    }
    state.pos = start + 1;
    return true;
  }

  // Check for a valid closing delimiter.
  if (!isValidDelim(state, match).can_close) {
    if (!silent) {
      state.pending += "$";
    }
    state.pos = start;
    return true;
  }

  if (!silent) {
    const token = state.push("math_inline", "math", 0);
    token.markup = "$";
    token.content = src.slice(start, match);
  }

  state.pos = match + 1;
  return true;
}

/**
 * Block rule recognising display math opened by `$$` or `\[` at the start of
 * a line and closed by `$$` or `\]` respectively at the end of a line.
 *
 * Pushes a `math_block` token whose `content` is the text between the
 * delimiters, whose `markup` is the closing delimiter and whose `map` is
 * `[start, state.line]`. If no closing delimiter is found the block runs up
 * to the line that stopped the scan, and parsing resumes AT that line.
 *
 * @param {object} state - block parser state (`src`, `bMarks`, `eMarks`,
 *   `tShift`, `blkIndent`, `getLines`, `push`, `line`).
 * @param {number} start - first line of the candidate block.
 * @param {number} end - line limit for the scan (exclusive).
 * @param {boolean} silent - when true, only report whether a block starts.
 * @returns {boolean} true when a math block starts at `start`.
 */
function math_block(state, start, end, silent) {
  let pos = state.bMarks[start] + state.tShift[start];
  let max = state.eMarks[start];

  // An opening delimiter needs two characters.
  if (pos + 2 > max) {
    return false;
  }

  // Check for $$ or \[ as the opening delimiter and pick the matching closer.
  const openDelim = state.src.slice(pos, pos + 2);
  let delimiter;
  if (openDelim === "$$") {
    delimiter = "$$";
  } else if (openDelim === "\\[") {
    delimiter = "\\]";
  } else {
    return false;
  }

  pos += openDelim.length;
  let firstLine = state.src.slice(pos, max);

  if (silent) {
    return true;
  }

  let found = false;
  let lastLine;

  if (firstLine.trim().slice(-delimiter.length) === delimiter) {
    // Single line expression
    firstLine = firstLine.trim().slice(0, -delimiter.length);
    found = true;
  }

  let next = start;
  while (!found) {
    next++;

    if (next >= end) {
      break;
    }

    pos = state.bMarks[next] + state.tShift[next];
    max = state.eMarks[next];

    if (pos < max && state.tShift[next] < state.blkIndent) {
      // non-empty line with negative indent should stop the list:
      break;
    }

    if (
      state.src.slice(pos, max).trim().slice(-delimiter.length) === delimiter
    ) {
      const lastPos = state.src.slice(0, max).lastIndexOf(delimiter);
      lastLine = state.src.slice(pos, lastPos);
      found = true;
    }
  }

  // Only step past `next` when it holds the closing delimiter. Otherwise
  // `next` is the line that stopped the scan; it is not part of this block
  // (it is excluded from getLines below) and must be left for other rules.
  state.line = next + (found ? 1 : 0);

  const token = state.push("math_block", "math", 0);
  token.block = true;
  token.content =
    (firstLine && firstLine.trim() ? firstLine + "\n" : "") +
    state.getLines(start + 1, next, state.tShift[start], true) +
    (lastLine && lastLine.trim() ? lastLine : "");
  token.map = [start, state.line];
  token.markup = delimiter;
  return true;
}

/**
 * markdown-it plugin that renders inline (`$…$`, `\(…\)`) and block
 * (`$$…$$`, `\[…\]`) math with KaTeX.
 *
 * Registers the `math_inline` and `math_block` parsing rules and their
 * renderers on `md`.
 *
 * @param {object} md - markdown-it instance.
 * @param {object} [options] - options forwarded to `katex.renderToString`.
 *   `displayMode` is set per render and the passed object is never mutated.
 *   When `throwOnError` is truthy, render failures are logged.
 */
export default function math_plugin(md, options) {
  // Default options
  const baseOptions = options || {};

  // Render with a per-call copy so the caller's options object is untouched.
  const render = (latex, displayMode) =>
    katex.renderToString(latex, { ...baseOptions, displayMode });

  // Fallback when KaTeX rejects the input. The source text is model/user
  // controlled and the return value is inserted into the page as HTML, so it
  // must be escaped rather than returned verbatim.
  const renderFailure = (latex, error) => {
    if (baseOptions.throwOnError) {
      console.log(error);
    }
    return md.utils.escapeHtml(latex);
  };

  const katexInline = (latex) => {
    // NOTE(review): token content produced by math_inline already excludes
    // the delimiters, so these only fire on content that itself is wrapped in
    // [ ], \( \) or \[ \] (e.g. "[0,1]" loses its brackets). Kept as-is for
    // backward compatibility — confirm this is intended.
    const source = latex
      .replace(/^\[(.*)\]$/, "$1")
      .replace(/^\\\((.*)\\\)$/, "$1")
      .replace(/^\\\[(.*)\\\]$/, "$1");
    try {
      return render(source, false);
    } catch (error) {
      return renderFailure(source, error);
    }
  };

  const inlineRenderer = (tokens, idx) => katexInline(tokens[idx].content);

  const katexBlock = (latex) => {
    // Remove surrounding delimiters if present
    const source = latex
      .replace(/^\[(.*)\]$/, "$1")
      .replace(/^\\\[(.*)\\\]$/, "$1");
    try {
      return "<p>" + render(source, true) + "</p>";
    } catch (error) {
      return renderFailure(source, error);
    }
  };

  const blockRenderer = (tokens, idx) =>
    katexBlock(tokens[idx].content) + "\n";

  // The inline rule must run BEFORE markdown-it's "escape" rule: that rule
  // consumes any backslash followed by a punctuation character, so if it ran
  // first a leading `\(` would be turned into a literal "(" and the
  // `\( … \)` form could never be recognised. An escaped `\$` is unaffected:
  // math_inline declines it and "escape" then handles it as before.
  md.inline.ruler.before("escape", "math_inline", math_inline);
  md.block.ruler.after("blockquote", "math_block", math_block, {
    alt: ["paragraph", "reference", "blockquote", "list"],
  });
  md.renderer.rules.math_inline = inlineRenderer;
  md.renderer.rules.math_block = blockRenderer;
}
7 changes: 0 additions & 7 deletions frontend/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2483,13 +2483,6 @@ lru-cache@^5.1.1:
dependencies:
yallist "^3.0.2"

markdown-it-katex@^2.0.3:
version "2.0.3"
resolved "https://registry.yarnpkg.com/markdown-it-katex/-/markdown-it-katex-2.0.3.tgz#d7b86a1aea0b9d6496fab4e7919a18fdef589c39"
integrity sha512-nUkkMtRWeg7OpdflamflE/Ho/pWl64Lk9wNBKOmaj33XkQdumhXAIYhI0WO03GeiycPCsxbmX536V5NEXpC3Ng==
dependencies:
katex "^0.6.0"

markdown-it@^13.0.1:
version "13.0.2"
resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-13.0.2.tgz#1bc22e23379a6952e5d56217fbed881e0c94d536"
Expand Down

0 comments on commit c56d3b1

Please sign in to comment.