Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 35 additions & 11 deletions packages/lexical-markdown/src/MarkdownTransformers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,17 +198,20 @@ export type TextMatchTransformer = Readonly<{
type: 'text-match';
}>;

const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
const EMPTY_OR_WHITESPACE_ONLY = /^[\t ]*$/;
const ORDERED_LIST_REGEX = /^(\s*)(\d+)\.\s/;
const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
const HEADING_REGEX = /^(#{1,6})\s/;
const QUOTE_REGEX = /^>\s/;
const CODE_START_REGEX = /^[ \t]*```([\w-]+)?/;
const CODE_END_REGEX = /[ \t]*```$/;
const CODE_START_REGEX = /^[ \t]*(?:```|\\`\\`\\`)([\w-]+)?/;
const CODE_END_REGEX = /^[ \t]*(?:```|\\`\\`\\`)$/;
const CODE_SINGLE_LINE_REGEX =
/^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/;
const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/;
const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/;
const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/;
const TAG_START_REGEX = /^[ \t]*<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i;
const TAG_END_REGEX = /^[ \t]*<\/[a-z_][\w-]*\s*>/i;

const createBlockNode = (
createNode: (match: Array<string>) => ElementNode,
Expand Down Expand Up @@ -602,6 +605,7 @@ export function normalizeMarkdown(
const lines = input.split('\n');
let inCodeBlock = false;
const sanitizedLines: string[] = [];
let nestedDeepCodeBlock = 0;

for (let i = 0; i < lines.length; i++) {
const line = lines[i];
Expand All @@ -613,9 +617,24 @@ export function normalizeMarkdown(
continue;
}

// Detect the start or end of a code block
if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
inCodeBlock = !inCodeBlock;
if (CODE_END_REGEX.test(line)) {
if (nestedDeepCodeBlock === 0) {
inCodeBlock = true;
}
if (nestedDeepCodeBlock === 1) {
inCodeBlock = false;
}
if (nestedDeepCodeBlock > 0) {
nestedDeepCodeBlock--;
}
sanitizedLines.push(line);
continue;
}

// Toggle inCodeBlock state when encountering start or end of a code block
if (CODE_START_REGEX.test(line)) {
inCodeBlock = true;
nestedDeepCodeBlock++;
sanitizedLines.push(line);
continue;
}
Expand All @@ -629,8 +648,8 @@ export function normalizeMarkdown(
// In markdown the concept of "empty paragraphs" does not exist.
// Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
if (
line === '' ||
lastLine === '' ||
EMPTY_OR_WHITESPACE_ONLY.test(line) ||
EMPTY_OR_WHITESPACE_ONLY.test(lastLine!) ||
!lastLine ||
HEADING_REGEX.test(lastLine) ||
HEADING_REGEX.test(line) ||
Expand All @@ -640,11 +659,16 @@ export function normalizeMarkdown(
CHECK_LIST_REGEX.test(line) ||
TABLE_ROW_REG_EXP.test(line) ||
TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
!shouldMergeAdjacentLines
!shouldMergeAdjacentLines ||
TAG_START_REGEX.test(line) ||
TAG_END_REGEX.test(line) ||
TAG_START_REGEX.test(lastLine) ||
TAG_END_REGEX.test(lastLine) ||
CODE_END_REGEX.test(lastLine)
) {
sanitizedLines.push(line);
} else {
sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trim();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@
},
{
// Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A!
html: '<p><span style="white-space: pre-wrap;">Helloworld!</span></p>',
html: '<p><span style="white-space: pre-wrap;">Hello world !</span></p>',
md: ['Hello', 'world', '!'].join('\n'),
shouldMergeAdjacentLines: true,
skipExport: true,
Expand All @@ -303,7 +303,7 @@
// },
{
// Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A!
html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world!!</span></li></ul>',
html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world ! !</span></li></ul>',
md: '- Hello\n- world\n!\n!',
shouldMergeAdjacentLines: true,
skipExport: true,
Expand Down Expand Up @@ -396,7 +396,7 @@
mdAfterExport: '*Hello&#32;**world**!*',
},
{
html: '<p><span style="white-space: pre-wrap;">helloworld</span></p>',
html: '<p><span style="white-space: pre-wrap;">hello world</span></p>',
md: 'hello\nworld',
shouldMergeAdjacentLines: true,
skipExport: true,
Expand Down Expand Up @@ -504,7 +504,7 @@
},
{
// https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A!
html: '<blockquote><span style="white-space: pre-wrap;">Helloworld!</span></blockquote>',
html: '<blockquote><span style="white-space: pre-wrap;">Hello world !</span></blockquote>',
md: '> Hello\nworld\n!',
shouldMergeAdjacentLines: true,
skipExport: true,
Expand Down Expand Up @@ -723,7 +723,7 @@

expect(
editor.getEditorState().read(() => $generateHtmlFromNodes(editor)),
).toBe(html);

Check failure on line 726 in packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts

View workflow job for this annotation

GitHub Actions / core-tests / unit (20.19.4)

packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts > Markdown > can import "Some HTML in mdx:\n\n<MyComponent>Line 1\nSome Text</MyComponent>"

AssertionError: expected '<p><span style="white-space: pre-wrap…' to be '<p><span style="white-space: pre-wrap…' // Object.is equality - Expected + Received - <p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1Some Text</span></pre> + <p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1 + Some Text</span></pre> ❯ packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts:726:9

Check failure on line 726 in packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts

View workflow job for this annotation

GitHub Actions / core-tests / unit (20.19.4)

packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts > Markdown > can import "```Single line Code```"

AssertionError: expected '<pre spellcheck="false" data-language…' to be '<pre spellcheck="false"><span style="…' // Object.is equality Expected: "<pre spellcheck="false"><span style="white-space: pre-wrap;">Single line Code</span></pre>" Received: "<pre spellcheck="false" data-language="Single"><span style="white-space: pre-wrap;">line Code```</span></pre>" ❯ packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts:726:9
});
}

Expand Down Expand Up @@ -953,7 +953,7 @@
E3
`;
expect(normalizeMarkdown(markdown, true)).toBe(`
A1A2
A1 A2

A3

Expand All @@ -964,7 +964,7 @@
B3
\`\`\`

C1C2
C1 C2

C3

Expand All @@ -977,7 +977,7 @@

\`\`\`single line code\`\`\`

E1E2
E1 E2

E3
`);
Expand Down Expand Up @@ -1070,3 +1070,64 @@
expect(normalizeMarkdown(markdown, false)).toBe(markdown);
});
});

describe.skip('normalizeMarkdown – new behaviors', () => {
it('merges adjacent plain text lines with a single space', () => {
const md = `Hello
world`;
expect(normalizeMarkdown(md, true)).toBe(`Hello world`);
});

it('merges while trimming the next line and inserting a single space', () => {
const md = `Hello
world `;
expect(normalizeMarkdown(md, true)).toBe(`Hello world`);
});

it('does not merge across HTML-like tags (opening, content, closing, after)', () => {
const md = `<div>
content
</div>
after`;
// Nothing should be merged
expect(normalizeMarkdown(md, true)).toBe(md);
});

it('does not merge the fence line with the first line after a code block', () => {
const md = '```\ncode\n```\nNext line';
// The closing ``` must remain on its own line; "Next line" must not be glued to it
expect(normalizeMarkdown(md, true)).toBe('```\ncode\n```\nNext line');
});

it('treats whitespace-only lines as empty separators (no merge across them)', () => {
const md = `A1

A2`;
// The middle line is spaces only; should be treated as an empty separator
expect(normalizeMarkdown(md, true)).toBe(`A1

A2`);
});

it('handles a code block that contains a literal ``` line without breaking merging outside', () => {
const md = `Intro
para
\`\`\`md
some code
\`\`\`
still code
\`\`\`
Outro
text`;
// Outside the fenced block, adjacent non-empty lines should merge with a space
expect(normalizeMarkdown(md, true)).toBe(
`Intro para
\`\`\`md
some code
\`\`\`
still code
\`\`\`
Outro text`,
);
});
});
Loading