facebook · GermanJablo · Sep 9, 2025 · Sep 10, 2025 · Sep 10, 2025
@@ -198,17 +198,20 @@ export type TextMatchTransformer = Readonly<{
   type: 'text-match';
 }>;
 
-const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
+const EMPTY_OR_WHITESPACE_ONLY = /^[\t ]*$/;
+const ORDERED_LIST_REGEX = /^(\s*)(\d+)\.\s/;
 const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
 const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
 const HEADING_REGEX = /^(#{1,6})\s/;
 const QUOTE_REGEX = /^>\s/;
-const CODE_START_REGEX = /^[ \t]*```([\w-]+)?/;
-const CODE_END_REGEX = /[ \t]*```$/;
+const CODE_START_REGEX = /^[ \t]*(?:```|\\`\\`\\`)([\w-]+)?/;
+const CODE_END_REGEX = /^[ \t]*(?:```|\\`\\`\\`)$/;
 const CODE_SINGLE_LINE_REGEX =
   /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/;
-const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/;
+const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/;
 const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/;
+const TAG_START_REGEX = /^[ \t]*<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i;
+const TAG_END_REGEX = /^[ \t]*<\/[a-z_][\w-]*\s*>/i;
 
 const createBlockNode = (
   createNode: (match: Array<string>) => ElementNode,
@@ -602,6 +605,7 @@ export function normalizeMarkdown(
   const lines = input.split('\n');
   let inCodeBlock = false;
   const sanitizedLines: string[] = [];
+  let nestedDeepCodeBlock = 0;
 
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i];
@@ -613,9 +617,24 @@ export function normalizeMarkdown(
       continue;
     }
 
-    // Detect the start or end of a code block
-    if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
-      inCodeBlock = !inCodeBlock;
+    if (CODE_END_REGEX.test(line)) {
+      if (nestedDeepCodeBlock === 0) {
+        inCodeBlock = true;
+      }
+      if (nestedDeepCodeBlock === 1) {
+        inCodeBlock = false;
+      }
+      if (nestedDeepCodeBlock > 0) {
+        nestedDeepCodeBlock--;
+      }
+      sanitizedLines.push(line);
+      continue;
+    }
+
+    // A fence not handled as a closing fence above opens a (possibly nested) code block
+    if (CODE_START_REGEX.test(line)) {
+      inCodeBlock = true;
+      nestedDeepCodeBlock++;
       sanitizedLines.push(line);
       continue;
     }
@@ -629,8 +648,8 @@ export function normalizeMarkdown(
     // In markdown the concept of "empty paragraphs" does not exist.
     // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
     if (
-      line === '' ||
-      lastLine === '' ||
+      EMPTY_OR_WHITESPACE_ONLY.test(line) ||
+      EMPTY_OR_WHITESPACE_ONLY.test(lastLine!) ||
       !lastLine ||
       HEADING_REGEX.test(lastLine) ||
       HEADING_REGEX.test(line) ||
@@ -640,11 +659,16 @@ export function normalizeMarkdown(
       CHECK_LIST_REGEX.test(line) ||
       TABLE_ROW_REG_EXP.test(line) ||
       TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
-      !shouldMergeAdjacentLines
+      !shouldMergeAdjacentLines ||
+      TAG_START_REGEX.test(line) ||
+      TAG_END_REGEX.test(line) ||
+      TAG_START_REGEX.test(lastLine) ||
+      TAG_END_REGEX.test(lastLine) ||
+      CODE_END_REGEX.test(lastLine)
     ) {
       sanitizedLines.push(line);
     } else {
-      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
+      sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trim();
     }
   }
 

@@ -277,7 +277,7 @@
     },
     {
       // Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A!
-      html: '<p><span style="white-space: pre-wrap;">Helloworld!</span></p>',
+      html: '<p><span style="white-space: pre-wrap;">Hello world !</span></p>',
       md: ['Hello', 'world', '!'].join('\n'),
       shouldMergeAdjacentLines: true,
       skipExport: true,
@@ -303,7 +303,7 @@
     // },
     {
       // Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A!
-      html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world!!</span></li></ul>',
+      html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world ! !</span></li></ul>',
       md: '- Hello\n- world\n!\n!',
       shouldMergeAdjacentLines: true,
       skipExport: true,
@@ -396,7 +396,7 @@
       mdAfterExport: '*Hello&#32;**world**!*',
     },
     {
-      html: '<p><span style="white-space: pre-wrap;">helloworld</span></p>',
+      html: '<p><span style="white-space: pre-wrap;">hello world</span></p>',
       md: 'hello\nworld',
       shouldMergeAdjacentLines: true,
       skipExport: true,
@@ -504,7 +504,7 @@
     },
     {
       // https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A!
-      html: '<blockquote><span style="white-space: pre-wrap;">Helloworld!</span></blockquote>',
+      html: '<blockquote><span style="white-space: pre-wrap;">Hello world !</span></blockquote>',
       md: '> Hello\nworld\n!',
       shouldMergeAdjacentLines: true,
       skipExport: true,
@@ -723,7 +723,7 @@

      expect(
        editor.getEditorState().read(() => $generateHtmlFromNodes(editor)),
      ).toBe(html);
    });
  }

@@ -953,7 +953,7 @@
 E3
 `;
     expect(normalizeMarkdown(markdown, true)).toBe(`
-A1A2
+A1 A2
 
 A3
 
@@ -964,7 +964,7 @@
 B3
 \`\`\`
 
-C1C2
+C1 C2
 
 C3
 
@@ -977,7 +977,7 @@
 
 \`\`\`single line code\`\`\`
 
-E1E2
+E1 E2
 
 E3
 `);
@@ -1070,3 +1070,64 @@
     expect(normalizeMarkdown(markdown, false)).toBe(markdown);
   });
 });
+
+describe.skip('normalizeMarkdown – new behaviors', () => {
+  it('merges adjacent plain text lines with a single space', () => {
+    const md = `Hello
+world`;
+    expect(normalizeMarkdown(md, true)).toBe(`Hello world`);
+  });
+
+  it('merges while trimming the next line and inserting a single space', () => {
+    const md = `Hello
+   world   `;
+    expect(normalizeMarkdown(md, true)).toBe(`Hello world`);
+  });
+
+  it('does not merge across HTML-like tags (opening, content, closing, after)', () => {
+    const md = `<div>
+content
+</div>
+after`;
+    // Nothing should be merged
+    expect(normalizeMarkdown(md, true)).toBe(md);
+  });
+
+  it('does not merge the fence line with the first line after a code block', () => {
+    const md = '```\ncode\n```\nNext line';
+    // The closing ``` must remain on its own line; "Next line" must not be glued to it
+    expect(normalizeMarkdown(md, true)).toBe('```\ncode\n```\nNext line');
+  });
+
+  it('treats whitespace-only lines as empty separators (no merge across them)', () => {
+    const md = `A1
+
+A2`;
+    // The middle line is spaces only; should be treated as an empty separator
+    expect(normalizeMarkdown(md, true)).toBe(`A1
+
+A2`);
+  });
+
+  it('handles a code block that contains a literal ``` line without breaking merging outside', () => {
+    const md = `Intro
+para
+\`\`\`md
+some code
+\`\`\`
+still code
+\`\`\`
+Outro
+text`;
+    // Outside the fenced block, adjacent non-empty lines should merge with a space
+    expect(normalizeMarkdown(md, true)).toBe(
+      `Intro para
+\`\`\`md
+some code
+\`\`\`
+still code
+\`\`\`
+Outro text`,
+    );
+  });
+});