fix: update stream-markdown-parser to version 0.0.14 and add inline HTML parsing support

Simon-He95 · Simon-He95 · commit 281f538e814b · 2025-11-02T10:57:29.000+08:00
diff --git a/package.json b/package.json
@@ -103,7 +103,7 @@
   },
   "dependencies": {
     "@floating-ui/dom": "^1.7.4",
-    "stream-markdown-parser": "^0.0.13"
+    "stream-markdown-parser": "^0.0.14"
   },
   "devDependencies": {
     "@antfu/eslint-config": "^5.4.1",
diff --git a/packages/markdown-parser/package.json b/packages/markdown-parser/package.json
@@ -1,7 +1,7 @@
 {
   "name": "stream-markdown-parser",
   "type": "module",
-  "version": "0.0.13",
+  "version": "0.0.14",
   "packageManager": "pnpm@10.20.0",
   "description": "Pure markdown parser and renderer utilities with streaming support - framework agnostic",
   "author": "Simon He",
diff --git a/packages/markdown-parser/src/parser/index.ts b/packages/markdown-parser/src/parser/index.ts
@@ -113,6 +113,8 @@ export function processTokens(tokens: MarkdownToken[]): ParsedNode[] {
         break
 
       case 'html_block':
+        i += 1
+        break
       case 'code_block':
         result.push(parseCodeBlock(tokens[i]))
         i += 1
diff --git a/packages/markdown-parser/src/parser/inline-parsers/html-inline-code-parser.ts b/packages/markdown-parser/src/parser/inline-parsers/html-inline-code-parser.ts
@@ -0,0 +1,78 @@
+import type { InlineCodeNode, MarkdownToken, ParsedNode } from '../../types'
+
+// Parse inline HTML and return an appropriate ParsedNode depending on tag.
+export function parseHtmlInlineCodeToken(token: MarkdownToken, tokens: MarkdownToken[], i: number): [ParsedNode, number] {
+  let code = String(token.content ?? '').trim()
+  const nextToken = tokens[i + 1]
+  const nnextToken = tokens[i + 2]
+
+  // Quick tag detection
+  const tagMatch = code.match(/^<\s*([\w-]+)/)
+  const tag = tagMatch ? tagMatch[1].toLowerCase() : ''
+
+  // Helper to extract inner text for tags like <a>...</a>, <p>...</p>, <div>...</div>
+  function extractInner(html: string) {
+    // Match the first closing sequence like >...< /tag>
+    const m = html.match(/>([\s\S]*?)<\s*\/\s*[\w-]+>/)
+    return m ? m[1] : ''
+  }
+
+  if (tag === 'a') {
+    let loading = false
+    if (!nextToken || (nextToken?.type === 'text' && (!nnextToken || nnextToken.type !== 'html_inline')) || !nextToken) {
+      loading = true
+    }
+    if (nextToken?.type === 'text' && (nnextToken?.type === 'html_inline' || !nnextToken)) {
+      // Try to extract href and inner text
+      const hrefMatch = code.match(/href\s*=\s*"([^"]+)"|href\s*=\s*'([^']+)'|href\s*=\s*([^\s>]+)/i)
+      const href = hrefMatch ? (hrefMatch[1] || hrefMatch[2] || hrefMatch[3]) : ''
+      let index = i + 1
+      if (nextToken.type === 'text') {
+        code = nextToken.content?.replace(/<[^>]*$/, '') ?? ''
+
+        index = i + 2
+      }
+      if (nnextToken?.type === 'html_inline' && nextToken.type === 'text') {
+        index = i + 3
+      }
+      const inner = code || href || ''
+      return [
+        {
+          type: 'link',
+          href: String(href ?? ''),
+          title: null,
+          text: code,
+          children: [
+            { type: 'text', content: inner, raw: inner },
+          ],
+          loading,
+          raw: code,
+        } as ParsedNode,
+        index,
+      ]
+    }
+  }
+
+  if (tag === 'p' || tag === 'div') {
+    const inner = extractInner(code) || ''
+    return [
+      {
+        type: 'paragraph',
+        children: [
+          { type: 'text', content: inner, raw: inner },
+        ],
+        raw: code,
+      } as ParsedNode,
+      i + 1,
+    ]
+  }
+  // Fallback: treat as inline code (preserve previous behavior)
+  return [
+    {
+      type: 'inline_code',
+      code,
+      raw: code,
+    } as InlineCodeNode,
+    i + 1,
+  ]
+}
diff --git a/packages/markdown-parser/src/parser/inline-parsers/index.ts b/packages/markdown-parser/src/parser/inline-parsers/index.ts
@@ -6,6 +6,7 @@ import { parseFenceToken } from './fence-parser'
 import { parseFootnoteRefToken } from './footnote-ref-parser'
 import { parseHardbreakToken } from './hardbreak-parser'
 import { parseHighlightToken } from './highlight-parser'
+import { parseHtmlInlineCodeToken } from './html-inline-code-parser'
 import { parseImageToken } from './image-parser'
 import { parseInlineCodeToken } from './inline-code-parser'
 import { parseInsertToken } from './insert-parser'
@@ -280,6 +281,12 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
         pushNode(parseInlineCodeToken(token))
         i++
         break
+      case 'html_inline': {
+        const [node, index] = parseHtmlInlineCodeToken(token, tokens, i)
+        pushNode(node)
+        i = index
+        break
+      }
 
       case 'link_open': {
         handleLinkOpen(token)
@@ -608,21 +615,21 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
     // may do this), prefer the parseLinkToken's initial loading value
     // (which defaults to true for mid-state links).
     if (raw && hrefStr) {
-    // More robust: locate the first "](" after the link text and see if
-    // there's a matching ')' that closes the href. This avoids false
-    // positives when other parentheses appear elsewhere in the source.
+      // More robust: locate the first "](" after the link text and see if
+      // there's a matching ')' that closes the href. This avoids false
+      // positives when other parentheses appear elsewhere in the source.
       const openIdx = raw.indexOf('](')
       if (openIdx === -1) {
-      // No explicit link start found in raw — be conservative and keep
-      // the parser's default loading value.
+        // No explicit link start found in raw — be conservative and keep
+        // the parser's default loading value.
       }
       else {
         const closeIdx = raw.indexOf(')', openIdx + 2)
         if (closeIdx === -1) {
           node.loading = true
         }
         else {
-        // Check that the href inside the parens corresponds to this token
+          // Check that the href inside the parens corresponds to this token
           const inside = raw.slice(openIdx + 2, closeIdx)
           if (inside.includes(hrefStr))
             node.loading = false
diff --git a/packages/markdown-parser/test/html-inline.test.ts b/packages/markdown-parser/test/html-inline.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, it } from 'vitest'
+import { getMarkdown, parseMarkdownToStructure } from '../src'
+
+describe('html_inline parsing', () => {
+  it('parses <a> as link node with href and inner text', () => {
+    const md = getMarkdown()
+    const markdown = `This is a <a href="https://example.com">Example</a> link.`
+    const nodes = parseMarkdownToStructure(markdown, md)
+    expect(nodes.length).toBeGreaterThan(0)
+    const para = nodes[0]
+    expect(para.type).toBe('paragraph')
+    const linkChild = (para as any).children.find((c: any) => c.type === 'link')
+    expect(linkChild).toBeDefined()
+    expect(linkChild.href).toBe('https://example.com')
+    expect(linkChild.children[0].content).toBe('Example')
+  })
+
+  it('falls back to inline_code for unknown inline html (e.g., <span>)', () => {
+    const md = getMarkdown()
+    const markdown = `Before <span>inner span</span> After`
+    const nodes = parseMarkdownToStructure(markdown, md)
+    const para = nodes[0]
+    const spanNode = (para as any).children.find((c: any) => c.type === 'inline_code')
+    expect(spanNode).toBeDefined()
+    // fallback at least preserves the tag in code/raw (inner text may be emitted as a separate text node)
+    expect(spanNode.code).toContain('<span')
+    expect(spanNode.raw).toContain('<span')
+  })
+
+  it('handles attribute values that include ">" characters', () => {
+    const md = getMarkdown()
+    const markdown = `Value <a href="https://example.com?q=a>b&x=1">Here</a> end`
+    const nodes = parseMarkdownToStructure(markdown, md)
+    const para = nodes[0]
+    const linkChild = (para as any).children.find((c: any) => c.type === 'link')
+    expect(linkChild).toBeDefined()
+    // href extraction should capture full quoted value including '>'
+    expect(linkChild.href).toBe('https://example.com?q=a>b&x=1')
+  })
+
+  it('handles missing closing tag gracefully (unclosed <a>)', () => {
+    const md = getMarkdown()
+    const markdown = `Start <a href="https://example.com">Unclosed text`
+    const nodes = parseMarkdownToStructure(markdown, md)
+    // Should not throw; ensure raw input is preserved in some node
+    const serialized = JSON.stringify(nodes)
+    // JSON will escape quotes, so check for a less strict substring and for href URL
+    expect(serialized).toContain('<a href')
+    expect(serialized).toContain('https://example.com')
+    expect(serialized).toContain('Unclosed text')
+  })
+})
diff --git a/playground/src/const/markdown.ts b/playground/src/const/markdown.ts
@@ -4,6 +4,10 @@ export const streamContent = `
 
 [Star on GitHub](https://github.com/Simon-He95/vue-markdown-render)
 
+<a href="https://simonhe.me/">我是 a 元素标签</a>
+
+https://github.com/Simon-He95/vue-markdown-render
+
 [【Author: Simon】](https://simonhe.me/)
 
 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "stream-markdown-parser",`
`3`	`3`	`"type": "module",`
`4`		`- "version": "0.0.13",`
	`4`	`+ "version": "0.0.14",`
`5`	`5`	`"packageManager": "pnpm@10.20.0",`
`6`	`6`	`"description": "Pure markdown parser and renderer utilities with streaming support - framework agnostic",`
`7`	`7`	`"author": "Simon He",`