Skip to content

Commit 281f538

Browse files
committed
fix: update stream-markdown-parser to version 0.0.14 and add inline HTML parsing support
1 parent e51f2d7 commit 281f538

File tree

8 files changed

+156
-13
lines changed

8 files changed

+156
-13
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@
103103
},
104104
"dependencies": {
105105
"@floating-ui/dom": "^1.7.4",
106-
"stream-markdown-parser": "^0.0.13"
106+
"stream-markdown-parser": "^0.0.14"
107107
},
108108
"devDependencies": {
109109
"@antfu/eslint-config": "^5.4.1",

packages/markdown-parser/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "stream-markdown-parser",
33
"type": "module",
4-
"version": "0.0.13",
4+
"version": "0.0.14",
55
"packageManager": "[email protected]",
66
"description": "Pure markdown parser and renderer utilities with streaming support - framework agnostic",
77
"author": "Simon He",

packages/markdown-parser/src/parser/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ export function processTokens(tokens: MarkdownToken[]): ParsedNode[] {
113113
break
114114

115115
case 'html_block':
116+
i += 1
117+
break
116118
case 'code_block':
117119
result.push(parseCodeBlock(tokens[i]))
118120
i += 1
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import type { InlineCodeNode, MarkdownToken, ParsedNode } from '../../types'
2+
3+
// Tag name of an opening tag such as `<a ...>`; closing tags (`</a>`) do not match.
const OPENING_TAG_NAME_RE = /^<\s*([\w-]+)/
// A standalone closing anchor tag, e.g. `</a>` or `</ a >`.
const CLOSING_ANCHOR_RE = /^<\s*\/\s*a\s*>$/i
// `href` attribute in double-quoted, single-quoted or unquoted form. The
// leading `\s` keeps attributes like `data-href` from matching, and the quoted
// forms accept an empty value so `href=""` is not captured with its quotes.
const HREF_ATTR_RE = /\shref\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/i
// Text between the first `>` and the first following closing tag.
const INNER_HTML_RE = />([\s\S]*?)<\s*\/\s*[\w-]+>/
// A trailing, not-yet-complete tag (e.g. `</` or `</a`) at the end of a text chunk.
const TRAILING_PARTIAL_TAG_RE = /<[^>]*$/

/**
 * Parse an `html_inline` token and return an appropriate node for its tag.
 *
 * - `<a ...>` followed by a text token becomes a `link` node. The link is
 *   marked `loading` until a closing `</a>` token is seen directly after the
 *   text.
 * - `<p>` / `<div>` become a `paragraph` node holding the tag's inner text.
 * - Anything else falls back to an `inline_code` node carrying the raw HTML.
 *
 * @param token - The `html_inline` token located at `tokens[i]`.
 * @param tokens - The full inline token list, used to look ahead.
 * @param i - Index of `token` inside `tokens`.
 * @returns A tuple of the produced node and the index of the next token to process.
 */
export function parseHtmlInlineCodeToken(token: MarkdownToken, tokens: MarkdownToken[], i: number): [ParsedNode, number] {
  const html = String(token.content ?? '').trim()
  const tag = OPENING_TAG_NAME_RE.exec(html)?.[1]?.toLowerCase() ?? ''

  if (tag === 'a') {
    const nextToken = tokens[i + 1]
    const nnextToken = tokens[i + 2]

    // Only the simple shape `<a ...>` + text (+ optional html_inline) is turned
    // into a link; anything more complex falls through to the inline-code fallback.
    if (nextToken?.type === 'text' && (!nnextToken || nnextToken.type === 'html_inline')) {
      const hrefMatch = HREF_ATTR_RE.exec(html)
      const href = hrefMatch?.[1] ?? hrefMatch?.[2] ?? hrefMatch?.[3] ?? ''

      // While streaming, the text chunk may end with a partial closing tag; drop it.
      const text = nextToken.content?.replace(TRAILING_PARTIAL_TAG_RE, '') ?? ''

      // Consume the token after the text only when it really is `</a>`. Any other
      // inline tag (e.g. `<br>`) is left for the caller to parse, so it is not
      // silently swallowed, and the link is reported as still loading.
      const closed = nnextToken?.type === 'html_inline'
        && CLOSING_ANCHOR_RE.test(String(nnextToken.content ?? '').trim())

      // Fall back to the href as the visible label when there is no link text.
      const label = text || href

      return [
        {
          type: 'link',
          href,
          title: null,
          text,
          children: [
            { type: 'text', content: label, raw: label },
          ],
          loading: !closed,
          raw: text,
        } as ParsedNode,
        closed ? i + 3 : i + 2,
      ]
    }
  }

  if (tag === 'p' || tag === 'div') {
    const inner = INNER_HTML_RE.exec(html)?.[1] ?? ''
    return [
      {
        type: 'paragraph',
        children: [
          { type: 'text', content: inner, raw: inner },
        ],
        raw: html,
      } as ParsedNode,
      i + 1,
    ]
  }

  // Fallback: treat as inline code (preserve previous behavior)
  return [
    {
      type: 'inline_code',
      code: html,
      raw: html,
    } as InlineCodeNode,
    i + 1,
  ]
}

packages/markdown-parser/src/parser/inline-parsers/index.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { parseFenceToken } from './fence-parser'
66
import { parseFootnoteRefToken } from './footnote-ref-parser'
77
import { parseHardbreakToken } from './hardbreak-parser'
88
import { parseHighlightToken } from './highlight-parser'
9+
import { parseHtmlInlineCodeToken } from './html-inline-code-parser'
910
import { parseImageToken } from './image-parser'
1011
import { parseInlineCodeToken } from './inline-code-parser'
1112
import { parseInsertToken } from './insert-parser'
@@ -280,6 +281,12 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
280281
pushNode(parseInlineCodeToken(token))
281282
i++
282283
break
284+
case 'html_inline': {
285+
const [node, index] = parseHtmlInlineCodeToken(token, tokens, i)
286+
pushNode(node)
287+
i = index
288+
break
289+
}
283290

284291
case 'link_open': {
285292
handleLinkOpen(token)
@@ -608,21 +615,21 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
608615
// may do this), prefer the parseLinkToken's initial loading value
609616
// (which defaults to true for mid-state links).
610617
if (raw && hrefStr) {
611-
// More robust: locate the first "](" after the link text and see if
612-
// there's a matching ')' that closes the href. This avoids false
613-
// positives when other parentheses appear elsewhere in the source.
618+
// More robust: locate the first "](" after the link text and see if
619+
// there's a matching ')' that closes the href. This avoids false
620+
// positives when other parentheses appear elsewhere in the source.
614621
const openIdx = raw.indexOf('](')
615622
if (openIdx === -1) {
616-
// No explicit link start found in raw — be conservative and keep
617-
// the parser's default loading value.
623+
// No explicit link start found in raw — be conservative and keep
624+
// the parser's default loading value.
618625
}
619626
else {
620627
const closeIdx = raw.indexOf(')', openIdx + 2)
621628
if (closeIdx === -1) {
622629
node.loading = true
623630
}
624631
else {
625-
// Check that the href inside the parens corresponds to this token
632+
// Check that the href inside the parens corresponds to this token
626633
const inside = raw.slice(openIdx + 2, closeIdx)
627634
if (inside.includes(hrefStr))
628635
node.loading = false
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { getMarkdown, parseMarkdownToStructure } from '../src'
3+
4+
describe('html_inline parsing', () => {
5+
it('parses <a> as link node with href and inner text', () => {
6+
const md = getMarkdown()
7+
const markdown = `This is a <a href="https://example.com">Example</a> link.`
8+
const nodes = parseMarkdownToStructure(markdown, md)
9+
expect(nodes.length).toBeGreaterThan(0)
10+
const para = nodes[0]
11+
expect(para.type).toBe('paragraph')
12+
const linkChild = (para as any).children.find((c: any) => c.type === 'link')
13+
expect(linkChild).toBeDefined()
14+
expect(linkChild.href).toBe('https://example.com')
15+
expect(linkChild.children[0].content).toBe('Example')
16+
})
17+
18+
it('falls back to inline_code for unknown inline html (e.g., <span>)', () => {
19+
const md = getMarkdown()
20+
const markdown = `Before <span>inner span</span> After`
21+
const nodes = parseMarkdownToStructure(markdown, md)
22+
const para = nodes[0]
23+
const spanNode = (para as any).children.find((c: any) => c.type === 'inline_code')
24+
expect(spanNode).toBeDefined()
25+
// fallback at least preserves the tag in code/raw (inner text may be emitted as a separate text node)
26+
expect(spanNode.code).toContain('<span')
27+
expect(spanNode.raw).toContain('<span')
28+
})
29+
30+
it('handles attribute values that include ">" characters', () => {
31+
const md = getMarkdown()
32+
const markdown = `Value <a href="https://example.com?q=a>b&x=1">Here</a> end`
33+
const nodes = parseMarkdownToStructure(markdown, md)
34+
const para = nodes[0]
35+
const linkChild = (para as any).children.find((c: any) => c.type === 'link')
36+
expect(linkChild).toBeDefined()
37+
// href extraction should capture full quoted value including '>'
38+
expect(linkChild.href).toBe('https://example.com?q=a>b&x=1')
39+
})
40+
41+
it('handles missing closing tag gracefully (unclosed <a>)', () => {
42+
const md = getMarkdown()
43+
const markdown = `Start <a href="https://example.com">Unclosed text`
44+
const nodes = parseMarkdownToStructure(markdown, md)
45+
// Should not throw; ensure raw input is preserved in some node
46+
const serialized = JSON.stringify(nodes)
47+
// JSON will escape quotes, so check for a less strict substring and for href URL
48+
expect(serialized).toContain('<a href')
49+
expect(serialized).toContain('https://example.com')
50+
expect(serialized).toContain('Unclosed text')
51+
})
52+
})

playground/src/const/markdown.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ export const streamContent = `
44
55
[Star on GitHub](https://github.com/Simon-He95/vue-markdown-render)
66
7+
<a href="https://simonhe.me/">我是 a 元素标签</a>
8+
9+
https://github.com/Simon-He95/vue-markdown-render
10+
711
[【Author: Simon】](https://simonhe.me/)
812
913

pnpm-lock.yaml

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)