Skip to content

Commit 95901eb

Browse files
fix(search): clean markdown elements in search contents (#2457)
Co-authored-by: John Hildenbiddle <[email protected]>
1 parent 298cc44 commit 95901eb

File tree

4 files changed

+306
-6
lines changed

4 files changed

+306
-6
lines changed

src/plugins/search/component.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,13 @@ function doSearch(value) {
4949

5050
let html = '';
5151
matches.forEach((post, i) => {
52+
const content = post.content ? `...${post.content}...` : '';
5253
const title = (post.title || '').replace(/<[^>]+>/g, '');
5354
html += /* html */ `
5455
<div class="matching-post" aria-label="search result ${i + 1}">
5556
<a href="${post.url}" title="${title}">
5657
<p class="title clamp-1">${post.title}</p>
57-
<p class="content clamp-2">${post.content}</p>
58+
<p class="content clamp-2">${content}</p>
5859
</a>
5960
</div>
6061
`;

src/plugins/search/markdown-to-txt.js

+197
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/**
2+
* This is a function to convert markdown to txt based on markedjs v13+.
3+
* The escape/unescape functions are copied from [lodash](https://www.npmjs.com/package/lodash) rather than imported, to keep the bundle size down.
4+
*/
5+
import { marked } from 'marked';
6+
7+
// Matches the five HTML entities handled here; the optional `0+` group also
// accepts zero-padded apostrophe entities such as `&#039;`.
const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39);/g;
// Non-global twin of the pattern above, used only for the cheap "is there
// anything to replace?" check (no `lastIndex` state is carried between calls).
const reHasEscapedHtml = RegExp(reEscapedHtml.source);
const htmlUnescapes = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
};

/**
 * Converts the HTML entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#39;`
 * back to their literal characters.
 *
 * @param {string} string Text that may contain HTML entities.
 * @returns {string} The decoded text; an empty string for falsy input.
 */
function unescape(string) {
  if (!string) {
    return '';
  }

  if (!reHasEscapedHtml.test(string)) {
    return string;
  }

  return string.replace(reEscapedHtml, match => {
    const decoded = htmlUnescapes[match];
    // Zero-padded apostrophe entities are not keys of the table; they all
    // decode to a single quote.
    return decoded || "'";
  });
}
22+
23+
// Characters that are replaced by an HTML entity.
const reUnescapedHtml = /[&<>"']/g;
// Non-global twin of the pattern above, used only for the cheap "is there
// anything to replace?" check (no `lastIndex` state is carried between calls).
const reHasUnescapedHtml = RegExp(reUnescapedHtml.source);
const htmlEscapes = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Converts the characters `&`, `<`, `>`, `"` and `'` to their HTML entities.
 *
 * @param {string} string Text to escape.
 * @returns {string} The escaped text; an empty string for falsy input.
 */
function escape(string) {
  if (!string) {
    return '';
  }

  if (!reHasUnescapedHtml.test(string)) {
    return string;
  }

  return string.replace(reUnescapedHtml, match => htmlEscapes[match]);
}
38+
39+
/**
 * Strips docsify's helper markers (`!>` and `?>`) from a piece of text.
 *
 * Every occurrence is removed. Passing a string pattern to `String.replace`
 * would only drop the first one and leave later markers in the search index.
 *
 * @param {string} string Text that may contain helper markers.
 * @returns {string} The text without markers; falsy input is returned as is.
 */
function helpersCleanup(string) {
  return string && string.replace(/[!?]>/g, '');
}
42+
43+
/**
 * Renderer overrides that turn markdown tokens into plain, searchable text
 * instead of HTML: structural markup is dropped, readable text is kept, and
 * link/image targets stay in the output so they can be searched too.
 *
 * The methods read `this.parser`, which is not defined on this object.
 * NOTE(review): presumably marked attaches it before rendering — confirm
 * against the marked version in use. The `?.` guards keep every method
 * returning a string even when it is absent.
 */
const markdownToTxtRenderer = {
  space() {
    return '';
  },

  code({ text }) {
    const code = text.replace(/\n$/, '');
    // Escaped here because markdownToTxt() unescapes the whole output later;
    // this keeps entity-looking text inside code from being decoded twice.
    return escape(code);
  },

  blockquote({ tokens }) {
    return this.parser?.parse(tokens) || '';
  },

  html() {
    return '';
  },

  heading({ tokens }) {
    // Heading children are inline tokens, so they go through parseInline()
    // like the other inline containers below (paragraph, em, del, ...).
    return this.parser?.parseInline(tokens) || '';
  },

  hr() {
    return '';
  },

  list(token) {
    let body = '';
    for (const item of token.items) {
      const rendered = this.listitem(item);
      // Keep adjacent items from fusing into one word: add a single space
      // only when neither side already provides whitespace.
      if (body && rendered && !/\s$/.test(body) && !/^\s/.test(rendered)) {
        body += ' ';
      }
      body += rendered;
    }

    return body;
  },

  listitem(item) {
    let itemBody = '';
    if (item.task) {
      // checkbox() renders as an empty string, so only the separating space
      // below ends up in the output.
      const checkbox = this.checkbox({ checked: !!item.checked });
      if (item.loose) {
        const first = item.tokens[0];
        if (first?.type === 'paragraph') {
          // Prefix the first paragraph (and its first inline text token, if
          // any) with the checkbox text.
          first.text = `${checkbox} ${first.text}`;
          const firstInline = first.tokens?.[0];
          if (firstInline?.type === 'text') {
            firstInline.text = `${checkbox} ${firstInline.text}`;
          }
        } else {
          item.tokens.unshift({
            type: 'text',
            raw: `${checkbox} `,
            text: `${checkbox} `,
          });
        }
      } else {
        itemBody += `${checkbox} `;
      }
    }

    // `?? ''` avoids appending the literal string "undefined" when no parser
    // is available.
    itemBody += this.parser?.parse(item.tokens, !!item.loose) ?? '';

    return itemBody;
  },

  checkbox() {
    return '';
  },

  paragraph({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  table(token) {
    // Cells and rows are joined with a space so neighbouring values stay
    // separate words in the resulting text.
    const renderRow = cells =>
      this.tablerow({
        text: cells.map(cell => this.tablecell(cell)).join(' '),
      });

    const header = renderRow(token.header);
    const body = token.rows.map(row => renderRow(row)).join(' ');

    return `${header} ${body}`;
  },

  tablerow({ text }) {
    return text;
  },

  tablecell(token) {
    return this.parser?.parseInline(token.tokens) || '';
  },

  strong({ text, tokens }) {
    // Render the child tokens (as em/del do) so nested inline markdown is
    // cleaned as well; fall back to the raw text when that is not possible.
    const parser = this.parser;
    if (!tokens || !parser) {
      return text;
    }
    return parser.parseInline(tokens) || '';
  },

  em({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  codespan({ text }) {
    return text;
  },

  br() {
    return ' ';
  },

  del({ tokens }) {
    return this.parser?.parseInline(tokens) || '';
  },

  link({ tokens, href, title }) {
    // Keep the href and title in the output so they stay searchable (images
    // are handled the same way).
    // e.g. [filename](_media/example.js ':include :type=code :fragment=demo')
    // Result: filename _media/example.js :include :type=code :fragment=demo
    return `${this.parser?.parseInline(tokens) || ''} ${href || ''} ${title || ''}`;
  },

  image({ title, text, href }) {
    return `${text || ''} ${href || ''} ${title || ''}`;
  },

  text(token) {
    return token.tokens
      ? this.parser?.parseInline(token.tokens) || ''
      : token.text || '';
  },
};
188+
// Parser configured with the plain-text renderer.
// NOTE(review): setOptions() is called on the imported `marked` object itself;
// confirm nothing else in the same bundle relies on its default renderer.
const _marked = marked.setOptions({ renderer: markdownToTxtRenderer });

/**
 * Converts a markdown string to plain text for the search index.
 *
 * The markdown is rendered with the plain-text renderer, HTML entities in the
 * result are decoded, docsify helper markers are removed and surrounding
 * whitespace is trimmed.
 *
 * @param {string} markdown Markdown source.
 * @returns {string} The plain-text content.
 */
export function markdownToTxt(markdown) {
  const plainText = helpersCleanup(unescape(_marked.parse(markdown)));
  return plainText.trim();
}
196+
197+
export default markdownToTxt;

src/plugins/search/search.js

+6-5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import {
22
getAndRemoveConfig,
33
getAndRemoveDocsifyIgnoreConfig,
44
} from '../../core/render/utils.js';
5+
import { markdownToTxt } from './markdown-to-txt.js';
56
import Dexie from 'dexie';
67

78
let INDEXES = {};
@@ -134,7 +135,7 @@ export function genIndex(path, content = '', router, depth, indexKey) {
134135
index[slug] = {
135136
slug,
136137
title: path !== '/' ? path.slice(1) : 'Home Page',
137-
body: token.text || '',
138+
body: markdownToTxt(token.text || ''),
138139
path: path,
139140
indexKey: indexKey,
140141
};
@@ -150,12 +151,12 @@ export function genIndex(path, content = '', router, depth, indexKey) {
150151
token.text = getTableData(token);
151152
token.text = getListData(token);
152153

153-
index[slug].body += '\n' + (token.text || '');
154+
index[slug].body += '\n' + markdownToTxt(token.text || '');
154155
} else {
155156
token.text = getTableData(token);
156157
token.text = getListData(token);
157158

158-
index[slug].body = token.text || '';
159+
index[slug].body = markdownToTxt(token.text || '');
159160
}
160161

161162
index[slug].path = path;
@@ -229,8 +230,8 @@ export function search(query) {
229230
start = indexContent < 11 ? 0 : indexContent - 10;
230231
end = start === 0 ? 100 : indexContent + keyword.length + 90;
231232

232-
if (postContent && end > postContent.length) {
233-
end = postContent.length;
233+
if (handlePostContent && end > handlePostContent.length) {
234+
end = handlePostContent.length;
234235
}
235236

236237
const matchContent =

test/e2e/search.test.js

+101
Original file line numberDiff line numberDiff line change
@@ -232,4 +232,105 @@ test.describe('Search Plugin Tests', () => {
232232
await page.keyboard.press('z');
233233
await expect(searchFieldElm).toBeFocused();
234234
});
235+
test('search result should remove markdown code block', async ({ page }) => {
236+
const docsifyInitConfig = {
237+
markdown: {
238+
homepage: `
239+
# Hello World
240+
241+
searchHere
242+
\`\`\`js
243+
console.log('Hello World');
244+
\`\`\`
245+
`,
246+
},
247+
scriptURLs: ['/dist/plugins/search.js'],
248+
};
249+
250+
const searchFieldElm = page.locator('input[type=search]');
251+
const resultsHeadingElm = page.locator('.results-panel .content');
252+
253+
await docsifyInit(docsifyInitConfig);
254+
await searchFieldElm.fill('searchHere');
255+
// the newline after searchHere is kept, while the ```js ``` fence markup itself should be removed
256+
expect(await resultsHeadingElm.textContent()).toContain(
257+
"...searchHere\nconsole.log('Hello World');...",
258+
);
259+
});
260+
261+
test('search result should remove file markdown and keep href attribution for files', async ({
262+
page,
263+
}) => {
264+
const docsifyInitConfig = {
265+
markdown: {
266+
homepage: `
267+
# Hello World
268+
![filename](_media/example.js ':include :type=code :fragment=demo')
269+
`,
270+
},
271+
scriptURLs: ['/dist/plugins/search.js'],
272+
};
273+
274+
const searchFieldElm = page.locator('input[type=search]');
275+
const resultsHeadingElm = page.locator('.results-panel .content');
276+
277+
await docsifyInit(docsifyInitConfig);
278+
await searchFieldElm.fill('filename');
279+
expect(await resultsHeadingElm.textContent()).toContain(
280+
'...filename _media/example.js :include :type=code :fragment=demo...',
281+
);
282+
});
283+
284+
test('search result should remove checkbox markdown and keep related values', async ({
285+
page,
286+
}) => {
287+
const docsifyInitConfig = {
288+
markdown: {
289+
homepage: `
290+
# Hello World
291+
292+
- [ ] Task 1
293+
- [x] SearchHere
294+
- [ ] Task 3
295+
`,
296+
},
297+
scriptURLs: ['/dist/plugins/search.js'],
298+
};
299+
300+
const searchFieldElm = page.locator('input[type=search]');
301+
const resultsHeadingElm = page.locator('.results-panel .content');
302+
303+
await docsifyInit(docsifyInitConfig);
304+
await searchFieldElm.fill('SearchHere');
305+
// remove the checkbox markdown and keep the related values
306+
expect(await resultsHeadingElm.textContent()).toContain(
307+
'...Task 1 SearchHere Task 3...',
308+
);
309+
});
310+
311+
test('search result should remove docsify self helper markdown and keep related values', async ({
312+
page,
313+
}) => {
314+
const docsifyInitConfig = {
315+
markdown: {
316+
homepage: `
317+
# Hello World
318+
319+
!> SearchHere to check it!
320+
321+
`,
322+
},
323+
scriptURLs: ['/dist/plugins/search.js'],
324+
};
325+
326+
const searchFieldElm = page.locator('input[type=search]');
327+
const resultsHeadingElm = page.locator('.results-panel .content');
328+
329+
await docsifyInit(docsifyInitConfig);
330+
await searchFieldElm.fill('SearchHere');
331+
// remove the helper markdown and keep the related values
332+
expect(await resultsHeadingElm.textContent()).toContain(
333+
'...SearchHere to check it!...',
334+
);
335+
});
235336
});

0 commit comments

Comments
 (0)