Skip to content

Commit ca1b170

Browse files
authored
Relax the return type of extractField (#303)
To address #302. Also, in order to maintain type safety for indexed fields, and to allow more customization options, add a new `stringifyField` option to control how field values are turned into strings for indexing.
1 parent d462450 commit ca1b170

File tree

3 files changed

+117
-36
lines changed

3 files changed

+117
-36
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
`MiniSearch` follows [semantic versioning](https://semver.org/spec/v2.0.0.html).
44

5+
## Upcoming
6+
7+
- [fix] Relax the return type of `extractField` to allow non-string values
8+
(when a field is stored but not indexed, it can be any type)
9+
- Add `stringifyField` option to customize how field values are turned into strings
10+
for indexing
11+
512
## v7.1.2
613

714
- [fix] Correctly specify that MiniSearch targets ES9 (ES2018), not ES6

src/MiniSearch.test.js

Lines changed: 70 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,30 @@ describe('MiniSearch', () => {
8686
expect(tokenize).toHaveBeenCalledWith('true', 'isBlinky')
8787
})
8888

89+
it('turns the field to string before tokenization using a custom stringifyField function, if given', () => {
90+
const tokenize = jest.fn(x => x.split(/\W+/))
91+
const stringifyField = jest.fn((value, fieldName) => {
92+
if (fieldName === 'tags') {
93+
return value.join('|')
94+
} else if (typeof value === 'boolean') {
95+
return value ? 'T' : 'F'
96+
}
97+
return value.toString()
98+
})
99+
const ms = new MiniSearch({ fields: ['id', 'tags', 'isBlinky'], tokenize, stringifyField })
100+
expect(() => {
101+
ms.add({ id: 123, tags: ['foo', 'bar'], isBlinky: false })
102+
ms.add({ id: 321, isBlinky: true })
103+
}).not.toThrowError()
104+
105+
expect(tokenize).toHaveBeenCalledWith('123', 'id')
106+
expect(tokenize).toHaveBeenCalledWith('foo|bar', 'tags')
107+
expect(tokenize).toHaveBeenCalledWith('F', 'isBlinky')
108+
109+
expect(tokenize).toHaveBeenCalledWith('321', 'id')
110+
expect(tokenize).toHaveBeenCalledWith('T', 'isBlinky')
111+
})
112+
89113
it('passes document and field name to the field extractor', () => {
90114
const extractField = jest.fn((document, fieldName) => {
91115
if (fieldName === 'pubDate') {
@@ -290,39 +314,47 @@ describe('MiniSearch', () => {
290314
expect(ms.search('bar')).toHaveLength(0)
291315
})
292316

293-
describe('when using custom per-field extraction/tokenizer/processing', () => {
317+
describe('when using custom per-field extraction/stringification/tokenizer/processing', () => {
294318
const documents = [
295-
{ id: 1, title: 'Divina Commedia', tags: 'dante,virgilio', author: { name: 'Dante Alighieri' } },
296-
{ id: 2, title: 'I Promessi Sposi', tags: 'renzo,lucia', author: { name: 'Alessandro Manzoni' } },
297-
{ id: 3, title: 'Vita Nova', author: { name: 'Dante Alighieri' } }
319+
{ id: 1, title: 'Divina Commedia', tags: ['dante', 'virgilio'], author: { name: 'Dante Alighieri' }, available: true },
320+
{ id: 2, title: 'I Promessi Sposi', tags: ['renzo', 'lucia'], author: { name: 'Alessandro Manzoni' }, available: false },
321+
{ id: 3, title: 'Vita Nova', tags: ['dante'], author: { name: 'Dante Alighieri' }, available: true }
298322
]
323+
const options = {
324+
fields: ['title', 'tags', 'authorName', 'available'],
325+
extractField: (doc, fieldName) => {
326+
if (fieldName === 'authorName') {
327+
return doc.author.name
328+
} else {
329+
return doc[fieldName]
330+
}
331+
},
332+
stringifyField: (fieldValue, fieldName) => {
333+
if (fieldName === 'available') {
334+
return fieldValue ? 'yes' : 'no'
335+
} else {
336+
return fieldValue.toString()
337+
}
338+
},
339+
tokenize: (field, fieldName) => {
340+
if (fieldName === 'tags') {
341+
return field.split(',')
342+
} else {
343+
return field.split(/\s+/)
344+
}
345+
},
346+
processTerm: (term, fieldName) => {
347+
if (fieldName === 'tags') {
348+
return term.toUpperCase()
349+
} else {
350+
return term.toLowerCase()
351+
}
352+
}
353+
}
299354

300355
let ms, _warn
301356
beforeEach(() => {
302-
ms = new MiniSearch({
303-
fields: ['title', 'tags', 'authorName'],
304-
extractField: (doc, fieldName) => {
305-
if (fieldName === 'authorName') {
306-
return doc.author.name
307-
} else {
308-
return doc[fieldName]
309-
}
310-
},
311-
tokenize: (field, fieldName) => {
312-
if (fieldName === 'tags') {
313-
return field.split(',')
314-
} else {
315-
return field.split(/\s+/)
316-
}
317-
},
318-
processTerm: (term, fieldName) => {
319-
if (fieldName === 'tags') {
320-
return term.toUpperCase()
321-
} else {
322-
return term.toLowerCase()
323-
}
324-
}
325-
})
357+
ms = new MiniSearch(options)
326358
ms.addAll(documents)
327359
_warn = console.warn
328360
console.warn = jest.fn()
@@ -332,12 +364,20 @@ describe('MiniSearch', () => {
332364
console.warn = _warn
333365
})
334366

335-
it('removes the document from the index', () => {
367+
it('removes the document and its terms from the index', () => {
336368
expect(ms.documentCount).toEqual(3)
369+
expect(ms.search('commedia').map(({ id }) => id)).toEqual([1])
370+
expect(ms.search('DANTE').map(({ id }) => id)).toEqual([1, 3])
371+
expect(ms.search('vita').map(({ id }) => id)).toEqual([3])
372+
expect(ms.search('yes').map(({ id }) => id)).toEqual([1, 3])
373+
337374
ms.remove(documents[0])
375+
338376
expect(ms.documentCount).toEqual(2)
339-
expect(ms.search('commedia').length).toEqual(0)
377+
expect(ms.search('commedia').map(({ id }) => id)).toEqual([])
378+
expect(ms.search('DANTE').map(({ id }) => id)).toEqual([3])
340379
expect(ms.search('vita').map(({ id }) => id)).toEqual([3])
380+
expect(ms.search('yes').map(({ id }) => id)).toEqual([3])
341381
expect(console.warn).not.toHaveBeenCalled()
342382
})
343383
})

src/MiniSearch.ts

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,38 @@ export type Options<T = any> = {
222222
* The returned value is converted to a string by `stringifyField`, and that string is fed into the `tokenize` function to split it up
223223
* into tokens.
224224
*/
225-
extractField?: (document: T, fieldName: string) => string,
225+
extractField?: (document: T, fieldName: string) => any,
226+
227+
/**
228+
* Function used to turn field values into strings for indexing
229+
*
230+
* The function takes as arguments the field value, and the name of the field
231+
* to stringify, so that its logic can be customized on specific fields. By
232+
* default, it simply calls `toString()` on the field value (which in many
233+
* cases is already a string).
234+
*
235+
* ### Example:
236+
*
237+
* ```javascript
238+
* // Custom stringifier that formats dates as "Tuesday, September 16, 2025"
239+
* const miniSearch = new MiniSearch({
240+
* fields: ['title', 'date'],
241+
* stringifyField: (fieldValue, _fieldName) => {
242+
* if (fieldValue instanceof Date) {
243+
* return fieldValue.toLocaleDateString('en-US', {
244+
* weekday: 'long',
245+
* year: 'numeric',
246+
* month: 'long',
247+
* day: 'numeric'
248+
* })
249+
* } else {
250+
* return fieldValue.toString()
251+
* }
252+
* }
253+
* })
254+
* ```
255+
*/
256+
stringifyField?: (fieldValue: any, fieldName: string) => string,
226257

227258
/**
228259
* Function used to split a field value into individual terms to be indexed.
@@ -322,7 +353,9 @@ type OptionsWithDefaults<T = any> = Options<T> & {
322353

323354
idField: string
324355

325-
extractField: (document: T, fieldName: string) => string
356+
extractField: (document: T, fieldName: string) => any
357+
358+
stringifyField: (fieldValue: any, fieldName: string) => string
326359

327360
tokenize: (text: string, fieldName: string) => string[]
328361

@@ -711,7 +744,7 @@ export default class MiniSearch<T = any> {
711744
* @param document The document to be indexed
712745
*/
713746
add (document: T): void {
714-
const { extractField, tokenize, processTerm, fields, idField } = this._options
747+
const { extractField, stringifyField, tokenize, processTerm, fields, idField } = this._options
715748
const id = extractField(document, idField)
716749
if (id == null) {
717750
throw new Error(`MiniSearch: document does not have ID field "${idField}"`)
@@ -728,7 +761,7 @@ export default class MiniSearch<T = any> {
728761
const fieldValue = extractField(document, field)
729762
if (fieldValue == null) continue
730763

731-
const tokens = tokenize(fieldValue.toString(), field)
764+
const tokens = tokenize(stringifyField(fieldValue, field), field)
732765
const fieldId = this._fieldIds[field]
733766

734767
const uniqueTerms = new Set(tokens).size
@@ -803,7 +836,7 @@ export default class MiniSearch<T = any> {
803836
* @param document The document to be removed
804837
*/
805838
remove (document: T): void {
806-
const { tokenize, processTerm, extractField, fields, idField } = this._options
839+
const { tokenize, processTerm, extractField, stringifyField, fields, idField } = this._options
807840
const id = extractField(document, idField)
808841

809842
if (id == null) {
@@ -820,7 +853,7 @@ export default class MiniSearch<T = any> {
820853
const fieldValue = extractField(document, field)
821854
if (fieldValue == null) continue
822855

823-
const tokens = tokenize(fieldValue.toString(), field)
856+
const tokens = tokenize(stringifyField(fieldValue, field), field)
824857
const fieldId = this._fieldIds[field]
825858

826859
const uniqueTerms = new Set(tokens).size
@@ -2143,6 +2176,7 @@ const termToQuerySpec = (options: SearchOptions) => (term: string, i: number, te
21432176
const defaultOptions = {
21442177
idField: 'id',
21452178
extractField: (document: any, fieldName: string) => document[fieldName],
2179+
stringifyField: (fieldValue: any, fieldName: string) => fieldValue.toString(),
21462180
tokenize: (text: string) => text.split(SPACE_OR_PUNCTUATION),
21472181
processTerm: (term: string) => term.toLowerCase(),
21482182
fields: undefined,

0 commit comments

Comments
 (0)