7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,13 @@

`MiniSearch` follows [semantic versioning](https://semver.org/spec/v2.0.0.html).

## Upcoming

- [fix] Relax the return type of `extractField` to allow non-string values
(when a field is stored but not indexed, it can be any type)
- Add `stringifyField` option to customize how field values are turned into strings
for indexing
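A minimal usage sketch of the new `stringifyField` option (the document shape and field names here are illustrative, not part of this changelog):

```javascript
import MiniSearch from 'minisearch'

const miniSearch = new MiniSearch({
  fields: ['title', 'tags'],
  // `tags` is an array; join it explicitly instead of relying on toString()
  stringifyField: (fieldValue, fieldName) =>
    fieldName === 'tags' ? fieldValue.join(' ') : fieldValue.toString()
})

miniSearch.add({ id: 1, title: 'Divina Commedia', tags: ['dante', 'virgilio'] })
miniSearch.search('virgilio') // => one result, document 1
```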

## v7.1.2

- [fix] Correctly specify that MiniSearch targets ES9 (ES2018), not ES6
100 changes: 70 additions & 30 deletions src/MiniSearch.test.js
@@ -86,6 +86,30 @@ describe('MiniSearch', () => {
expect(tokenize).toHaveBeenCalledWith('true', 'isBlinky')
})

it('turns the field to string before tokenization using a custom stringifyField function, if given', () => {
const tokenize = jest.fn(x => x.split(/\W+/))
const stringifyField = jest.fn((value, fieldName) => {
if (fieldName === 'tags') {
return value.join('|')
} else if (typeof value === 'boolean') {
return value ? 'T' : 'F'
}
return value.toString()
})
const ms = new MiniSearch({ fields: ['id', 'tags', 'isBlinky'], tokenize, stringifyField })
expect(() => {
ms.add({ id: 123, tags: ['foo', 'bar'], isBlinky: false })
ms.add({ id: 321, isBlinky: true })
}).not.toThrowError()

expect(tokenize).toHaveBeenCalledWith('123', 'id')
expect(tokenize).toHaveBeenCalledWith('foo|bar', 'tags')
expect(tokenize).toHaveBeenCalledWith('F', 'isBlinky')

expect(tokenize).toHaveBeenCalledWith('321', 'id')
expect(tokenize).toHaveBeenCalledWith('T', 'isBlinky')
})

it('passes document and field name to the field extractor', () => {
const extractField = jest.fn((document, fieldName) => {
if (fieldName === 'pubDate') {
@@ -290,39 +314,47 @@ describe('MiniSearch', () => {
expect(ms.search('bar')).toHaveLength(0)
})

describe('when using custom per-field extraction/tokenizer/processing', () => {
describe('when using custom per-field extraction/stringification/tokenizer/processing', () => {
const documents = [
{ id: 1, title: 'Divina Commedia', tags: 'dante,virgilio', author: { name: 'Dante Alighieri' } },
{ id: 2, title: 'I Promessi Sposi', tags: 'renzo,lucia', author: { name: 'Alessandro Manzoni' } },
{ id: 3, title: 'Vita Nova', author: { name: 'Dante Alighieri' } }
{ id: 1, title: 'Divina Commedia', tags: ['dante', 'virgilio'], author: { name: 'Dante Alighieri' }, available: true },
{ id: 2, title: 'I Promessi Sposi', tags: ['renzo', 'lucia'], author: { name: 'Alessandro Manzoni' }, available: false },
{ id: 3, title: 'Vita Nova', tags: ['dante'], author: { name: 'Dante Alighieri' }, available: true }
]
const options = {
fields: ['title', 'tags', 'authorName', 'available'],
extractField: (doc, fieldName) => {
if (fieldName === 'authorName') {
return doc.author.name
} else {
return doc[fieldName]
}
},
stringifyField: (fieldValue, fieldName) => {
if (fieldName === 'available') {
return fieldValue ? 'yes' : 'no'
} else {
return fieldValue.toString()
}
},
tokenize: (field, fieldName) => {
if (fieldName === 'tags') {
return field.split(',')
} else {
return field.split(/\s+/)
}
},
processTerm: (term, fieldName) => {
if (fieldName === 'tags') {
return term.toUpperCase()
} else {
return term.toLowerCase()
}
}
}

let ms, _warn
beforeEach(() => {
ms = new MiniSearch({
fields: ['title', 'tags', 'authorName'],
extractField: (doc, fieldName) => {
if (fieldName === 'authorName') {
return doc.author.name
} else {
return doc[fieldName]
}
},
tokenize: (field, fieldName) => {
if (fieldName === 'tags') {
return field.split(',')
} else {
return field.split(/\s+/)
}
},
processTerm: (term, fieldName) => {
if (fieldName === 'tags') {
return term.toUpperCase()
} else {
return term.toLowerCase()
}
}
})
ms = new MiniSearch(options)
ms.addAll(documents)
_warn = console.warn
console.warn = jest.fn()
@@ -332,12 +364,20 @@
console.warn = _warn
})

it('removes the document from the index', () => {
it('removes the document and its terms from the index', () => {
expect(ms.documentCount).toEqual(3)
expect(ms.search('commedia').map(({ id }) => id)).toEqual([1])
expect(ms.search('DANTE').map(({ id }) => id)).toEqual([1, 3])
expect(ms.search('vita').map(({ id }) => id)).toEqual([3])
expect(ms.search('yes').map(({ id }) => id)).toEqual([1, 3])

ms.remove(documents[0])

expect(ms.documentCount).toEqual(2)
expect(ms.search('commedia').length).toEqual(0)
expect(ms.search('commedia').map(({ id }) => id)).toEqual([])
expect(ms.search('DANTE').map(({ id }) => id)).toEqual([3])
expect(ms.search('vita').map(({ id }) => id)).toEqual([3])
expect(ms.search('yes').map(({ id }) => id)).toEqual([3])
expect(console.warn).not.toHaveBeenCalled()
})
})
46 changes: 40 additions & 6 deletions src/MiniSearch.ts
@@ -222,7 +222,38 @@ export type Options<T = any> = {
* The returned string is fed into the `tokenize` function to split it up
* into tokens.
*/
extractField?: (document: T, fieldName: string) => string,
extractField?: (document: T, fieldName: string) => any,

/**
* Function used to turn field values into strings for indexing
*
* The function takes as arguments the field value, and the name of the field
* to stringify, so that its logic can be customized on specific fields. By
* default, it simply calls `toString()` on the field value (which in many
* cases is already a string).
*
* ### Example:
*
* ```javascript
* // Custom stringifier that formats dates as "Tuesday, September 16, 2025"
* const miniSearch = new MiniSearch({
* fields: ['title', 'date'],
* stringifyField: (fieldValue, _fieldName) => {
* if (fieldValue instanceof Date) {
* return fieldValue.toLocaleDateString('en-US', {
* weekday: 'long',
* year: 'numeric',
* month: 'long',
* day: 'numeric'
* })
* } else {
* return fieldValue.toString()
* }
* }
* })
* ```
*/
stringifyField?: (fieldValue: any, fieldName: string) => string,

/**
* Function used to split a field value into individual terms to be indexed.
@@ -322,7 +353,9 @@ type OptionsWithDefaults<T = any> = Options<T> & {

idField: string

extractField: (document: T, fieldName: string) => string
extractField: (document: T, fieldName: string) => any

stringifyField: (fieldValue: any, fieldName: string) => string

tokenize: (text: string, fieldName: string) => string[]

@@ -711,7 +744,7 @@ export default class MiniSearch<T = any> {
* @param document The document to be indexed
*/
add (document: T): void {
const { extractField, tokenize, processTerm, fields, idField } = this._options
const { extractField, stringifyField, tokenize, processTerm, fields, idField } = this._options
const id = extractField(document, idField)
if (id == null) {
throw new Error(`MiniSearch: document does not have ID field "${idField}"`)
@@ -728,7 +761,7 @@
const fieldValue = extractField(document, field)
if (fieldValue == null) continue

const tokens = tokenize(fieldValue.toString(), field)
const tokens = tokenize(stringifyField(fieldValue, field), field)
const fieldId = this._fieldIds[field]

const uniqueTerms = new Set(tokens).size
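A paraphrase of the indexing path in `add` above may help: for each indexed field, the value now flows through `extractField`, then `stringifyField`, then `tokenize`, then `processTerm`. The following is a sketch under the assumption that `options` carries the same callbacks destructured in `add`, not the library's literal implementation:

```javascript
// Sketch of the per-field indexing pipeline used by add()
const termsForField = (document, field, options) => {
  const { extractField, stringifyField, tokenize, processTerm } = options
  const fieldValue = extractField(document, field)
  if (fieldValue == null) return []

  return tokenize(stringifyField(fieldValue, field), field)
    .map((term) => processTerm(term, field))
    .filter((term) => !!term) // a falsy result from processTerm discards the term
}
```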
@@ -803,7 +836,7 @@
* @param document The document to be removed
*/
remove (document: T): void {
const { tokenize, processTerm, extractField, fields, idField } = this._options
const { tokenize, processTerm, extractField, stringifyField, fields, idField } = this._options
const id = extractField(document, idField)

if (id == null) {
@@ -820,7 +853,7 @@
const fieldValue = extractField(document, field)
if (fieldValue == null) continue

const tokens = tokenize(fieldValue.toString(), field)
const tokens = tokenize(stringifyField(fieldValue, field), field)
const fieldId = this._fieldIds[field]

const uniqueTerms = new Set(tokens).size
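One detail implied by the change to `remove` above: removal re-derives a document's terms from the document itself, so `stringifyField`, like `tokenize` and `processTerm`, must be deterministic. A stringifier that produces different output at add and remove time leaves stale terms in the index (and triggers the `console.warn` the tests guard against). A contrived counter-example, purely illustrative:

```javascript
// Anti-pattern: non-deterministic stringification means the terms recomputed
// at remove() time no longer match what add() put in the index.
const badOptions = {
  fields: ['title'],
  stringifyField: (fieldValue) => `${fieldValue} ${Date.now()}` // don't do this
}
```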
@@ -2143,6 +2176,7 @@ const termToQuerySpec = (options: SearchOptions) => (term: string, i: number, te
const defaultOptions = {
idField: 'id',
extractField: (document: any, fieldName: string) => document[fieldName],
stringifyField: (fieldValue: any, fieldName: string) => fieldValue.toString(),
tokenize: (text: string) => text.split(SPACE_OR_PUNCTUATION),
processTerm: (term: string) => term.toLowerCase(),
fields: undefined,
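A note on the defaults above (this follows from the defaults shown, not from anything new in this PR): because the default `stringifyField` calls `toString()` and the default tokenizer splits on spaces and punctuation, an array field already yields one term per element; the new option matters when that implicit conversion is not what you want, for example with dates, booleans, or nested objects.

```javascript
// With the defaults, an array field still indexes one term per element:
['dante', 'virgilio'].toString() // => 'dante,virgilio'
// ...which the default tokenizer then splits on the comma into two terms.
```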