Skip to content

Commit

Permalink
Add basic support for wildcard queries
Browse files Browse the repository at this point in the history
  • Loading branch information
knubie committed Oct 15, 2023
1 parent 4ade3bb commit 3958ef4
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 0 deletions.
77 changes: 77 additions & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,57 @@ exports.search = utils.toPromise(function (opts, callback) {
if (typeof stale === 'string') {
queryOpts.stale = stale;
}
//
// Wildcards
// 1. Detect if the query term contains an asterisk
// 2. Remove keys property from the queryOpts
// 3. Perform _search_query with limit (100?)
// 4. Match the key with the query term.
// 5. If wildcard is in the front, compare last n chars of both strings
// 6. If wildcard is in the back, compare first n chars of both strings
// 7. If wildcard is in the middle, compare first n and last n of both strings.

// 1. Split the query terms on '*'. Are there any query terms with a wildcard?
// *
var wildcardTerms = queryTerms.filter(function(queryTerm) {
let sections = queryTerm.split('*');
// Term needs to contain something other than '*'.
return sections.length > 1 && sections.filter((s) => s.length > 0 ).length > 0;
})
var hasWildCard = wildcardTerms.length > 0;

if (hasWildCard) {
delete queryOpts.keys;
// For v0 let's not bother setting a limit.
return pouch._search_query(mapFun, queryOpts).then(function (res) {
return res.rows.filter(function(d) {
var text = d.key.substring(1),
term = wildcardTerms[0],
sections = term.split("*");
return matchWildcard(term, sections, text);
});
// Copied from step 3 below.
}).then(function (rows) {
total_rows = rows.length;
// filter before fetching docs or applying highlighting
// for a slight optimization, since for now we've only fetched ids/scores
return (typeof limit === 'number' && limit >= 0) ?
rows.slice(skip, skip + limit) : skip > 0 ? rows.slice(skip) : rows;
}).then(function (rows) {
if (includeDocs) {
return applyIncludeDocs(pouch, rows);
}
return rows;
}).then(function (rows) {
if (highlighting) {
return applyHighlighting(pouch, opts, rows, fieldBoosts, docIdsToFieldsToQueryTerms);
}
return rows;

}).then(function (rows) {
callback(null, {total_rows: total_rows, rows: rows});
});
}

// search algorithm, basically classic TF-IDF
//
Expand Down Expand Up @@ -453,3 +504,29 @@ function isFiltered(doc, filter, db) {
if (typeof PouchDB !== 'undefined') {
PouchDB.plugin(exports);
}

/**
 * Tests whether `text` matches a query term that contains `*` wildcards.
 *
 * Supported patterns (a `*` stands for any run of characters, including none):
 *   - leading wildcard      `*oobar`  -> text ends with "oobar"
 *   - trailing wildcard     `fooba*`  -> text starts with "fooba"
 *   - middle wildcard       `foo*ar`  -> text starts with "foo" and ends with "ar"
 *   - surrounding wildcards `*ooba*`  -> text contains "ooba"
 *
 * Any other term with two or more wildcards (e.g. `f*o*r`) is not supported
 * and never matches. A term without any wildcard matches only identical text.
 *
 * @param {string} term - the raw query term, e.g. "foo*ar"
 * @param {string[]} sections - the result of `term.split('*')`
 * @param {string} text - the candidate text to test
 * @returns {boolean} true when `text` satisfies the wildcard pattern
 */
function matchWildcard(term, sections, text) {
  // No wildcard at all: there is no second section, so fall back to equality
  // instead of dereferencing an undefined suffix.
  if (sections.length === 1) {
    return text === term;
  }

  var isSurrounded = sections.length === 3 &&
    term[0] === '*' &&
    term[term.length - 1] === '*';
  if (isSurrounded) {
    // A substring test covers every window, including the final one that a
    // manual `i < text.length - needle.length` scan would skip.
    return text.includes(sections[1]);
  }

  // Other multi-wildcard shapes are not supported.
  if (sections.length > 2) {
    return false;
  }

  var front = sections[0];
  var back = sections[1];
  // The prefix and suffix must not overlap inside `text`; otherwise a term
  // like "ab*ba" would wrongly match "aba".
  if (text.length < front.length + back.length) {
    return false;
  }
  return text.startsWith(front) && text.endsWith(back);
}
45 changes: 45 additions & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,51 @@ function tests(dbName, dbType) {

var db;

// Exercises `*` wildcard handling in db.search(). Every case runs in its own
// `it` so that it gets a fresh database from beforeEach/afterEach and the row
// count reflects only the single document inserted for that case.
describe('wildcard matching', function () {
  this.timeout(30000);

  beforeEach(function () {
    db = new Pouch(dbName);
    return db;
  });
  afterEach(function () {
    return db.destroy();
  });

  // Inserts one document whose `text` field is `text`, searches that field
  // for `query`, and asserts that the document is found (or not).
  // Returns the promise so the caller can hand it to Mocha.
  var wildcardSearch = function ({query, text}, shouldFind) {
    // bulkDocs expects `docs` to be an array of documents.
    return db.bulkDocs({docs: [{text: text}]}).then(function () {
      var opts = {
        fields: ['text'],
        query: query
      };
      return db.search(opts);
    }).then(function (res) {
      res.rows.length.should.equal(shouldFind ? 1 : 0);
    });
  };

  // Each test returns the promise; without that Mocha finishes the test
  // before the assertions run and failures go unnoticed.
  it('matches leading wildcards (*oobar)', function () {
    return wildcardSearch({query: '*oobar', text: 'foobar'}, true);
  });
  it('matches leading wildcards (*bar)', function () {
    return wildcardSearch({query: '*bar', text: 'foobar'}, true);
  });
  it('matches trailing wildcards (fooba*)', function () {
    return wildcardSearch({query: 'fooba*', text: 'foobar'}, true);
  });
  it('matches trailing wildcards (foo*)', function () {
    return wildcardSearch({query: 'foo*', text: 'foobar'}, true);
  });
  it('matches center wildcards (foo*ar)', function () {
    return wildcardSearch({query: 'foo*ar', text: 'foobar'}, true);
  });
  it('does not match center wildcards when the suffix is missing', function () {
    return wildcardSearch({query: 'foo*ar', text: 'foo'}, false);
  });
  it('matches surrounding wildcards (*ooba*)', function () {
    return wildcardSearch({query: '*ooba*', text: 'foobar'}, true);
  });
  it('matches surrounding wildcards (*oob*)', function () {
    return wildcardSearch({query: '*oob*', text: 'foobar'}, true);
  });
});

describe(dbType + ': search test suite', function () {
this.timeout(30000);

Expand Down

0 comments on commit 3958ef4

Please sign in to comment.