Skip to content

Commit 3b861dc

Browse files
committed
remove fuzziness from number segments in SingleError mode. close #50.
1 parent 2bfa63f commit 3b861dc

File tree

7 files changed

+322
-263
lines changed

7 files changed

+322
-263
lines changed

demos/testdata.json

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161900,6 +161900,13 @@
161900161900
"который",
161901161901
"Alle må holde i et tau og være sikret slik at de er trygge. Etter isbreen må de gå i en ganske stor steinrøys og så er de endelig fremme.",
161902161902
"interface-id-face-scan-2-identification-angle-secure-human-id-person-face-security-brackets",
161903-
"Sabine State Bank and Trust Company"
161903+
"Sabine State Bank and Trust Company",
161904+
"abc1234",
161905+
"abc2134",
161906+
"ab1c234",
161907+
"abc 1234",
161908+
"abc123acb",
161909+
"abc123acb supper",
161910+
"1234"
161904161911
]
161905161912
}

dist/uFuzzy.cjs.js

Lines changed: 78 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -165,27 +165,30 @@ function uFuzzy(opts) {
165165
_intraTrn = 0,
166166
_intraDel = 0;
167167

168-
let plen = p.length;
169-
170-
// prevent junk matches by requiring stricter rules for short terms
171-
if (plen <= 4) {
172-
if (plen >= 3) {
173-
// one swap in non-first char when 3-4 chars
174-
_intraTrn = Math.min(intraTrn, 1);
175-
176-
// or one insertion when 4 chars
177-
if (plen == 4)
178-
_intraIns = Math.min(intraIns, 1);
168+
// only-digits strings should match exactly, else special rules for short strings
169+
if (/[^\d]/.test(p)) {
170+
let plen = p.length;
171+
172+
// prevent junk matches by requiring stricter rules for short terms
173+
if (plen <= 4) {
174+
if (plen >= 3) {
175+
// one swap in non-first char when 3-4 chars
176+
_intraTrn = Math.min(intraTrn, 1);
177+
178+
// or one insertion when 4 chars
179+
if (plen == 4)
180+
_intraIns = Math.min(intraIns, 1);
181+
}
182+
// else exact match when 1-2 chars
183+
}
184+
// use supplied opts
185+
else {
186+
_intraSlice = intraSlice;
187+
_intraIns = intraIns,
188+
_intraSub = intraSub,
189+
_intraTrn = intraTrn,
190+
_intraDel = intraDel;
179191
}
180-
// else exact match when 1-2 chars
181-
}
182-
// use supplied opts
183-
else {
184-
_intraSlice = intraSlice;
185-
_intraIns = intraIns,
186-
_intraSub = intraSub,
187-
_intraTrn = intraTrn,
188-
_intraDel = intraDel;
189192
}
190193

191194
return {
@@ -223,6 +226,8 @@ function uFuzzy(opts) {
223226
return needle.split(interSplit).filter(t => t != '').map(v => v === EXACT_HERE ? exacts[j++] : v);
224227
};
225228

229+
const NUM_OR_ALPHA_RE = /[^\d]+|\d+/g;
230+
226231
const prepQuery = (needle, capt = 0, interOR = false) => {
227232
// split on punct, whitespace, num-alpha, and upper-lower boundaries
228233
let parts = split(needle);
@@ -243,64 +248,72 @@ function uFuzzy(opts) {
243248
// allows single mutations within each term
244249
if (intraMode == 1) {
245250
reTpl = parts.map((p, pi) => {
246-
let {
247-
intraSlice,
248-
intraIns,
249-
intraSub,
250-
intraTrn,
251-
intraDel,
252-
} = intraRules(p);
253-
254-
if (intraIns + intraSub + intraTrn + intraDel == 0)
255-
return p + contrs[pi];
256-
257251
if (p[0] === '"')
258252
return escapeRegExp(p.slice(1, -1));
259253

260-
let [lftIdx, rgtIdx] = intraSlice;
261-
let lftChar = p.slice(0, lftIdx); // prefix
262-
let rgtChar = p.slice(rgtIdx); // suffix
254+
let reTpl = '';
263255

264-
let chars = p.slice(lftIdx, rgtIdx);
256+
// split into numeric and alpha parts, so numbers are only matched as following punct or alpha boundaries, without swaps or insertions
257+
for (let m of p.matchAll(NUM_OR_ALPHA_RE)) {
258+
let p = m[0];
265259

266-
// neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest
267-
// but skip when search term contains leading repetition (aardvark, aaa)
268-
if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0])
269-
lftChar += '(?!' + lftChar + ')';
260+
let {
261+
intraSlice,
262+
intraIns,
263+
intraSub,
264+
intraTrn,
265+
intraDel,
266+
} = intraRules(p);
270267

271-
let numChars = chars.length;
268+
if (intraIns + intraSub + intraTrn + intraDel == 0)
269+
reTpl += p + contrs[pi];
270+
else {
271+
let [lftIdx, rgtIdx] = intraSlice;
272+
let lftChar = p.slice(0, lftIdx); // prefix
273+
let rgtChar = p.slice(rgtIdx); // suffix
272274

273-
let variants = [p];
275+
let chars = p.slice(lftIdx, rgtIdx);
274276

275-
// variants with single char substitutions
276-
if (intraSub) {
277-
for (let i = 0; i < numChars; i++)
278-
variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar);
279-
}
277+
// neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest
278+
// but skip when search term contains leading repetition (aardvark, aaa)
279+
if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0])
280+
lftChar += '(?!' + lftChar + ')';
280281

281-
// variants with single transpositions
282-
if (intraTrn) {
283-
for (let i = 0; i < numChars - 1; i++) {
284-
if (chars[i] != chars[i+1])
285-
variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar);
286-
}
287-
}
282+
let numChars = chars.length;
288283

289-
// variants with single char omissions
290-
if (intraDel) {
291-
for (let i = 0; i < numChars; i++)
292-
variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar);
293-
}
284+
let variants = [p];
294285

295-
// variants with single char insertions
296-
if (intraIns) {
297-
let intraInsTpl = lazyRepeat(intraChars, 1);
286+
// variants with single char substitutions
287+
if (intraSub) {
288+
for (let i = 0; i < numChars; i++)
289+
variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar);
290+
}
298291

299-
for (let i = 0; i < numChars; i++)
300-
variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar);
301-
}
292+
// variants with single transpositions
293+
if (intraTrn) {
294+
for (let i = 0; i < numChars - 1; i++) {
295+
if (chars[i] != chars[i+1])
296+
variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar);
297+
}
298+
}
302299

303-
let reTpl = '(?:' + variants.join('|') + ')' + contrs[pi];
300+
// variants with single char omissions
301+
if (intraDel) {
302+
for (let i = 0; i < numChars; i++)
303+
variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar);
304+
}
305+
306+
// variants with single char insertions
307+
if (intraIns) {
308+
let intraInsTpl = lazyRepeat(intraChars, 1);
309+
310+
for (let i = 0; i < numChars; i++)
311+
variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar);
312+
}
313+
314+
reTpl += '(?:' + variants.join('|') + ')' + contrs[pi];
315+
}
316+
}
304317

305318
// console.log(reTpl);
306319

dist/uFuzzy.esm.js

Lines changed: 78 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -163,27 +163,30 @@ function uFuzzy(opts) {
163163
_intraTrn = 0,
164164
_intraDel = 0;
165165

166-
let plen = p.length;
167-
168-
// prevent junk matches by requiring stricter rules for short terms
169-
if (plen <= 4) {
170-
if (plen >= 3) {
171-
// one swap in non-first char when 3-4 chars
172-
_intraTrn = Math.min(intraTrn, 1);
173-
174-
// or one insertion when 4 chars
175-
if (plen == 4)
176-
_intraIns = Math.min(intraIns, 1);
166+
// only-digits strings should match exactly, else special rules for short strings
167+
if (/[^\d]/.test(p)) {
168+
let plen = p.length;
169+
170+
// prevent junk matches by requiring stricter rules for short terms
171+
if (plen <= 4) {
172+
if (plen >= 3) {
173+
// one swap in non-first char when 3-4 chars
174+
_intraTrn = Math.min(intraTrn, 1);
175+
176+
// or one insertion when 4 chars
177+
if (plen == 4)
178+
_intraIns = Math.min(intraIns, 1);
179+
}
180+
// else exact match when 1-2 chars
181+
}
182+
// use supplied opts
183+
else {
184+
_intraSlice = intraSlice;
185+
_intraIns = intraIns,
186+
_intraSub = intraSub,
187+
_intraTrn = intraTrn,
188+
_intraDel = intraDel;
177189
}
178-
// else exact match when 1-2 chars
179-
}
180-
// use supplied opts
181-
else {
182-
_intraSlice = intraSlice;
183-
_intraIns = intraIns,
184-
_intraSub = intraSub,
185-
_intraTrn = intraTrn,
186-
_intraDel = intraDel;
187190
}
188191

189192
return {
@@ -221,6 +224,8 @@ function uFuzzy(opts) {
221224
return needle.split(interSplit).filter(t => t != '').map(v => v === EXACT_HERE ? exacts[j++] : v);
222225
};
223226

227+
const NUM_OR_ALPHA_RE = /[^\d]+|\d+/g;
228+
224229
const prepQuery = (needle, capt = 0, interOR = false) => {
225230
// split on punct, whitespace, num-alpha, and upper-lower boundaries
226231
let parts = split(needle);
@@ -241,64 +246,72 @@ function uFuzzy(opts) {
241246
// allows single mutations within each term
242247
if (intraMode == 1) {
243248
reTpl = parts.map((p, pi) => {
244-
let {
245-
intraSlice,
246-
intraIns,
247-
intraSub,
248-
intraTrn,
249-
intraDel,
250-
} = intraRules(p);
251-
252-
if (intraIns + intraSub + intraTrn + intraDel == 0)
253-
return p + contrs[pi];
254-
255249
if (p[0] === '"')
256250
return escapeRegExp(p.slice(1, -1));
257251

258-
let [lftIdx, rgtIdx] = intraSlice;
259-
let lftChar = p.slice(0, lftIdx); // prefix
260-
let rgtChar = p.slice(rgtIdx); // suffix
252+
let reTpl = '';
261253

262-
let chars = p.slice(lftIdx, rgtIdx);
254+
// split into numeric and alpha parts, so numbers are only matched as following punct or alpha boundaries, without swaps or insertions
255+
for (let m of p.matchAll(NUM_OR_ALPHA_RE)) {
256+
let p = m[0];
263257

264-
// neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest
265-
// but skip when search term contains leading repetition (aardvark, aaa)
266-
if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0])
267-
lftChar += '(?!' + lftChar + ')';
258+
let {
259+
intraSlice,
260+
intraIns,
261+
intraSub,
262+
intraTrn,
263+
intraDel,
264+
} = intraRules(p);
268265

269-
let numChars = chars.length;
266+
if (intraIns + intraSub + intraTrn + intraDel == 0)
267+
reTpl += p + contrs[pi];
268+
else {
269+
let [lftIdx, rgtIdx] = intraSlice;
270+
let lftChar = p.slice(0, lftIdx); // prefix
271+
let rgtChar = p.slice(rgtIdx); // suffix
270272

271-
let variants = [p];
273+
let chars = p.slice(lftIdx, rgtIdx);
272274

273-
// variants with single char substitutions
274-
if (intraSub) {
275-
for (let i = 0; i < numChars; i++)
276-
variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar);
277-
}
275+
// neg lookahead to prefer matching 'Test' instead of 'tTest' in ManifestTest or fittest
276+
// but skip when search term contains leading repetition (aardvark, aaa)
277+
if (intraIns == 1 && lftChar.length == 1 && lftChar != chars[0])
278+
lftChar += '(?!' + lftChar + ')';
278279

279-
// variants with single transpositions
280-
if (intraTrn) {
281-
for (let i = 0; i < numChars - 1; i++) {
282-
if (chars[i] != chars[i+1])
283-
variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar);
284-
}
285-
}
280+
let numChars = chars.length;
286281

287-
// variants with single char omissions
288-
if (intraDel) {
289-
for (let i = 0; i < numChars; i++)
290-
variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar);
291-
}
282+
let variants = [p];
292283

293-
// variants with single char insertions
294-
if (intraIns) {
295-
let intraInsTpl = lazyRepeat(intraChars, 1);
284+
// variants with single char substitutions
285+
if (intraSub) {
286+
for (let i = 0; i < numChars; i++)
287+
variants.push(lftChar + chars.slice(0, i) + intraChars + chars.slice(i + 1) + rgtChar);
288+
}
296289

297-
for (let i = 0; i < numChars; i++)
298-
variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar);
299-
}
290+
// variants with single transpositions
291+
if (intraTrn) {
292+
for (let i = 0; i < numChars - 1; i++) {
293+
if (chars[i] != chars[i+1])
294+
variants.push(lftChar + chars.slice(0, i) + chars[i+1] + chars[i] + chars.slice(i + 2) + rgtChar);
295+
}
296+
}
300297

301-
let reTpl = '(?:' + variants.join('|') + ')' + contrs[pi];
298+
// variants with single char omissions
299+
if (intraDel) {
300+
for (let i = 0; i < numChars; i++)
301+
variants.push(lftChar + chars.slice(0, i + 1) + '?' + chars.slice(i + 1) + rgtChar);
302+
}
303+
304+
// variants with single char insertions
305+
if (intraIns) {
306+
let intraInsTpl = lazyRepeat(intraChars, 1);
307+
308+
for (let i = 0; i < numChars; i++)
309+
variants.push(lftChar + chars.slice(0, i) + intraInsTpl + chars.slice(i) + rgtChar);
310+
}
311+
312+
reTpl += '(?:' + variants.join('|') + ')' + contrs[pi];
313+
}
314+
}
302315

303316
// console.log(reTpl);
304317

0 commit comments

Comments
 (0)