Skip to content

Commit 34abe86

Browse files
fix: properly index split infinitives
1 parent 95f8c69 commit 34abe86

10 files changed

+240
-18
lines changed

Diff for: docker-compose.yml

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ version: '3.8'
22
services:
33
redis:
44
image: redis/redis-stack:7.2.0-v9
5+
environment:
6+
- REDIS_ARGS=--loglevel verbose
57
ports:
68
- '${REDIS_PORT}:6379'
79
- '${REDIS_INSIGHT_PORT}:8001'

Diff for: packages/common/src/providers/index.ts

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export * from './redis.service';
2+
export * from './sanitization.service';
23
export * from './uib-api.service';
34
export * from './uib-redis.service';
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import { Injectable } from '@nestjs/common';
2+
3+
@Injectable()
export class SanitizationService {
  /**
   * Sanitizes a value for use in a search query by removing every character
   * that is not explicitly allowed, so that the value cannot be used to escape
   * the query.
   *
   * The allowed characters are ASCII letters and digits, the hyphen, the
   * space, and the accented letters used in Norwegian and in common loanwords:
   * æ ø å, é è ê, á à, ó ò ô, ä ö ü (upper and lower case) and ß. Every other
   * character, including all other punctuation, is removed.
   *
   * The value is normalized to Unicode NFC before filtering. Without this, a
   * letter typed as a base letter followed by a combining mark (for example
   * `e` + U+0301) would lose its accent, because the combining mark on its own
   * is not an allowed character.
   * @param value The value to sanitize.
   * @returns The sanitized value, in NFC form.
   */
  sanitize(value: string): string {
    return value
      .normalize('NFC')
      .replace(/[^a-zA-Z0-9æøåÆØÅéèêÉÈÊáàÁÀóòôÓÒÔäÄöÖüÜß\- ]/g, '');
  }
}

Diff for: packages/common/src/providers/uib-redis.service.ts

+14-12
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import {
1414
idForArticleKey,
1515
DeferredIterable,
1616
RawArticle,
17+
UibArticle,
18+
addUibArticleMetadata,
1719
} from '../types';
1820

1921
/*
@@ -72,7 +74,7 @@ export interface FullSearchResult extends UiBArticleIdentifier {
7274
/**
7375
* The article data.
7476
*/
75-
data: RawArticle;
77+
data: UibArticle;
7678
}
7779

7880
/**
@@ -107,15 +109,15 @@ export class UibRedisService {
107109
async getArticle(
108110
dictionary: UiBDictionary,
109111
articleId: number,
110-
): Promise<RawArticle | null>;
112+
): Promise<UibArticle | null>;
111113
/**
112114
* Fetches the article with the given ID from the given dictionary.
113115
* @param identifier The identifier of the article to fetch.
114116
* @returns The article data, or null if the article does not exist.
115117
*/
116118
async getArticle(
117119
identifier: UiBArticleIdentifier,
118-
): Promise<RawArticle | null>;
120+
): Promise<UibArticle | null>;
119121
/**
120122
* Fetches the article with the given ID from the given dictionary.
121123
* @param dictionaryOrIdentifier The dictionary to fetch the article from, or
@@ -127,19 +129,19 @@ export class UibRedisService {
127129
async getArticle(
128130
dictionaryOrIdentifier: UiBDictionary | UiBArticleIdentifier,
129131
articleId?: number,
130-
): Promise<RawArticle | null> {
132+
): Promise<UibArticle | null> {
131133
if (typeof dictionaryOrIdentifier === 'string') {
132134
return this.redis.client.json.get(
133135
uibKeys.article(dictionaryOrIdentifier, articleId!),
134-
) as Promise<RawArticle>;
136+
) as unknown as Promise<UibArticle>;
135137
}
136138

137139
return this.redis.client.json.get(
138140
uibKeys.article(
139141
dictionaryOrIdentifier.dictionary,
140142
dictionaryOrIdentifier.id,
141143
),
142-
) as Promise<RawArticle>;
144+
) as unknown as Promise<UibArticle>;
143145
}
144146

145147
/**
@@ -151,17 +153,17 @@ export class UibRedisService {
151153
async getArticlesFromDictionary(
152154
dictionary: UiBDictionary,
153155
articleIds: number[],
154-
): Promise<Map<number, RawArticle>> {
156+
): Promise<Map<number, UibArticle>> {
155157
const articleList = await this.redis.client.json.mGet(
156158
articleIds.map((id) => uibKeys.article(dictionary, id)),
157159
'$',
158160
);
159161

160-
const articles = new Map<number, RawArticle>();
162+
const articles = new Map<number, UibArticle>();
161163

162164
for (let i = 0; i < articleIds.length; i++) {
163165
if (articleList[i] !== null) {
164-
articles.set(articleIds[i], articleList[i] as RawArticle);
166+
articles.set(articleIds[i], articleList[i] as unknown as UibArticle);
165167
}
166168
}
167169

@@ -179,14 +181,14 @@ export class UibRedisService {
179181
transaction: RedisMulti,
180182
dictionary: UiBDictionary,
181183
articleId: number,
182-
article: RawArticle,
184+
article: RawArticle | UibArticle,
183185
): void {
184186
this.#throwIfWritesNotAllowed();
185187

186188
transaction.json.set(
187189
uibKeys.article(dictionary, articleId),
188190
'$',
189-
article as RedisJSON,
191+
addUibArticleMetadata(article) as unknown as RedisJSON,
190192
);
191193
}
192194

@@ -507,7 +509,7 @@ export class UibRedisService {
507509
total,
508510
results: new DeferredIterable(documents).map(({ id, value }) => ({
509511
...idForArticleKey(id),
510-
data: value as unknown as RawArticle,
512+
data: value as unknown as UibArticle,
511513
})),
512514
};
513515
}

Diff for: packages/common/src/types/deferred-iterable.types.ts

+11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { InspectOptionsStylized } from 'util';
2+
import { UnionIterable } from './union-iterable.types';
23

34
/**
45
* A deferred iterable is an iterable that performs some operations on each
@@ -18,9 +19,19 @@ export class DeferredIterable<T> implements Iterable<T> {
1819
}
1920
}
2021

22+
/**
 * Returns a new union iterable that is a union of this iterable and the
 * given iterable.
 * @param other The iterable to union with.
 * @returns A union iterable that yields the values of this iterable, followed
 * by the values of the given iterable.
 */
union<U>(other: Iterable<U>): UnionIterable<T | U> {
  // Union over `this` rather than over the underlying source data, so that
  // the values are produced by this instance's own iterator. Passing the raw
  // source data would bypass whatever this instance does when iterated and
  // required a type assertion to compile.
  return new UnionIterable<T | U>([this, other]);
}
30+
2131
/**
2232
* Defer a map operation to be performed when the iterable is iterated over.
2333
* Returns a new deferred iterable.
34+
* @param mapFn The map function to defer.
2435
*/
2536
map<TMapped>(mapFn: (value: T) => TMapped): DeferredIterable<TMapped> {
2637
const newIterable = new DeferredIterable(this.#originalData);

Diff for: packages/common/src/types/index.ts

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ export * from './deferred-iterable.types';
22
export * from './redis.types';
33
export * from './uib-api.types';
44
export * from './uib-redis.types';
5+
export * from './union-iterable.types';

Diff for: packages/common/src/types/uib-api.types.ts

+7-3
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,11 @@ export type ArticleElement =
176176
text: string;
177177
}
178178
| {
179-
type_: 'quote_inset' | 'explanation' | 'etymology_language';
179+
type_:
180+
| 'quote_inset'
181+
| 'explanation'
182+
| 'etymology_language'
183+
| 'etymology_reference';
180184
content: string | null;
181185
items: ArticleElement[];
182186
}
@@ -247,7 +251,7 @@ export type ArticleElement =
247251
/**
248252
* The format of the raw data returned by the UiB API when fetching an article.
249253
*/
250-
export type RawArticle = {
254+
export interface RawArticle {
251255
article_id: number;
252256
submitted?: string;
253257
suggest: string[];
@@ -275,4 +279,4 @@ export type RawArticle = {
275279
definitions: ArticleElement[];
276280
};
277281
to_index: string[];
278-
};
282+
}

Diff for: packages/common/src/types/uib-redis.types.ts

+52-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { DateTime } from 'luxon';
2-
import { RawArticleMetadata, UiBDictionary } from './uib-api.types';
2+
import { RawArticle, RawArticleMetadata, UiBDictionary } from './uib-api.types';
33

44
/**
55
* Article metadata.
@@ -57,9 +57,15 @@ export enum ArticleIndex {
5757
/** The index for article lemmas. */
5858
Lemma = 'lemma',
5959

60+
/** The index for exact-matching the whole lemma. */
61+
LemmaExact = 'lemma_exact',
62+
6063
/** The index for suggestion terms for a given article. */
6164
Suggest = 'suggest',
6265

66+
/** The index for exact-matching the whole suggestion term. */
67+
SuggestExact = 'suggest_exact',
68+
6369
/** The index for article etymology text. */
6470
Etymology = 'etymology',
6571

@@ -72,6 +78,9 @@ export enum ArticleIndex {
7278
/** The index for inflection word forms. */
7379
Inflection = 'inflection',
7480

81+
/** The index for exact-matching the whole inflection word form. */
82+
InflectionExact = 'inflection_exact',
83+
7584
/** The index for split infinitive attributes. */
7685
SplitInfinitive = 'split_infinitive',
7786
}
@@ -139,3 +148,45 @@ export const idForArticleKey = (key: string): UiBArticleIdentifier => {
139148
id: Number.parseInt(articleId, 10),
140149
};
141150
};
151+
152+
/**
153+
* The format of article data as stored in Redis.
154+
*/
155+
export interface UibArticle extends RawArticle {
156+
/** Additional metadata used for indexing and other purposes. */
157+
__ordbokapi__: UibArticleMetadata;
158+
}
159+
160+
/**
 * Type guard that reports whether the given article already carries the
 * `__ordbokapi__` metadata property, i.e. whether it is a processed UiB
 * article rather than a raw one.
 */
export function isUibArticle(article: RawArticle): article is UibArticle {
  const metadataKey = '__ordbokapi__';

  return metadataKey in article;
}
166+
167+
/**
 * Given a raw article, returns the article with additional metadata added.
 *
 * An article that already carries the metadata is returned as-is (the same
 * object). Otherwise a shallow copy is returned with the metadata attached;
 * the given article is not modified.
 * @param article The raw or already processed article.
 * @returns The article including the `__ordbokapi__` metadata.
 */
export function addUibArticleMetadata(article: RawArticle): UibArticle {
  if (isUibArticle(article)) {
    return article;
  }

  return {
    ...article,
    __ordbokapi__: {
      // Fall back to `false` when the article has no lemma list. Otherwise the
      // optional chain would yield `undefined`, which is not a boolean and
      // would be omitted entirely when the article is serialized to JSON.
      hasSplitInf: article.lemmas?.some((lemma) => lemma?.split_inf) ?? false,
    },
  };
}
182+
183+
/**
184+
* Additional metadata used for indexing and other purposes.
185+
*/
186+
export interface UibArticleMetadata {
187+
/**
188+
* Whether or not any of the lemmas represent a verb with a split infinitive
189+
* (kløyvd infinitiv).
190+
*/
191+
hasSplitInf: boolean;
192+
}

Diff for: packages/common/src/types/union-iterable.types.ts

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import type { InspectOptionsStylized } from 'util';
2+
import { DeferredIterable } from './deferred-iterable.types';
3+
4+
/**
 * Provides a type that represents an iterable of a union of types. When
 * iterated over, it will yield the values of the first iterable, then the
 * values of the second iterable, and so on.
 */
export class UnionIterable<T = never> implements Iterable<T> {
  readonly #iterables: Iterable<T>[];

  /**
   * @param iterables The iterables to combine, in iteration order. Defaults to
   * an empty union.
   */
  constructor(iterables?: Iterable<T>[]) {
    // Keep a private copy so that `concat` never mutates the caller's array.
    this.#iterables = iterables ? [...iterables] : [];
  }

  /**
   * Iterates over the iterables in the union and yields their values.
   */
  *[Symbol.iterator](): Iterator<T> {
    for (const iterable of this.#iterables) {
      yield* iterable;
    }
  }

  /**
   * Returns a new union iterable that is a union of this iterable and the
   * given iterable. This union iterable is left unchanged.
   * @param other The iterable to union with.
   * @returns A new union iterable that is a union of this iterable and the
   * given iterable.
   */
  union<U>(other: Iterable<U>): UnionIterable<T | U> {
    // Append with spread instead of `Array.prototype.concat`: `concat`
    // flattens array arguments, so passing an array as `other` would register
    // each of its elements as an iterable instead of the array itself.
    return new UnionIterable<T | U>([...this.#iterables, other]);
  }

  /**
   * Concatenates the given iterables to this union iterable. This mutates the
   * union iterable.
   * @param iterables The iterables to concatenate.
   * @returns This union iterable.
   */
  concat(...iterables: Iterable<T>[]): this {
    this.#iterables.push(...iterables);

    return this;
  }

  /**
   * The size of the union iterable. This is the number of iterables in the
   * union, not the total number of elements in all the iterables.
   */
  get size(): number {
    return this.#iterables.length;
  }

  /**
   * Defer a map operation to be performed when the iterable is iterated over.
   * Returns a new deferred iterable.
   * @param mapFn The map function to defer.
   * @returns A deferred iterable over the mapped values of this union.
   */
  map<TMapped>(mapFn: (value: T) => TMapped): DeferredIterable<TMapped> {
    return new DeferredIterable(this).map(mapFn);
  }

  /**
   * Custom inspect function for Node.js `util.inspect`. Allows for a more
   * readable output in the console. Note that this iterates over the whole
   * union in order to render its values.
   * @param depth The remaining inspection depth.
   * @param options The inspect options in effect.
   * @param inspect The inspect function to use for nested values.
   * @returns The string representation of this union iterable.
   */
  [Symbol.for('nodejs.util.inspect.custom')](
    depth: number,
    options: InspectOptionsStylized,
    inspect: (value: unknown, options: InspectOptionsStylized) => string,
  ): string {
    if (depth < 0) {
      return options.stylize('[UnionIterable]', 'special');
    }

    const newOptions = Object.assign({}, options, {
      depth:
        options.depth === null || options.depth === undefined
          ? null
          : options.depth - 1,
    });

    const arr = Array.from(this);
    const arrayInspect = inspect(arr, newOptions);

    // Strip the surrounding brackets of the array representation.
    return `UnionIterable(${arr.length}) {${arrayInspect.slice(1, -1)}}`;
  }
}

0 commit comments

Comments
 (0)