From e3cf37f06db47203121a00acb62e3ead2ddf12b5 Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Fri, 7 Feb 2025 19:02:13 +0800 Subject: [PATCH] Add methods to iterate exemplar characters --- ffi/capi/bindings/js/ExemplarCharacters.d.ts | 6 ++ ffi/capi/bindings/js/ExemplarCharacters.mjs | 25 +++++ ffi/capi/bindings/js/StringIterator.d.ts | 14 +++ ffi/capi/bindings/js/StringIterator.mjs | 57 +++++++++++ ffi/capi/bindings/js/index.d.ts | 2 + ffi/capi/bindings/js/index.mjs | 2 + ffi/capi/src/exemplar_chars.rs | 23 +++++ ffi/capi/src/lib.rs | 2 + ffi/capi/src/string_iter.rs | 25 +++++ tutorials/npm/index.html | 58 ++++++++++- tutorials/npm/src/ts/app.ts | 4 +- tutorials/npm/src/ts/exemplar-characters.ts | 100 +++++++++++++++++++ 12 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 ffi/capi/bindings/js/StringIterator.d.ts create mode 100644 ffi/capi/bindings/js/StringIterator.mjs create mode 100644 ffi/capi/src/string_iter.rs create mode 100644 tutorials/npm/src/ts/exemplar-characters.ts diff --git a/ffi/capi/bindings/js/ExemplarCharacters.d.ts b/ffi/capi/bindings/js/ExemplarCharacters.d.ts index 04dd2deffaf..a84c823cd09 100644 --- a/ffi/capi/bindings/js/ExemplarCharacters.d.ts +++ b/ffi/capi/bindings/js/ExemplarCharacters.d.ts @@ -1,7 +1,9 @@ // generated by diplomat-tool +import type { CodePointRangeIterator } from "./CodePointRangeIterator" import type { DataError } from "./DataError" import type { DataProvider } from "./DataProvider" import type { Locale } from "./Locale" +import type { StringIterator } from "./StringIterator" import type { pointer, codepoint } from "./diplomat-runtime.d.ts"; @@ -23,6 +25,10 @@ export class ExemplarCharacters { contains(cp: codepoint): boolean; + codePointRanges(): CodePointRangeIterator; + + strings(): StringIterator; + static createMain(locale: Locale): ExemplarCharacters; static createMainWithProvider(provider: DataProvider, locale: Locale): ExemplarCharacters; diff --git 
a/ffi/capi/bindings/js/ExemplarCharacters.mjs b/ffi/capi/bindings/js/ExemplarCharacters.mjs index b29482f35c3..28f11963ab7 100644 --- a/ffi/capi/bindings/js/ExemplarCharacters.mjs +++ b/ffi/capi/bindings/js/ExemplarCharacters.mjs @@ -1,7 +1,9 @@ // generated by diplomat-tool +import { CodePointRangeIterator } from "./CodePointRangeIterator.mjs" import { DataError } from "./DataError.mjs" import { DataProvider } from "./DataProvider.mjs" import { Locale } from "./Locale.mjs" +import { StringIterator } from "./StringIterator.mjs" import wasm from "./diplomat-wasm.mjs"; import * as diplomatRuntime from "./diplomat-runtime.mjs"; @@ -73,6 +75,29 @@ export class ExemplarCharacters { finally {} } + codePointRanges() { + // This lifetime edge depends on lifetimes 'a + let aEdges = [this]; + + const result = wasm.icu4x_ExemplarCharacters_code_point_ranges_mv1(this.ffiValue); + + try { + return new CodePointRangeIterator(diplomatRuntime.internalConstructor, result, [], aEdges); + } + + finally {} + } + + strings() { + const result = wasm.icu4x_ExemplarCharacters_strings_mv1(this.ffiValue); + + try { + return new StringIterator(diplomatRuntime.internalConstructor, result, []); + } + + finally {} + } + static createMain(locale) { const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true); diff --git a/ffi/capi/bindings/js/StringIterator.d.ts b/ffi/capi/bindings/js/StringIterator.d.ts new file mode 100644 index 00000000000..aef718b9e5b --- /dev/null +++ b/ffi/capi/bindings/js/StringIterator.d.ts @@ -0,0 +1,14 @@ +// generated by diplomat-tool +import type { pointer, codepoint } from "./diplomat-runtime.d.ts"; + + +/** An iterator over strings +*/ + + +export class StringIterator { + + get ffiValue(): pointer; + + next(): string; +} \ No newline at end of file diff --git a/ffi/capi/bindings/js/StringIterator.mjs b/ffi/capi/bindings/js/StringIterator.mjs new file mode 100644 index 00000000000..8092609a4fb --- /dev/null +++ 
b/ffi/capi/bindings/js/StringIterator.mjs @@ -0,0 +1,57 @@ +// generated by diplomat-tool +import wasm from "./diplomat-wasm.mjs"; +import * as diplomatRuntime from "./diplomat-runtime.mjs"; + + +/** An iterator over strings +*/ +const StringIterator_box_destroy_registry = new FinalizationRegistry((ptr) => { + wasm.icu4x_StringIterator_destroy_mv1(ptr); +}); + +export class StringIterator { + + // Internal ptr reference: + #ptr = null; + + // Lifetimes are only to keep dependencies alive. + // Since JS won't garbage collect until there are no incoming edges. + #selfEdge = []; + + #internalConstructor(symbol, ptr, selfEdge) { + if (symbol !== diplomatRuntime.internalConstructor) { + console.error("StringIterator is an Opaque type. You cannot call its constructor."); + return; + } + + this.#ptr = ptr; + this.#selfEdge = selfEdge; + + // Are we being borrowed? If not, we can register. + if (this.#selfEdge.length === 0) { + StringIterator_box_destroy_registry.register(this, this.#ptr); + } + + return this; + } + get ffiValue() { + return this.#ptr; + } + + next() { + const write = new diplomatRuntime.DiplomatWriteBuf(wasm); + wasm.icu4x_StringIterator_next_mv1(this.ffiValue, write.buffer); + + try { + return write.readString8(); + } + + finally { + write.free(); + } + } + + constructor(symbol, ptr, selfEdge) { + return this.#internalConstructor(...arguments) + } +} \ No newline at end of file diff --git a/ffi/capi/bindings/js/index.d.ts b/ffi/capi/bindings/js/index.d.ts index d1ad5e64e88..4e506ffc7ca 100644 --- a/ffi/capi/bindings/js/index.d.ts +++ b/ffi/capi/bindings/js/index.d.ts @@ -168,6 +168,8 @@ export { WordBreakIteratorUtf8 } from "./WordBreakIteratorUtf8" export { WordSegmenter } from "./WordSegmenter" +export { StringIterator } from "./StringIterator" + export { Time } from "./Time" export { TimeZoneInfo } from "./TimeZoneInfo" diff --git a/ffi/capi/bindings/js/index.mjs b/ffi/capi/bindings/js/index.mjs index 0f7e4a77f9c..0231aa0490a 100644 --- 
a/ffi/capi/bindings/js/index.mjs +++ b/ffi/capi/bindings/js/index.mjs @@ -166,6 +166,8 @@ export { WordBreakIteratorUtf8 } from "./WordBreakIteratorUtf8.mjs" export { WordSegmenter } from "./WordSegmenter.mjs" +export { StringIterator } from "./StringIterator.mjs" + export { Time } from "./Time.mjs" export { TimeZoneInfo } from "./TimeZoneInfo.mjs" diff --git a/ffi/capi/src/exemplar_chars.rs b/ffi/capi/src/exemplar_chars.rs index 39f90093033..f1afe4c7b3b 100644 --- a/ffi/capi/src/exemplar_chars.rs +++ b/ffi/capi/src/exemplar_chars.rs @@ -7,6 +7,7 @@ #[diplomat::attr(auto, namespace = "icu4x")] pub mod ffi { use alloc::boxed::Box; + use crate::{properties_iter::ffi::CodePointRangeIterator, string_iter::ffi::StringIterator}; #[cfg(feature = "buffer_provider")] use crate::provider::ffi::DataProvider; @@ -47,6 +48,28 @@ pub mod ffi { self.0.as_borrowed().contains32(cp) } + /// Get an iterator of all the code point ranges in the current exemplar character set. + #[diplomat::rust_link( + icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList::code_points, + FnInStruct + )] + pub fn code_point_ranges<'a>(&'a self) -> Box<CodePointRangeIterator<'a>> { + let ranges = self.0.as_borrowed().code_points().iter_ranges().collect::<Vec<_>>(); + + Box::new(CodePointRangeIterator(Box::new(ranges.into_iter()))) + } + + /// Get an iterator of all the strings in the current exemplar character set. + #[diplomat::rust_link( + icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList::strings, + FnInStruct + )] + pub fn strings(&self) -> Box<StringIterator> { + let strings = self.0.as_borrowed().strings().iter().map(|s| s.to_string()).collect::<Vec<_>>(); + + Box::new(StringIterator(Box::new(strings.into_iter()))) + } + /// Create an [`ExemplarCharacters`] for the "main" set of exemplar characters for a given locale, using compiled data. 
#[diplomat::rust_link( icu::locale::exemplar_chars::ExemplarCharacters::try_new_main, diff --git a/ffi/capi/src/lib.rs b/ffi/capi/src/lib.rs index 063bac8aaae..a6f565891d8 100644 --- a/ffi/capi/src/lib.rs +++ b/ffi/capi/src/lib.rs @@ -118,6 +118,8 @@ pub mod properties_sets; pub mod properties_unisets; #[cfg(feature = "properties")] pub mod script; +#[cfg(feature = "properties")] +pub mod string_iter; #[cfg(feature = "segmenter")] pub mod segmenter_grapheme; #[cfg(feature = "segmenter")] diff --git a/ffi/capi/src/string_iter.rs b/ffi/capi/src/string_iter.rs new file mode 100644 index 00000000000..90253b114cc --- /dev/null +++ b/ffi/capi/src/string_iter.rs @@ -0,0 +1,25 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#[diplomat::bridge] +#[diplomat::abi_rename = "icu4x_{0}_mv1"] +#[diplomat::attr(auto, namespace = "icu4x")] +pub mod ffi { + use core::fmt::Write; + use alloc::boxed::Box; + + /// An iterator over strings + #[diplomat::opaque] + pub struct StringIterator( + pub Box<dyn Iterator<Item = String>>, + ); + + impl StringIterator { + /// Advance the iterator by one and return the next string, terminated with a null byte. + /// If there are no more strings to be iterated, an empty string is returned. + pub fn next(&mut self, write: &mut DiplomatWrite) { + let _ = write.write_str(&self.0.next().map(|mut s| { s.push('\0'); s }).unwrap_or_default()); + } + } +} diff --git a/tutorials/npm/index.html b/tutorials/npm/index.html index 80335620877..4d6569b654a 100644 --- a/tutorials/npm/index.html +++ b/tutorials/npm/index.html @@ -87,6 +87,9 @@ + @@ -235,6 +238,59 @@ +
+
+ + +
+
Main
+
+

+
+
+
+
Auxiliary
+
+

+
+
+
+
Punctuation
+
+

+
+
+
+
Numbers
+
+

+
+
+
+
Index
+
+

+
+
+
+
@@ -287,4 +343,4 @@ } - \ No newline at end of file + diff --git a/tutorials/npm/src/ts/app.ts b/tutorials/npm/src/ts/app.ts index b8d4df70dac..eedd34b029b 100644 --- a/tutorials/npm/src/ts/app.ts +++ b/tutorials/npm/src/ts/app.ts @@ -1,6 +1,7 @@ import * as fdf from './fixed-decimal'; import * as dtf from './date-time'; import * as seg from './segmenter'; +import * as ech from './exemplar-characters'; import 'bootstrap/js/dist/tab'; import 'bootstrap/js/dist/dropdown'; @@ -10,5 +11,6 @@ import 'bootstrap/js/dist/collapse'; fdf.setup(); dtf.setup(); seg.setup(); + ech.setup(); (document.querySelector("#bigspinner") as HTMLElement).style.display = "none"; -})() \ No newline at end of file +})() diff --git a/tutorials/npm/src/ts/exemplar-characters.ts b/tutorials/npm/src/ts/exemplar-characters.ts new file mode 100644 index 00000000000..9eb69721810 --- /dev/null +++ b/tutorials/npm/src/ts/exemplar-characters.ts @@ -0,0 +1,100 @@ +import { Locale, ExemplarCharacters } from "icu4x"; + +type Kind = typeof KINDS[number]; +const KINDS = ['Main', 'Auxiliary', 'Punctuation', 'Numbers', 'Index'] as const; + +type ExemplarCharactersData = Partial<Record<Kind, string[] | Error>>; + +export class ExemplarCharactersWrapper { + #exemplarCharacters: ExemplarCharacters; + #collator: Intl.Collator; + + constructor(locid: string, kind: Kind) { + this.#exemplarCharacters = ExemplarCharacters[`create${kind}`](Locale.fromString(locid)); + this.#collator = new Intl.Collator(locid); + } + + *#chars(): Generator<string> { + const codePointRanges = this.#exemplarCharacters.codePointRanges(); + + while (true) { + const { start, end, done } = codePointRanges.next(); + if (done) return; + for (let cp = start; cp <= end; ++cp) yield String.fromCodePoint(cp); + } + } + + *#strings(): Generator<string> { + const codePointRanges = this.#exemplarCharacters.strings(); + let str: string; + while ((str = codePointRanges.next())) yield str.slice(0, -1); + } + + alphabet(): string[] { + return [...this.#chars(), 
...this.#strings()].sort(this.#collator.compare); + } +} + +export class ExemplarCharactersDemo { + #displayFn: (x: ExemplarCharactersData) => void; + + constructor(displayFn: (x: ExemplarCharactersData) => void) { + this.#displayFn = displayFn; + this.#render('en'); + } + + setLocale(locid: string): void { + this.#render(locid); + } + + #render(locid: string): void { + try { + const charsDisplay = Object.fromEntries(KINDS.map((k) => [ + k, + new ExemplarCharactersWrapper(locid, k).alphabet(), + ])); + this.#displayFn(charsDisplay); + } catch (e: any) { + if (e.error_value) { + this.#displayFn({ Main: new Error(`Error: ${e.error_value}`) }); + } else { + this.#displayFn({ Main: new Error(`Unexpected Error: ${e}`) }); + } + } + } +} + +export function setup(): void { + const exemplarCharactersDemo = new ExemplarCharactersDemo((x) => { + for (const k of KINDS) { + const val = x[k] + const el = document.getElementById(`ech-output-${k.toLowerCase()}`)! + + if (val instanceof Error) { + el.textContent = val.message; + } else { + el.textContent = ''; + for (const text of val ?? []) { + el.append( + Object.assign(document.createElement('span'), { textContent: text, className: 'badge bg-secondary me-1' }), + ); + } + } + } + }); + + const otherLocaleBtn = document.getElementById('ech-locale-other') as HTMLInputElement; + otherLocaleBtn.addEventListener('click', () => exemplarCharactersDemo.setLocale(otherLocaleInput.value)); + + const otherLocaleInput = document.getElementById('ech-locale-other-input') as HTMLInputElement; + otherLocaleInput.addEventListener('input', () => { + otherLocaleBtn.checked = true; + exemplarCharactersDemo.setLocale(otherLocaleInput.value); + }); + + for (const btn of document.querySelectorAll('input[name="ech-locale"]')) { + if (btn.value !== 'other') { + btn.addEventListener('click', () => exemplarCharactersDemo.setLocale(btn.value)); + } + } +}