Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## Unreleased

- Utils
- Retire the `icu_harfbuzz` crate. The `icu_properties` and `icu_normalizer` types now directly implement the `harfbuzz-traits` traits.

## icu4x 2.1

- Components
Expand Down
1 change: 0 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ components/segmenter/ @aethanyc @makotokato @sffc
components/time/ @nekevss @robertbastian @sffc
ffi/capi/ @Manishearth
ffi/ecma402/ @filmil
ffi/harfbuzz/ @hsivonen
provider/blob/ @sffc @Manishearth
provider/core/ @sffc @Manishearth
provider/source/ @sffc @robertbastian @Manishearth
Expand Down
12 changes: 2 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ members = [
"ffi/capi",
"ffi/ecma402",
"ffi/freertos",
"ffi/harfbuzz",

# Provider
"provider/adapters",
Expand Down Expand Up @@ -158,7 +157,6 @@ icu_time = { version = "~2.1.1", path = "components/time", default-features = fa
icu_capi = { version = "~2.1.1", path = "ffi/capi", default-features = false }
# icu4x_ecma402 never used as a dep
# icu_freertos never used as a dep
# icu_harfbuzz never used as a dep

# Provider
icu_provider_export = { version = "~2.1.1", path = "provider/export", default-features = false }
Expand Down Expand Up @@ -245,6 +243,7 @@ arrayvec = { version = "0.7.2", default-features = false }
core_maths = { version = "0.1.0", default-features = false }
displaydoc = { version = "0.2.3", default-features = false }
either = { version = "1.9.0", default-features = false }
harfbuzz-traits = { version = "0.6.0", default-features = false }
libc_alloc = { version = "1.0.6", default-features = false }
log = { version = "0.4.17", default-features = false }
memchr = { version = "2.6.0", default-features = false }
Expand Down
2 changes: 2 additions & 0 deletions components/normalizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ license.workspace = true
all-features = true

[dependencies]
harfbuzz-traits = { workspace = true, optional = true }
icu_collections = { workspace = true }
icu_properties = { workspace = true, optional = true }
icu_provider = { workspace = true }
Expand Down Expand Up @@ -58,6 +59,7 @@ icu_properties = ["dep:icu_properties"]
utf16_iter = ["dep:utf16_iter", "dep:write16"]
# For dealing with potentially ill-formed UTF8 strings
utf8_iter = ["dep:utf8_iter"]
harfbuzz_traits = ["dep:harfbuzz-traits"]

# added by accident
write16 = []
Expand Down
6 changes: 4 additions & 2 deletions components/normalizer/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 68 additions & 0 deletions components/normalizer/src/harfbuzz.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::properties::{
CanonicalCombiningClassMap, CanonicalCombiningClassMapBorrowed, CanonicalComposition,
CanonicalCompositionBorrowed, CanonicalDecomposition, CanonicalDecompositionBorrowed,
Decomposed,
};
use harfbuzz_traits::{CombiningClassFunc, ComposeFunc, DecomposeFunc};

/// Exposes the borrowed canonical composition data through the `harfbuzz-traits`
/// [`ComposeFunc`] trait.
impl ComposeFunc for CanonicalCompositionBorrowed<'_> {
    fn compose(&self, a: char, b: char) -> Option<char> {
        // The type-qualified call below targets the type's own `compose`, which takes its
        // receiver by value, so it is handed a copy of the borrowed data.
        let table = *self;
        CanonicalCompositionBorrowed::compose(table, a, b)
    }
}

/// Owned composition data answers [`ComposeFunc`] queries through its borrowed view.
impl ComposeFunc for CanonicalComposition {
    fn compose(&self, a: char, b: char) -> Option<char> {
        let borrowed = self.as_borrowed();
        ComposeFunc::compose(&borrowed, a, b)
    }
}

/// A shared reference to owned composition data is itself usable as a [`ComposeFunc`].
impl ComposeFunc for &'_ CanonicalComposition {
    fn compose(&self, a: char, b: char) -> Option<char> {
        // `*self` is the `&CanonicalComposition`; reuse the implementation for the owned type.
        <CanonicalComposition as ComposeFunc>::compose(*self, a, b)
    }
}

/// Exposes the borrowed canonical decomposition data through the `harfbuzz-traits`
/// [`DecomposeFunc`] trait.
impl DecomposeFunc for CanonicalDecompositionBorrowed<'_> {
    fn decompose(&self, ab: char) -> Option<(char, char)> {
        let (first, second) = match CanonicalDecompositionBorrowed::decompose(self, ab) {
            // No decomposition to report for `ab`.
            Decomposed::Default => return None,
            Decomposed::Expansion(first, second) => (first, second),
            // A singleton has no second character; U+0000 fills that slot.
            Decomposed::Singleton(single) => (single, '\0'),
        };
        Some((first, second))
    }
}

/// Owned decomposition data answers [`DecomposeFunc`] queries through its borrowed view.
impl DecomposeFunc for CanonicalDecomposition {
    fn decompose(&self, ab: char) -> Option<(char, char)> {
        let borrowed = self.as_borrowed();
        DecomposeFunc::decompose(&borrowed, ab)
    }
}

/// A shared reference to owned decomposition data is itself usable as a [`DecomposeFunc`].
impl DecomposeFunc for &'_ CanonicalDecomposition {
    fn decompose(&self, ab: char) -> Option<(char, char)> {
        // `*self` is the `&CanonicalDecomposition`; reuse the implementation for the owned type.
        <CanonicalDecomposition as DecomposeFunc>::decompose(*self, ab)
    }
}

/// Exposes the borrowed canonical combining class map through the `harfbuzz-traits`
/// [`CombiningClassFunc`] trait.
impl CombiningClassFunc for CanonicalCombiningClassMapBorrowed<'_> {
    // Returns whatever `get_u8` reports for `ch`, unchanged.
    fn combining_class(&self, ch: char) -> u8 {
        self.get_u8(ch)
    }
}

/// The owned combining class map answers [`CombiningClassFunc`] queries through its
/// borrowed view.
impl CombiningClassFunc for CanonicalCombiningClassMap {
    fn combining_class(&self, ch: char) -> u8 {
        let borrowed = self.as_borrowed();
        CombiningClassFunc::combining_class(&borrowed, ch)
    }
}

/// A shared reference to the owned combining class map is itself usable as a
/// [`CombiningClassFunc`].
impl CombiningClassFunc for &'_ CanonicalCombiningClassMap {
    fn combining_class(&self, ch: char) -> u8 {
        // `*self` is the `&CanonicalCombiningClassMap`; reuse the implementation for the
        // owned type.
        <CanonicalCombiningClassMap as CombiningClassFunc>::combining_class(*self, ch)
    }
}
8 changes: 6 additions & 2 deletions components/normalizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@
//!
//! The `properties` module provides the non-recursive canonical decomposition operation on a per `char` basis and
//! the canonical composition operation given two `char`s. It also provides access to the Canonical Combining Class
//! property. These operations are primarily meant for [HarfBuzz](https://harfbuzz.github.io/) via the
//! [`icu_harfbuzz`](https://docs.rs/icu_harfbuzz/latest/icu_harfbuzz/) crate.
//! property. These operations are primarily meant for [HarfBuzz](https://harfbuzz.github.io/): the types
//! [`CanonicalComposition`](properties::CanonicalComposition), [`CanonicalDecomposition`](properties::CanonicalDecomposition),
//! and [`CanonicalCombiningClassMap`](properties::CanonicalCombiningClassMap) implement the [`harfbuzz_traits`] traits if
//! the `harfbuzz_traits` Cargo feature is enabled.
//!
//! Notably, this normalizer does _not_ provide the normalization “quick check” that can result in “maybe” in
//! addition to “yes” and “no”. The normalization checks provided by this crate always give a definitive
Expand Down Expand Up @@ -99,6 +101,8 @@ macro_rules! ccc {
};
}

#[cfg(feature = "harfbuzz_traits")]
mod harfbuzz;
pub mod properties;
pub mod provider;
pub mod uts46;
Expand Down
4 changes: 3 additions & 1 deletion components/properties/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ databake = { workspace = true, features = ["derive"], optional = true}
serde = { workspace = true, features = ["derive"], optional = true }
icu_locale_core = { workspace = true, features = ["zerovec"] }

harfbuzz-traits = { workspace = true, optional = true }
unicode-bidi = { workspace = true, optional = true }

icu_properties_data = { workspace = true, optional = true }
Expand All @@ -39,6 +40,7 @@ icu = { path = "../../components/icu", default-features = false }
default = ["compiled_data"]
serde = ["dep:serde", "icu_locale_core/serde", "zerovec/serde", "icu_collections/serde", "icu_provider/serde", "zerotrie/serde"]
datagen = ["serde", "dep:databake", "zerovec/databake", "icu_collections/databake", "icu_locale_core/databake", "zerotrie/databake", "icu_provider/export"]
unicode_bidi = [ "dep:unicode-bidi" ]
unicode_bidi = ["dep:unicode-bidi"]
harfbuzz_traits = ["dep:harfbuzz-traits"]
compiled_data = ["dep:icu_properties_data", "icu_provider/baked"]
alloc = ["zerovec/alloc", "icu_collections/alloc", "serde?/alloc"]
6 changes: 6 additions & 0 deletions components/properties/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

162 changes: 162 additions & 0 deletions components/properties/src/harfbuzz.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::props::{BidiMirroringGlyph, GeneralCategory, Script};
use crate::provider::{PropertyEnumScriptV1, PropertyNameShortScriptV1};
use crate::{
CodePointMapData, CodePointMapDataBorrowed, PropertyNamesShort, PropertyNamesShortBorrowed,
};
use icu_provider::prelude::*;

use harfbuzz_traits::{GeneralCategoryFunc, MirroringFunc, ScriptFunc};

/// Exposes a borrowed [`GeneralCategory`] code point map through the `harfbuzz-traits`
/// [`GeneralCategoryFunc`] trait.
impl GeneralCategoryFunc for CodePointMapDataBorrowed<'_, GeneralCategory> {
    fn general_category(&self, ch: char) -> harfbuzz_traits::GeneralCategory {
        // Look up the ICU4X value, then convert it to the `harfbuzz_traits` enum.
        let category = self.get(ch);
        harfbuzz_traits::GeneralCategory::from(category)
    }
}

/// An owned [`GeneralCategory`] map answers [`GeneralCategoryFunc`] queries through its
/// borrowed view.
impl GeneralCategoryFunc for CodePointMapData<GeneralCategory> {
    fn general_category(&self, ch: char) -> harfbuzz_traits::GeneralCategory {
        let borrowed = self.as_borrowed();
        GeneralCategoryFunc::general_category(&borrowed, ch)
    }
}

/// A shared reference to an owned [`GeneralCategory`] map is itself usable as a
/// [`GeneralCategoryFunc`].
impl GeneralCategoryFunc for &'_ CodePointMapData<GeneralCategory> {
    fn general_category(&self, ch: char) -> harfbuzz_traits::GeneralCategory {
        // `*self` is the `&CodePointMapData<_>`; reuse the implementation for the owned type.
        <CodePointMapData<GeneralCategory> as GeneralCategoryFunc>::general_category(*self, ch)
    }
}

/// Exposes a borrowed [`BidiMirroringGlyph`] code point map through the `harfbuzz-traits`
/// [`MirroringFunc`] trait.
impl MirroringFunc for CodePointMapDataBorrowed<'_, BidiMirroringGlyph> {
    fn mirroring(&self, ch: char) -> char {
        match self.get(ch).mirroring_glyph {
            Some(mirrored) => mirrored,
            // No mirroring glyph recorded: the character is returned unchanged.
            None => ch,
        }
    }
}

/// An owned [`BidiMirroringGlyph`] map answers [`MirroringFunc`] queries through its
/// borrowed view.
impl MirroringFunc for CodePointMapData<BidiMirroringGlyph> {
    fn mirroring(&self, ch: char) -> char {
        let borrowed = self.as_borrowed();
        MirroringFunc::mirroring(&borrowed, ch)
    }
}

/// A shared reference to an owned [`BidiMirroringGlyph`] map is itself usable as a
/// [`MirroringFunc`].
impl MirroringFunc for &'_ CodePointMapData<BidiMirroringGlyph> {
    fn mirroring(&self, ch: char) -> char {
        // `*self` is the `&CodePointMapData<_>`; reuse the implementation for the owned type.
        <CodePointMapData<BidiMirroringGlyph> as MirroringFunc>::mirroring(*self, ch)
    }
}

/// Answers [`ScriptFunc`] queries by combining the script map with the short script names.
impl ScriptFunc for HarfbuzzScriptDataBorrowed<'_> {
    fn script(&self, ch: char) -> [u8; 4] {
        let value = self.script.get(ch);
        let subtag = match self.script_names.get_locale_script(value) {
            Some(subtag) => subtag,
            // No script subtag is available for this value: fall back to `Zzzz`.
            None => icu_locale_core::subtags::script!("Zzzz"),
        };
        subtag.into_raw()
    }
}

/// Owned script data answers [`ScriptFunc`] queries through its borrowed view.
impl ScriptFunc for HarfbuzzScriptData {
    fn script(&self, ch: char) -> [u8; 4] {
        let borrowed = self.as_borrowed();
        ScriptFunc::script(&borrowed, ch)
    }
}

/// A shared reference to owned script data is itself usable as a [`ScriptFunc`].
impl ScriptFunc for &'_ HarfbuzzScriptData {
    fn script(&self, ch: char) -> [u8; 4] {
        // `*self` is the `&HarfbuzzScriptData`; reuse the implementation for the owned type.
        <HarfbuzzScriptData as ScriptFunc>::script(*self, ch)
    }
}

impl From<GeneralCategory> for harfbuzz_traits::GeneralCategory {
fn from(val: GeneralCategory) -> Self {
use GeneralCategory::*;
match val {
Unassigned => harfbuzz_traits::GeneralCategory::Unassigned,
UppercaseLetter => harfbuzz_traits::GeneralCategory::UppercaseLetter,
LowercaseLetter => harfbuzz_traits::GeneralCategory::LowercaseLetter,
TitlecaseLetter => harfbuzz_traits::GeneralCategory::TitlecaseLetter,
ModifierLetter => harfbuzz_traits::GeneralCategory::ModifierLetter,
OtherLetter => harfbuzz_traits::GeneralCategory::OtherLetter,
NonspacingMark => harfbuzz_traits::GeneralCategory::NonSpacingMark,
SpacingMark => harfbuzz_traits::GeneralCategory::SpacingMark,
EnclosingMark => harfbuzz_traits::GeneralCategory::EnclosingMark,
DecimalNumber => harfbuzz_traits::GeneralCategory::DecimalNumber,
LetterNumber => harfbuzz_traits::GeneralCategory::LetterNumber,
OtherNumber => harfbuzz_traits::GeneralCategory::OtherNumber,
SpaceSeparator => harfbuzz_traits::GeneralCategory::SpaceSeparator,
LineSeparator => harfbuzz_traits::GeneralCategory::LineSeparator,
ParagraphSeparator => harfbuzz_traits::GeneralCategory::ParagraphSeparator,
Control => harfbuzz_traits::GeneralCategory::Control,
Format => harfbuzz_traits::GeneralCategory::Format,
PrivateUse => harfbuzz_traits::GeneralCategory::PrivateUse,
Surrogate => harfbuzz_traits::GeneralCategory::Surrogate,
DashPunctuation => harfbuzz_traits::GeneralCategory::DashPunctuation,
OpenPunctuation => harfbuzz_traits::GeneralCategory::OpenPunctuation,
ClosePunctuation => harfbuzz_traits::GeneralCategory::ClosePunctuation,
ConnectorPunctuation => harfbuzz_traits::GeneralCategory::ConnectPunctuation,
InitialPunctuation => harfbuzz_traits::GeneralCategory::InitialPunctuation,
FinalPunctuation => harfbuzz_traits::GeneralCategory::FinalPunctuation,
OtherPunctuation => harfbuzz_traits::GeneralCategory::OtherPunctuation,
MathSymbol => harfbuzz_traits::GeneralCategory::MathSymbol,
CurrencySymbol => harfbuzz_traits::GeneralCategory::CurrencySymbol,
ModifierSymbol => harfbuzz_traits::GeneralCategory::ModifierSymbol,
OtherSymbol => harfbuzz_traits::GeneralCategory::OtherSymbol,
}
}
}

/// Harfbuzz data for the [`ScriptFunc`] implementation
///
/// This is the borrowed form: it holds the borrowed counterparts of the two pieces of data
/// stored in [`HarfbuzzScriptData`].
#[derive(Debug)]
pub struct HarfbuzzScriptDataBorrowed<'a> {
    /// Maps a code point to its [`Script`] value.
    script: CodePointMapDataBorrowed<'a, Script>,
    /// Short names for [`Script`] values, used to turn a value into a script subtag.
    script_names: PropertyNamesShortBorrowed<'a, Script>,
}

/// Harfbuzz data for the [`ScriptFunc`] implementation
///
/// This is the owned form: it holds a [`Script`] code point map together with the short
/// names for [`Script`] values.
#[derive(Debug)]
pub struct HarfbuzzScriptData {
    /// Maps a code point to its [`Script`] value.
    script: CodePointMapData<Script>,
    /// Short names for [`Script`] values, used to turn a value into a script subtag.
    script_names: PropertyNamesShort<Script>,
}

impl HarfbuzzScriptData {
    /// Creates the script data for the [`ScriptFunc`] implementation from compiled data.
    ///
    /// Note that this returns the borrowed form, [`HarfbuzzScriptDataBorrowed`], with a
    /// `'static` lifetime rather than `Self`.
    #[cfg(feature = "compiled_data")]
    #[expect(clippy::new_ret_no_self)]
    pub fn new() -> HarfbuzzScriptDataBorrowed<'static> {
        let script = CodePointMapData::<Script>::new();
        let script_names = PropertyNamesShort::<Script>::new();
        HarfbuzzScriptDataBorrowed {
            script,
            script_names,
        }
    }

    /// Creates a [`HarfbuzzScriptData`] by loading the script map and the short script
    /// names from `provider`.
    ///
    /// The script map is loaded first; the first [`DataError`] encountered is returned.
    pub fn try_new_unstable<D>(provider: &D) -> Result<Self, DataError>
    where
        D: DataProvider<PropertyEnumScriptV1> + DataProvider<PropertyNameShortScriptV1> + ?Sized,
    {
        Ok(Self {
            script: CodePointMapData::<Script>::try_new_unstable(provider)?,
            script_names: PropertyNamesShort::try_new_unstable(provider)?,
        })
    }

    #[cfg(feature = "serde")]
    #[doc = icu_provider::gen_buffer_unstable_docs!(BUFFER,Self::try_new_unstable)]
    pub fn try_new_with_buffer_provider(
        provider: &(impl icu_provider::buf::BufferProvider + ?Sized),
    ) -> Result<Self, DataError> {
        // Wrap the buffer provider so it deserializes, then reuse the typed constructor.
        let deserializing = provider.as_deserializing();
        Self::try_new_unstable(&deserializing)
    }

    /// Builds the borrowed form by borrowing both fields.
    fn as_borrowed(&self) -> HarfbuzzScriptDataBorrowed<'_> {
        let script = self.script.as_borrowed();
        let script_names = self.script_names.as_borrowed();
        HarfbuzzScriptDataBorrowed {
            script,
            script_names,
        }
    }
}
Loading