@@ -11,12 +11,18 @@ use icu::locale::{
1111use  icu_experimental:: displaynames:: provider:: RegionDisplayNamesV1 ; 
1212use  icu_provider:: prelude:: * ; 
1313use  icu_provider_source:: SourceDataProvider ; 
14+ use  litemap:: LiteMap ; 
1415use  ndarray:: { Array2 ,  Axis } ; 
1516use  tinystr:: TinyAsciiStr ; 
17+ use  zerotrie:: ZeroTrieSimpleAscii ; 
1618
1719#[ test]  
1820fn  dnametest ( )  { 
19-     let  provider = SourceDataProvider :: new ( ) ; 
21+     let  provider = SourceDataProvider :: new_custom ( ) 
22+         . with_cldr ( & std:: path:: PathBuf :: from ( 
23+             "/home/sffc/lib/cldr-46.0.0-json-full" , 
24+         ) ) 
25+         . unwrap ( ) ; 
2026
2127    let  locales:  BTreeMap < DataIdentifierCow < ' _ > ,  usize >  =
2228        IterableDataProvider :: < RegionDisplayNamesV1 > :: iter_ids ( & provider) 
@@ -40,16 +46,23 @@ fn dnametest() {
4046        } ) 
4147        . collect ( ) ; 
4248
43-     let  en_names = payloads
44-         . get ( & DataIdentifierCow :: from_locale ( locale ! ( "en" ) . into ( ) ) ) 
45-         . unwrap ( ) ; 
49+     let  unique_names:  Vec < & str >  = payloads
50+         . values ( ) 
51+         . flat_map ( |v| v. get ( ) . names . iter_values ( ) ) 
52+         . collect :: < BTreeSet < _ > > ( ) 
53+         . into_iter ( ) 
54+         . collect ( ) ; 
55+     let  unique_names_required_bits = ( unique_names. len ( )  as  f64 ) . log2 ( ) . ceil ( )  as  usize ; 
56+     println ! ( "unique_names: {} ({unique_names_required_bits})" ,  unique_names. len( ) ) ; 
4657
47-     let  regions = en_names
58+     let  regions:  BTreeSet < TinyAsciiStr < 3 > >  = payloads
59+         . get ( & DataIdentifierCow :: from_locale ( locale ! ( "en" ) . into ( ) ) ) 
60+         . unwrap ( ) 
4861        . get ( ) 
4962        . names 
5063        . iter_keys ( ) 
5164        . map ( |s| s. try_into_tinystr ( ) . unwrap ( ) ) 
52-         . collect :: < BTreeSet < TinyAsciiStr < 3 > > > ( ) ; 
65+         . collect ( ) ; 
5366
5467    let  expander = LocaleExpander :: try_new_common_unstable ( & provider) . unwrap ( ) ; 
5568    let  fallbacker = LocaleFallbacker :: try_new_unstable ( & provider) . unwrap ( ) ; 
@@ -80,11 +93,14 @@ fn dnametest() {
8093        . collect ( ) ; 
8194
8295    let  mut  dense_matrix =
83-         Array2 :: < Option < & str > > :: default ( ( locales. len ( )  + script_locales. len ( ) ,  regions. len ( ) ) ) ; 
96+         Array2 :: < Option < usize > > :: default ( ( locales. len ( )  + script_locales. len ( ) ,  regions. len ( ) ) ) ; 
8497
8598    for  ( i,  ( _locale,  payload) )  in  payloads. iter ( ) . enumerate ( )  { 
8699        for  ( j,  region)  in  regions. iter ( ) . enumerate ( )  { 
87-             dense_matrix[ ( i,  j) ]  = payload. get ( ) . names . get ( & region. to_unvalidated ( ) ) ; 
100+             if  let  Some ( name)  = payload. get ( ) . names . get ( & region. to_unvalidated ( ) )  { 
101+                 let  index = unique_names. binary_search ( & name) . unwrap ( ) ; 
102+                 dense_matrix[ ( i,  j) ]  = Some ( index) ; 
103+             } 
88104        } 
89105    } 
90106
@@ -137,11 +153,66 @@ fn dnametest() {
137153        values. iter ( ) . filter ( |v| v. is_some ( ) ) . count ( ) 
138154    } ) ; 
139155
140-     for  ( i,  locale)  in  locales. keys ( ) . enumerate ( )  { 
141-         println ! ( "{locale:<3}: {}" ,  large_small[ i] ) ; 
142-     } 
143-     for  ( i,  locale)  in  script_locales. keys ( ) . enumerate ( )  { 
144-         let  i = i + locales. len ( ) ; 
156+     for  ( i,  locale)  in  locales. keys ( ) . chain ( script_locales. keys ( ) ) . enumerate ( )  { 
145157        println ! ( "{locale:<3}: {}" ,  large_small[ i] ) ; 
146158    } 
159+ 
160+     let  locales_only_zerotrie:  ZeroTrieSimpleAscii < Vec < u8 > >  = locales
161+         . keys ( ) 
162+         . chain ( script_locales. keys ( ) ) 
163+         . enumerate ( ) 
164+         . map ( |( i,  locale) | ( locale. to_string ( ) ,  i) ) 
165+         . collect ( ) ; 
166+     println ! ( "locales_only_zerotrie: {}" ,  locales_only_zerotrie. byte_len( ) ) ; 
167+ 
168+     let  regions_only_zerotrie:  ZeroTrieSimpleAscii < Vec < u8 > >  = regions. iter ( ) . enumerate ( ) 
169+         . map ( |( i,  locale) | ( locale. to_string ( ) ,  i) ) 
170+         . collect ( ) ; 
171+ 
172+     println ! ( "regions_only_zerotrie: {}" ,  regions_only_zerotrie. byte_len( ) ) ; 
173+ 
174+     let  sparse_map:  LiteMap < String ,  usize >  = locales
175+         . keys ( ) 
176+         . chain ( script_locales. keys ( ) ) 
177+         . enumerate ( ) 
178+         . flat_map ( |( i,  locale) | { 
179+             let  dense_matrix = & dense_matrix; 
180+             regions. iter ( ) . enumerate ( ) . filter_map ( move  |( j,  region) | { 
181+                 dense_matrix[ ( i,  j) ] . map ( |index| ( format ! ( "{locale}/{region}" ) ,  index) ) 
182+             } ) 
183+         } ) 
184+         . collect ( ) ; 
185+     println ! ( "sparse_map: {}" ,  sparse_map. len( ) ) ; 
186+ 
187+     let  sparse_zerotrie:  ZeroTrieSimpleAscii < Vec < u8 > >  =
188+         sparse_map. iter ( ) . map ( |( k,  v) | ( k,  * v) ) . collect ( ) ; 
189+     println ! ( "sparse_zerotrie: {}" ,  sparse_zerotrie. byte_len( ) ) ; 
190+ 
191+     let  dense_row_bit_size = regions. len ( )  *  unique_names_required_bits; 
192+ 
193+     let  mut  num_dense_locales = 0 ; 
194+     let  hybrid_sparse_map:  LiteMap < String ,  usize >  = locales
195+         . keys ( ) 
196+         . chain ( script_locales. keys ( ) ) 
197+         . enumerate ( ) 
198+         . flat_map ( |( i,  locale) | { 
199+             let  dense_matrix = & dense_matrix; 
200+             let  row:  Vec < ( String ,  usize ) >  = regions. iter ( ) . enumerate ( ) . filter_map ( move  |( j,  region) | { 
201+                 dense_matrix[ ( i,  j) ] . map ( |index| ( format ! ( "{locale}/{region}" ) ,  index) ) 
202+             } ) . collect ( ) ; 
203+             let  inner_zerotrie:  ZeroTrieSimpleAscii < _ >  = row. iter ( ) . map ( |( k,  v) | ( k,  * v) ) . collect ( ) ; 
204+             if  inner_zerotrie. byte_len ( )  *  8  > dense_row_bit_size { 
205+                 num_dense_locales += 1 ; 
206+                 vec ! [ ( locale. to_string( ) ,  0 ) ] . into_iter ( ) 
207+             }  else  { 
208+                 row. into_iter ( ) 
209+             } 
210+         } ) 
211+         . collect ( ) ; 
212+     println ! ( "hybrid_sparse_map: {}" ,  hybrid_sparse_map. len( ) ) ; 
213+     println ! ( "num_dense_locales: {} ({} B)" ,  num_dense_locales,  num_dense_locales *  dense_row_bit_size / 8 ) ; 
214+ 
215+     let  hybrid_sparse_zerotrie:  ZeroTrieSimpleAscii < Vec < u8 > >  =
216+         hybrid_sparse_map. iter ( ) . map ( |( k,  v) | ( k,  * v) ) . collect ( ) ; 
217+     println ! ( "hybrid_sparse_zerotrie: {}" ,  hybrid_sparse_zerotrie. byte_len( ) ) ; 
147218} 
0 commit comments