13
13
public class GlyphList
14
14
{
15
15
/// <summary>
16
- /// <c>.notdef</c>.
16
+ /// The <c>.notdef</c> glyph name.
17
17
/// </summary>
18
18
public const string NotDefined = ".notdef" ;
19
19
@@ -37,17 +37,7 @@ public class GlyphList
37
37
public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList . Value ;
38
38
39
39
private static readonly Lazy < GlyphList > LazyZapfDingbatsGlyphList = new Lazy < GlyphList > ( ( ) => GlyphListFactory . Get ( "zapfdingbats" ) ) ;
40
-
41
- private static readonly HashSet < string > OddLigaturesNames =
42
- [
43
- // See https://en.wikipedia.org/wiki/Ligature_(writing)
44
- "f_f" , "f_i" , "f_j" , "f_l" , "f_a" , "f_e" , "f_o" , "f_r" , "f_s" , "f_t" , "f_b" , "f_h" ,
45
- "f_u" , "f_y" , "f_." , "f_," , "f_-" ,
46
- "f_f_i" , "f_f_l" ,
47
- // Sometimes, ligatures for ⟨st⟩ (st), ⟨ſt⟩ (ſt), ⟨ch⟩, ⟨ck⟩, ⟨ct⟩, ⟨Qu⟩ and ⟨Th⟩ are used (e.g. in the typeface Linux Libertine).
48
- "s_t" , "ſ_t" , "c_h" , "c_k" , "c_t" , "Q_u" , "T_h"
49
- ] ; // TODO - Go use for FrozenSet
50
-
40
+
51
41
/// <summary>
52
42
/// Zapf Dingbats.
53
43
/// </summary>
@@ -94,6 +84,7 @@ public string UnicodeCodePointToName(int unicodeValue)
94
84
95
85
/// <summary>
96
86
/// Get the unicode value for the glyph name.
87
+ /// See <see href="https://github.com/adobe-type-tools/agl-specification"/>.
97
88
/// </summary>
98
89
public string NameToUnicode ( string name )
99
90
{
@@ -113,25 +104,47 @@ public string NameToUnicode(string name)
113
104
}
114
105
115
106
string unicode ;
116
- // Remove suffixes
107
+ // 1. Drop all the characters from the glyph name starting with the first occurrence of a period (U+002E FULL STOP), if any.
117
108
if ( name . IndexOf ( '.' ) > 0 )
118
109
{
119
110
unicode = NameToUnicode ( name . Substring ( 0 , name . IndexOf ( '.' ) ) ) ;
120
111
}
121
- else if ( name . StartsWith ( "uni" ) && name . Length == 7 )
112
+ // 2. Split the remaining string into a sequence of components, using underscore (U+005F LOW LINE) as the delimiter.
113
+ else if ( name . IndexOf ( '_' ) > 0 )
114
+ {
115
+ /*
116
+ * MOZILLA-3136-0.pdf
117
+ * 68-1990-01_A.pdf
118
+ * TIKA-2054-0.pdf
119
+ */
120
+ var sb = new StringBuilder ( ) ;
121
+ foreach ( var s in name . Split ( '_' ) )
122
+ {
123
+ sb . Append ( NameToUnicode ( s ) ) ;
124
+ }
125
+
126
+ unicode = sb . ToString ( ) ;
127
+ }
128
+ // Otherwise, if the component is of the form ‘uni’ (U+0075, U+006E, and U+0069) followed by a sequence of uppercase hexadecimal
129
+ // digits (0–9 and A–F, meaning U+0030 through U+0039 and U+0041 through U+0046), if the length of that sequence is a multiple
130
+ // of four, and if each group of four digits represents a value in the ranges 0000 through D7FF or E000 through FFFF, then
131
+ // interpret each as a Unicode scalar value and map the component to the string made of those scalar values. Note that the range
132
+ // and digit-length restrictions mean that the ‘uni’ glyph name prefix can be used only with UVs in the Basic Multilingual Plane (BMP).
133
+ else if ( name . StartsWith ( "uni" ) && ( name . Length - 3 ) % 4 == 0 )
122
134
{
123
135
// test for Unicode name in the format uniXXXX[XXXX...] (one or more groups of four hex digits)
124
136
int nameLength = name . Length ;
125
137
126
138
var uniStr = new StringBuilder ( ) ;
127
139
128
- var foundUnicode = true ;
129
140
for ( int chPos = 3 ; chPos + 4 <= nameLength ; chPos += 4 )
130
141
{
131
- if ( ! int . TryParse ( name . AsSpanOrSubstring ( chPos , 4 ) , NumberStyles . HexNumber , CultureInfo . InvariantCulture , out var codePoint ) )
142
+ if ( ! int . TryParse ( name . AsSpanOrSubstring ( chPos , 4 ) ,
143
+ NumberStyles . HexNumber ,
144
+ CultureInfo . InvariantCulture ,
145
+ out var codePoint ) )
132
146
{
133
- foundUnicode = false ;
134
- break ;
147
+ return null ;
135
148
}
136
149
137
150
if ( codePoint > 0xD7FF && codePoint < 0xE000 )
@@ -142,42 +155,30 @@ public string NameToUnicode(string name)
142
155
uniStr . Append ( ( char ) codePoint ) ;
143
156
}
144
157
145
- if ( ! foundUnicode )
146
- {
147
- return null ;
148
- }
149
-
150
158
unicode = uniStr . ToString ( ) ;
151
159
}
152
- else if ( name . StartsWith ( "u" , StringComparison . Ordinal ) && name . Length == 5 )
160
+ // Otherwise, if the component is of the form ‘u’ (U+0075) followed by a sequence of four to six uppercase hexadecimal digits (0–9
161
+ // and A–F, meaning U+0030 through U+0039 and U+0041 through U+0046), and those digits represent a value in the ranges 0000 through
162
+ // D7FF or E000 through 10FFFF, then interpret it as a Unicode scalar value and map the component to the string made of this scalar value.
163
+ else if ( name . StartsWith ( "u" , StringComparison . Ordinal ) && name . Length >= 5 && name . Length <= 7 )
153
164
{
154
- // test for an alternate Unicode name representation uXXXX
155
165
var codePoint = int . Parse ( name . AsSpanOrSubstring ( 1 ) , NumberStyles . HexNumber , CultureInfo . InvariantCulture ) ;
156
166
157
167
if ( codePoint > 0xD7FF && codePoint < 0xE000 )
158
168
{
159
- throw new InvalidFontFormatException (
160
- $ "Unicode character name with disallowed code area: { name } ") ;
169
+ throw new InvalidFontFormatException ( $ "Unicode character name with disallowed code area: { name } ") ;
161
170
}
162
171
163
172
unicode = char . ConvertFromUtf32 ( codePoint ) ;
164
173
}
174
+ // Ad-hoc special cases
165
175
else if ( name . StartsWith ( "c" , StringComparison . OrdinalIgnoreCase ) && name . Length >= 3 && name . Length <= 4 )
166
176
{
167
177
// name representation cXX or cXXX, where the digits after 'c' are parsed as a decimal code point
168
178
var codePoint = int . Parse ( name . AsSpanOrSubstring ( 1 ) , NumberStyles . Integer , CultureInfo . InvariantCulture ) ;
169
- System . Diagnostics . Debug . Assert ( codePoint > 0 ) ;
170
179
unicode = char . ConvertFromUtf32 ( codePoint ) ;
171
180
}
172
- else if ( name . IndexOf ( '_' ) > 0 && OddLigaturesNames . Contains ( name ) )
173
- {
174
- /*
175
- * MOZILLA-3136-0.pdf
176
- * 68-1990-01_A.pdf
177
- * TIKA-2054-0.pdf
178
- */
179
- unicode = name . Replace ( "_" , "" ) ;
180
- }
181
+ // Otherwise, the specification maps the component to an empty string; this implementation returns null instead.
181
182
else
182
183
{
183
184
return null ;
0 commit comments