forked from UglyToad/PdfPig
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathGlyphList.cs
192 lines (161 loc) · 7.67 KB
/
GlyphList.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
namespace UglyToad.PdfPig.Fonts
{
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using Encodings;
using UglyToad.PdfPig.Util;
/// <summary>
/// A list which maps PostScript glyph names to unicode values.
/// </summary>
public class GlyphList
{
/// <summary>
/// <c>.notdef</c> name.
/// </summary>
/// <remarks>
/// Returned by <see cref="UnicodeCodePointToName"/> when no glyph name is mapped to the requested code point.
/// </remarks>
public const string NotDefined = ".notdef";
// Glyph name -> unicode string, stored exactly as it was passed to the constructor.
private readonly IReadOnlyDictionary<string, string> nameToUnicode;
// Reverse lookup (unicode string -> glyph name) built once by the constructor.
private readonly IReadOnlyDictionary<string, string> unicodeToName;
// Results for names that are missing from nameToUnicode and were resolved by the fallback rules in NameToUnicode.
// NOTE(review): this is a plain Dictionary that NameToUnicode writes to, and the static lists below hand out
// shared instances - confirm whether concurrent callers are possible.
private readonly Dictionary<string, string> oddNameToUnicodeCache = new Dictionary<string, string>();
// Lazy so that GlyphListFactory.Get("glyphlist") only runs the first time AdobeGlyphList is read.
// NOTE(review): GlyphListFactory is not visible here; presumably it loads a bundled resource by that name - confirm.
private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist"));
/// <summary>
/// The Adobe Glyph List.
/// </summary>
public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value;
// Lazy so that GlyphListFactory.Get("additional") only runs the first time AdditionalGlyphList is read.
private static readonly Lazy<GlyphList> LazyAdditionalGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("additional"));
/// <summary>
/// An extension to the Adobe Glyph List.
/// </summary>
public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value;
// Lazy so that GlyphListFactory.Get("zapfdingbats") only runs the first time ZapfDingbats is read.
private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats"));
/// <summary>
/// Zapf Dingbats.
/// </summary>
public static GlyphList ZapfDingbats => LazyZapfDingbatsGlyphList.Value;
/// <summary>
/// Create a glyph list from a glyph-name to unicode mapping and build the reverse (unicode to name) lookup.
/// </summary>
/// <param name="namesToUnicode">The mapping from glyph name to unicode string. It is stored as-is.</param>
internal GlyphList(IReadOnlyDictionary<string, string> namesToUnicode)
{
    nameToUnicode = namesToUnicode;

    var reverseLookup = new Dictionary<string, string>(namesToUnicode.Count);

    foreach (var entry in namesToUnicode)
    {
        var glyphName = entry.Key;
        var unicode = entry.Value;

        // Several glyph names can share one unicode value. A name known to one of these
        // encodings replaces whatever name was recorded for that value before.
        var isEncodingName =
            WinAnsiEncoding.Instance.ContainsName(glyphName) ||
            MacRomanEncoding.Instance.ContainsName(glyphName) ||
            MacExpertEncoding.Instance.ContainsName(glyphName) ||
            SymbolEncoding.Instance.ContainsName(glyphName) ||
            ZapfDingbatsEncoding.Instance.ContainsName(glyphName);

        // Otherwise the first name seen for a unicode value is kept.
        if (reverseLookup.ContainsKey(unicode) && !isEncodingName)
        {
            continue;
        }

        reverseLookup[unicode] = glyphName;
    }

    unicodeToName = reverseLookup;
}
/// <summary>
/// Get the name for the unicode code point value.
/// </summary>
/// <param name="unicodeValue">The unicode code point to look up.</param>
/// <returns>
/// The glyph name mapped to the code point, or <see cref="NotDefined"/> when no name is mapped
/// or when <paramref name="unicodeValue"/> is not a valid unicode scalar value.
/// </returns>
public string UnicodeCodePointToName(int unicodeValue)
{
    // char.ConvertFromUtf32 throws ArgumentOutOfRangeException for anything outside
    // 0..10FFFF and for the surrogate range D800..DFFF; such values cannot have a name.
    var isScalarValue = unicodeValue >= 0
                        && unicodeValue <= 0x10FFFF
                        && (unicodeValue < 0xD800 || unicodeValue > 0xDFFF);

    if (!isScalarValue)
    {
        return NotDefined;
    }

    var value = char.ConvertFromUtf32(unicodeValue);

    return unicodeToName.TryGetValue(value, out var result) ? result : NotDefined;
}
/// <summary>
/// Get the unicode value for the glyph name.
/// See <see href="https://github.com/adobe-type-tools/agl-specification"/>.
/// </summary>
/// <param name="name">The glyph name. May be <see langword="null"/>.</param>
/// <returns>
/// The unicode string for the name, or <see langword="null"/> when <paramref name="name"/> is
/// <see langword="null"/> or matches none of the supported forms. Names split on underscores yield the
/// concatenation of whatever components could be resolved, which may be an empty string.
/// </returns>
/// <exception cref="InvalidFontFormatException">
/// A <c>uniXXXX</c> or <c>uXXXX</c> style name encodes a value in the surrogate range D800 through DFFF.
/// </exception>
public string NameToUnicode(string name)
{
    if (name == null)
    {
        return null;
    }

    if (nameToUnicode.TryGetValue(name, out var unicodeValue))
    {
        return unicodeValue;
    }

    // Names resolved by the rules below on an earlier call.
    if (oddNameToUnicodeCache.TryGetValue(name, out var result))
    {
        return result;
    }

    string unicode;

    var periodIndex = name.IndexOf('.');

    // 1. Drop all the characters from the glyph name starting with the first occurrence of a period (U+002E FULL STOP), if any.
    // A leading period (index 0, e.g. ".notdef") is deliberately not handled here: there is nothing left to look up.
    if (periodIndex > 0)
    {
        unicode = NameToUnicode(name.Substring(0, periodIndex));
    }
    // 2. Split the remaining string into a sequence of components, using underscore (U+005F LOW LINE) as the delimiter.
    else if (name.IndexOf('_') > 0)
    {
        /*
         * MOZILLA-3136-0.pdf
         * 68-1990-01_A.pdf
         * TIKA-2054-0.pdf
         */
        var sb = new StringBuilder();
        foreach (var s in name.Split('_'))
        {
            // Components that cannot be resolved return null, which Append treats as empty.
            sb.Append(NameToUnicode(s));
        }

        unicode = sb.ToString();
    }
    // Otherwise, if the component is of the form ‘uni’ (U+0075, U+006E, and U+0069) followed by a sequence of uppercase hexadecimal
    // digits (0–9 and A–F, meaning U+0030 through U+0039 and U+0041 through U+0046), if the length of that sequence is a multiple
    // of four, and if each group of four digits represents a value in the ranges 0000 through D7FF or E000 through FFFF, then
    // interpret each as a Unicode scalar value and map the component to the string made of those scalar values. Note that the range
    // and digit-length restrictions mean that the ‘uni’ glyph name prefix can be used only with UVs in the Basic Multilingual Plane (BMP).
    else if (name.StartsWith("uni", StringComparison.Ordinal) && (name.Length - 3) % 4 == 0)
    {
        // test for Unicode name in the format uniXXXX where X is hex
        int nameLength = name.Length;
        var uniStr = new StringBuilder();
        for (int chPos = 3; chPos + 4 <= nameLength; chPos += 4)
        {
            if (!int.TryParse(name.AsSpanOrSubstring(chPos, 4),
                    NumberStyles.HexNumber,
                    CultureInfo.InvariantCulture,
                    out var codePoint))
            {
                return null;
            }

            if (codePoint > 0xD7FF && codePoint < 0xE000)
            {
                throw new InvalidFontFormatException($"Unicode character name with disallowed code area: {name}");
            }

            // Four hex digits always fit in a single UTF-16 code unit.
            uniStr.Append((char)codePoint);
        }

        unicode = uniStr.ToString();
    }
    // Otherwise, if the component is of the form ‘u’ (U+0075) followed by a sequence of four to six uppercase hexadecimal digits (0–9
    // and A–F, meaning U+0030 through U+0039 and U+0041 through U+0046), and those digits represent a value in the ranges 0000 through
    // D7FF or E000 through 10FFFF, then interpret it as a Unicode scalar value and map the component to the string made of this scalar value.
    else if (name.StartsWith("u", StringComparison.Ordinal) && name.Length >= 5 && name.Length <= 7)
    {
        // Plenty of ordinary names start with 'u' and have this length, so a failed parse means
        // "not of this form" rather than an error. Values above 10FFFF are not code points either
        // and would make char.ConvertFromUtf32 throw.
        if (!int.TryParse(name.AsSpanOrSubstring(1),
                NumberStyles.HexNumber,
                CultureInfo.InvariantCulture,
                out var codePoint)
            || codePoint > 0x10FFFF)
        {
            return null;
        }

        if (codePoint > 0xD7FF && codePoint < 0xE000)
        {
            throw new InvalidFontFormatException($"Unicode character name with disallowed code area: {name}");
        }

        unicode = char.ConvertFromUtf32(codePoint);
    }
    // Ad-hoc special cases
    else if (name.StartsWith("c", StringComparison.OrdinalIgnoreCase) && name.Length >= 3 && name.Length <= 4)
    {
        // name representation cXXX where XXX is a decimal code point.
        // Short names such as "cat" also land here, so a failed parse is not an error.
        // NumberStyles.Integer accepts a leading sign; a negative value is not a code point.
        if (!int.TryParse(name.AsSpanOrSubstring(1),
                NumberStyles.Integer,
                CultureInfo.InvariantCulture,
                out var codePoint)
            || codePoint < 0)
        {
            return null;
        }

        // At most three decimal digits, so the value can never reach the surrogate range.
        unicode = char.ConvertFromUtf32(codePoint);
    }
    // Otherwise, map the component to an empty string.
    else
    {
        return null;
    }

    oddNameToUnicodeCache[name] = unicode;
    return unicode;
}
}
}