Skip to content

Commit c734c5a

Browse files
committed
Handle odd ligatures names and fix #945
1 parent 2080424 commit c734c5a

File tree

3 files changed

+54
-0
lines changed

3 files changed

+54
-0
lines changed

Diff for: src/UglyToad.PdfPig.Fonts/GlyphList.cs

+19
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,16 @@ public class GlyphList
3838

3939
private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats"));
4040

41+
// Set of glyph names written as underscore-joined components (for example "f_f" or "f_f_i").
// NameToUnicode checks a name against this set and, on a match, returns the name with
// every "_" removed, so "f_f_i" yields "ffi".
// Uses the default string comparer, so membership is case-sensitive ("Q_u" and "T_h" are capitalised on purpose).
private static readonly HashSet<string> OddLigaturesNames =
42+
[
43+
// See https://en.wikipedia.org/wiki/Ligature_(writing)
44+
"f_f", "f_i", "f_j", "f_l", "f_a", "f_e", "f_o", "f_r", "f_s", "f_t", "f_b", "f_h",
45+
"f_u", "f_y", "f_.", "f_,", "f_-",
46+
"f_f_i", "f_f_l",
47+
// Sometimes, ligatures for ⟨st⟩ (st), ⟨ſt⟩ (ſt), ⟨ch⟩, ⟨ck⟩, ⟨ct⟩, ⟨Qu⟩ and ⟨Th⟩ are used (e.g. in the typeface Linux Libertine).
48+
"s_t", "ſ_t", "c_h", "c_k", "c_t", "Q_u", "T_h"
49+
]; // TODO - Use a FrozenSet here instead of a HashSet
50+
4151
/// <summary>
4252
/// Zapf Dingbats.
4353
/// </summary>
@@ -159,6 +169,15 @@ public string NameToUnicode(string name)
159169
System.Diagnostics.Debug.Assert(codePoint > 0);
160170
unicode = char.ConvertFromUtf32(codePoint);
161171
}
172+
else if (name.IndexOf('_') > 0 && OddLigaturesNames.Contains(name))
173+
{
174+
/*
175+
* MOZILLA-3136-0.pdf
176+
* 68-1990-01_A.pdf
177+
* TIKA-2054-0.pdf
178+
*/
179+
unicode = name.Replace("_", "");
180+
}
162181
else
163182
{
164183
return null;
3.43 MB
Binary file not shown.

Diff for: src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs

+35
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,41 @@
66

77
public class GithubIssuesTests
88
{
9+
// Regression test for GitHub issue #945 ("odd ligatures names").
// Opens three sample documents - the same files named in the odd-ligature branch of
// GlyphList.NameToUnicode - and checks that selected pages contain at least one letter
// whose Value is the multi-character string "ff", "fi", "fl" or "ffi".
[Fact]
10+
public void Issue945()
11+
{
12+
// Odd ligatures names
13+
var path = IntegrationHelpers.GetDocumentPath("MOZILLA-3136-0.pdf");
14+
using (var document = PdfDocument.Open(path))
15+
{
16+
var page = document.GetPage(2);
17+
// Passes when any single letter on the page has the value "ff".
Assert.Contains("ff", page.Letters.Select(l => l.Value));
18+
}
19+
20+
// Second sample document: expects an "fi" letter.
path = IntegrationHelpers.GetDocumentPath("68-1990-01_A.pdf");
21+
using (var document = PdfDocument.Open(path))
22+
{
23+
var page = document.GetPage(7);
24+
Assert.Contains("fi", page.Letters.Select(l => l.Value));
25+
}
26+
27+
// Third sample document: several pages, covering "fi", "ff", "fl" and the three-letter "ffi".
path = IntegrationHelpers.GetDocumentPath("TIKA-2054-0.pdf");
28+
using (var document = PdfDocument.Open(path))
29+
{
30+
var page = document.GetPage(3);
31+
Assert.Contains("fi", page.Letters.Select(l => l.Value));
32+
33+
page = document.GetPage(4);
34+
Assert.Contains("ff", page.Letters.Select(l => l.Value));
35+
36+
page = document.GetPage(6);
37+
Assert.Contains("fl", page.Letters.Select(l => l.Value));
38+
39+
page = document.GetPage(16);
40+
Assert.Contains("ffi", page.Letters.Select(l => l.Value));
41+
}
42+
}
43+
944
[Fact]
1045
public void Issue943()
1146
{

0 commit comments

Comments
 (0)