Skip to content

Commit

Permalink
Always check for additional glyph list and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
BobLd committed Jan 5, 2025
1 parent d1779cc commit 585e940
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 37 deletions.
13 changes: 3 additions & 10 deletions src/UglyToad.PdfPig.Fonts/GlyphList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,13 @@ public class GlyphList

private readonly Dictionary<string, string> oddNameToUnicodeCache = new Dictionary<string, string>();

private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist"));
private static readonly Lazy<GlyphList> LazyAdobeGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("glyphlist", "additional"));

/// <summary>
/// The Adobe Glyph List.
/// The Adobe Glyph List (includes an extension to the Adobe Glyph List).
/// </summary>
public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value;

private static readonly Lazy<GlyphList> LazyAdditionalGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("additional"));

/// <summary>
/// An extension to the Adobe Glyph List.
/// </summary>
public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value;

private static readonly Lazy<GlyphList> LazyZapfDingbatsGlyphList = new Lazy<GlyphList>(() => GlyphListFactory.Get("zapfdingbats"));

/// <summary>
Expand Down Expand Up @@ -103,7 +96,7 @@ public string NameToUnicode(string name)
return result;
}

string unicode;
string? unicode;
// 1. Drop all the characters from the glyph name starting with the first occurrence of a period (U+002E FULL STOP), if any.
if (name.IndexOf('.') > 0)
{
Expand Down
56 changes: 31 additions & 25 deletions src/UglyToad.PdfPig.Fonts/GlyphListFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,56 @@
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using Util;

internal class GlyphListFactory
internal static class GlyphListFactory
{
public static GlyphList Get(string listName)
#if NET
private const char Semicolon = ';';
#else
private static readonly char[] Semicolon = [';'];
#endif

public static GlyphList Get(params string[] listNames)
{
using (var resource =
typeof(GlyphListFactory).Assembly.GetManifestResourceStream(
$"UglyToad.PdfPig.Fonts.Resources.GlyphList.{listName}"))
var result = new Dictionary<string, string>(listNames.Any(n => string.Equals("glyphlist", n, StringComparison.OrdinalIgnoreCase)) ? 4300 : 0);

foreach (var listName in listNames)
{
if (resource == null)
using (var resource =
typeof(GlyphListFactory).Assembly.GetManifestResourceStream(
$"UglyToad.PdfPig.Fonts.Resources.GlyphList.{listName}"))
{
throw new ArgumentException($"No embedded glyph list resource was found with the name {listName}.");
}
if (resource == null)
{
throw new ArgumentException($"No embedded glyph list resource was found with the name {listName}.");
}

int? capacity = null;
// Prevent too much wasted memory capacity for Adobe GlyphList
if (string.Equals("glyphlist", listName, StringComparison.OrdinalIgnoreCase))
{
capacity = 4300;
ReadInternal(resource, result);
}

return ReadInternal(resource, capacity);
}

#if NET
result.TrimExcess();
#endif
return new GlyphList(result);
}

public static GlyphList Read(Stream stream)
{
return ReadInternal(stream);
var result = new Dictionary<string, string>();
ReadInternal(stream, result);
return new GlyphList(result);
}

private static readonly char[] Semicolon = [';'];

private static GlyphList ReadInternal(Stream stream, int? defaultDictionaryCapacity = 0)
private static void ReadInternal(Stream stream, Dictionary<string, string> result)
{
if (stream == null)
{
throw new ArgumentNullException(nameof(stream));
}

var result = defaultDictionaryCapacity.HasValue ? new Dictionary<string, string>(defaultDictionaryCapacity.Value) : [];


using (var reader = new StreamReader(stream))
{
while (!reader.EndOfStream)
Expand All @@ -62,7 +69,7 @@ private static GlyphList ReadInternal(Stream stream, int? defaultDictionaryCapac
{
continue;
}

var parts = line.Split(Semicolon, StringSplitOptions.RemoveEmptyEntries);

if (parts.Length != 2)
Expand All @@ -86,11 +93,10 @@ private static GlyphList ReadInternal(Stream stream, int? defaultDictionaryCapac
value += char.ConvertFromUtf32(code);
}

System.Diagnostics.Debug.Assert(!result.ContainsKey(key));
result[key] = value;
}
}

return new GlyphList(result);
}
}
}
42 changes: 42 additions & 0 deletions src/UglyToad.PdfPig.Tests/Integration/AdditionalGlyphListTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System.Linq;

    /// <summary>
    /// Integration tests that open sample documents and assert that specific
    /// characters appear among the letters extracted from a given page.
    /// </summary>
    /// <remarks>
    /// NOTE(review): judging by the class name, the expected characters are ones whose
    /// glyph names resolve through the additional glyph list - confirm against the samples.
    /// </remarks>
    public class AdditionalGlyphListTests
    {
        /// <summary>
        /// Page 2 of the "2108.11480" sample must yield U+22C3 (n-ary union).
        /// </summary>
        [Fact]
        public void Type1FontSimple1()
        {
            using var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("2108.11480"));

            // Extract the letter values once so the assertion works on a fixed snapshot.
            var letterValues = document.GetPage(2).Letters.Select(letter => letter.Value).ToList();

            Assert.Contains("\u22c3", letterValues);
        }

        /// <summary>
        /// Page 2 of the "ICML03-081" sample must yield U+2211 (n-ary summation),
        /// U+220F (n-ary product) and both square brackets.
        /// </summary>
        [Fact]
        public void Type1FontSimple2()
        {
            using var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("ICML03-081"));

            // One snapshot of the page's letter values is shared by all four assertions.
            var letterValues = document.GetPage(2).Letters.Select(letter => letter.Value).ToList();

            Assert.Contains("\u2211", letterValues);
            Assert.Contains("\u220f", letterValues);
            Assert.Contains("[", letterValues);
            Assert.Contains("]", letterValues);
        }

        /// <summary>
        /// Page 4 of the "Math119FakingData" sample must yield both parentheses
        /// and U+2211 (n-ary summation).
        /// </summary>
        [Fact]
        public void Type1FontSimple3()
        {
            using var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("Math119FakingData"));

            // One snapshot of the page's letter values is shared by all three assertions.
            var letterValues = document.GetPage(4).Letters.Select(letter => letter.Value).ToList();

            Assert.Contains("(", letterValues);
            Assert.Contains(")", letterValues);
            Assert.Contains("\u2211", letterValues);
        }
    }
}
3 changes: 1 addition & 2 deletions src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@ public bool TryGetUnicode(int characterCode, [NotNullWhen(true)] out string? val
// Look up the character name in the Adobe Glyph List or additional Glyph List.
try
{
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName)
?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName);
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
}
catch
{
Expand Down

0 comments on commit 585e940

Please sign in to comment.