Skip to content

Commit

Permalink
Do not write UTF8 BOM. Fix #53.
Browse files Browse the repository at this point in the history
  • Loading branch information
KubaSzostak committed Jul 28, 2024
1 parent e3c8826 commit 2e03e72
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 18 deletions.
23 changes: 15 additions & 8 deletions src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Dbf.cs
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
namespace NetTopologySuite.IO.Esri.Dbf
using System.Text;

namespace NetTopologySuite.IO.Esri.Dbf
{
/// <summary>
/// Manages configurations and constants specific to the structure and operation of DBF files in the dBASE III format.
/// </summary>
internal static class Dbf
{
public readonly static int TableDescriptorSize = 32; // Number of bytes in the table header
internal readonly static int TableDescriptorSize = 32; // Number of bytes in the table header

internal readonly static int FieldDescriptorSize = 32; // Number of bytes in the field descriptor
internal readonly static int MaxFieldCount = 255;
public readonly static byte Dbase3Version = 0x03; // dBASE III
public readonly static byte HeaderTerminatorMark = 0x0D;
internal readonly static byte Dbase3Version = 0x03; // dBASE III
internal readonly static byte HeaderTerminatorMark = 0x0D;

internal readonly static byte DeletedRecordMark = 0x2A; // '*'
internal readonly static byte ValidRecordMark = 0x20; // ' '
internal readonly static byte EndOfFileMark = 0x1A;

public readonly static byte DeletedRecordMark = 0x2A; // '*'
public readonly static byte ValidRecordMark = 0x20; // ' '
public readonly static byte EndOfFileMark = 0x1A;
internal static readonly int MaxFieldNameLength = 10;

public static readonly int MaxFieldNameLength = 10;
internal readonly static Encoding DefaultEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
}
}
2 changes: 1 addition & 1 deletion src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfEncoding.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static DbfEncoding()

// https://support.esri.com/en/technical-article/000013192

AddLanguageDriverId(0, Encoding.UTF8); // For unknown LDID
AddLanguageDriverId(0, Dbf.DefaultEncoding); // For unknown LDID
AddLanguageDriverId(0x03, Encoding.Default); // OS Default
AddLanguageDriverId(0x57, Encoding.Default); // OS Default

Expand Down
2 changes: 1 addition & 1 deletion src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ private void Initialize(Stream stream, Encoding encoding = null)
RecordSize = Buffer.ReadDbfRecordSize();
Buffer.Advance(17);

Encoding = encoding ?? Buffer.ReadDbfEncoding() ?? Encoding.UTF8; // null => Try to read encoding from DBF's reserved bytes
Encoding = encoding ?? Buffer.ReadDbfEncoding() ?? Dbf.DefaultEncoding; // null => Try to read encoding from DBF's reserved bytes
Buffer.Advance(2);

// --- File header is done, read field descriptor header now ---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public static Encoding ReadDbfEncoding(this Stream stream)

public static void WriteDbaseFieldDescriptor(this Stream stream, DbfField field, Encoding encoding)
{
encoding = encoding ?? Encoding.UTF8;
encoding = encoding ?? Dbf.DefaultEncoding;
var name = field.Name.PadRight(Dbf.MaxFieldNameLength, char.MinValue); // Field name must have empty space zero-filled


Expand All @@ -97,7 +97,7 @@ public static void WriteDbaseFieldDescriptor(this Stream stream, DbfField field,
}
public static DbfField ReadDbaseFieldDescriptor(this Stream stream, Encoding encoding)
{
encoding = encoding ?? Encoding.UTF8;
encoding = encoding ?? Dbf.DefaultEncoding;

var name = stream.ReadString(Dbf.MaxFieldNameLength, encoding)?.Trim();
stream.Advance(1); // Reserved (field name terminator)
Expand Down
4 changes: 2 additions & 2 deletions src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public class DbfWriter : ManagedDisposable
/// <param name="encoding">DBF file encoding. Defaults to UTF8.</param>
public DbfWriter(Stream stream, IReadOnlyList<DbfField> fields, Encoding encoding = null)
{
Encoding = encoding ?? Encoding.UTF8;
Encoding = encoding ?? Dbf.DefaultEncoding;
IntializeFields(fields);
DbfStream = stream ?? throw new ArgumentNullException("Uninitialized dBASE stream.", nameof(stream));
WriteHeader();
Expand All @@ -66,7 +66,7 @@ public DbfWriter(Stream stream, IReadOnlyList<DbfField> fields, Encoding encodin
/// <param name="encoding">DBF file encoding. Defaults to UTF8.</param>
public DbfWriter(string dbfPath, IReadOnlyList<DbfField> fields, Encoding encoding = null)
{
Encoding = encoding ?? Encoding.UTF8;
Encoding = encoding ?? Dbf.DefaultEncoding;
IntializeFields(fields);
WriteCpgEncoding(dbfPath, encoding);
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ internal override void WriteValue(Stream stream)
private Encoding _encoding = null;
internal Encoding Encoding
{
get { return _encoding ?? Encoding.UTF8; }
get { return _encoding ?? Dbf.DefaultEncoding; }
set
{
if (value == null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ internal ShapefileWriter(string shpPath, ShapefileWriterOptions options)
ShpWriter = CreateShpWriter(shpStream, shxStream); // It calls this.ShapeType

if (!string.IsNullOrWhiteSpace(options.Projection))
File.WriteAllText(Path.ChangeExtension(shpPath, ".prj"), options.Projection);
{
var prjPath = Path.ChangeExtension(shpPath, ".prj");
File.WriteAllText(prjPath, options.Projection, options.Encoding);
}
}
catch
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ public class ShapefileWriterOptions
/// </summary>
public List<DbfField> Fields { get; } = new List<DbfField>();

private Encoding _encoding = Encoding.UTF8;
private Encoding _encoding = Dbf.Dbf.DefaultEncoding;
/// <summary>
/// DBF file encoding.
/// </summary>
public Encoding Encoding
{
get => _encoding;
set => _encoding = value ?? Encoding.UTF8;
set => _encoding = value ?? Dbf.Dbf.DefaultEncoding;
}

/// <summary>
Expand Down
102 changes: 102 additions & 0 deletions test/NetTopologySuite.IO.Esri.Test/Issues/Issue053.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using NetTopologySuite.IO.Esri.Dbf.Fields;
using NetTopologySuite.IO.Esri.Shapefiles.Writers;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace NetTopologySuite.IO.Esri.Test.Issues;

/// <summary>
/// Regression tests for the UTF-8 byte order mark (BOM) handling reported in
/// https://github.com/NetTopologySuite/NetTopologySuite.IO.Esri/issues/53
/// </summary>
internal class Issue053
{
    /// <summary>
    /// Writes a shapefile with a projection and checks that the .prj file contains exactly
    /// the bytes produced by <c>options.Encoding.GetBytes</c> for the projection string
    /// (<c>GetBytes</c> never includes a preamble, so any BOM in the file fails the test).
    /// </summary>
    [Test]
    public void Projection_Utf8_BOM()
    {
        var fields = new List<DbfField>();
        var fidField = fields.AddNumericInt32Field("fid");
        var options = new ShapefileWriterOptions(ShapeType.Polygon, fields.ToArray())
        {
            Projection = "GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",6378137.0,298.257223563]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]]"
        };

        var shpPath = TestShapefiles.GetTempShpPath();
        using (var shpWriter = Shapefile.OpenWrite(shpPath, options))
        {
            shpWriter.Geometry = SampleGeometry.SampleMultiPolygon;
            fidField.NumericValue = 1;
            shpWriter.Write();
        }

        var expectedProjectionString = options.Projection;
        var expectedProjectionBytes = options.Encoding.GetBytes(options.Projection);

        string storedProjectionString;
        byte[] storedProjectionBytes;
        try
        {
            var prjPath = Path.ChangeExtension(shpPath, ".prj");
            storedProjectionString = File.ReadAllText(prjPath);
            storedProjectionBytes = File.ReadAllBytes(prjPath);
        }
        finally
        {
            // Remove the temporary shapefile even when reading the .prj file fails.
            TestShapefiles.DeleteShp(shpPath);
        }

        Assert.AreEqual(expectedProjectionString, storedProjectionString);
        Assert.AreEqual(expectedProjectionBytes, storedProjectionBytes);
    }

    /// <summary>
    /// Documents that <see cref="Encoding.UTF8"/> stores a BOM when used with a <see cref="StreamWriter"/>.
    /// </summary>
    [Test]
    public static void Utf8_BOM_Default()
    {
        var encoding = Encoding.UTF8;
        var expectedString = "abc";
        var expectedBytes = encoding.GetBytes(expectedString);

        var (storedString, storedBytes) = WriteAndReadBack(expectedString, encoding);

        Assert.AreEqual(expectedString, storedString);  // C# is clever enough to ignore BOM when reading
        Assert.AreNotEqual(expectedBytes, storedBytes); // Not equal because of BOM stored by default
    }

    /// <summary>
    /// Documents that a <see cref="UTF8Encoding"/> created with the identifier enabled stores a BOM.
    /// </summary>
    [Test]
    public static void Utf8_BOM_Included()
    {
        var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);
        var expectedString = "abc";
        var expectedBytes = encoding.GetBytes(expectedString);

        var (storedString, storedBytes) = WriteAndReadBack(expectedString, encoding);

        Assert.AreEqual(expectedString, storedString);  // C# is clever enough to ignore BOM when reading
        Assert.AreNotEqual(expectedBytes, storedBytes); // Not equal because of BOM stored explicitly
    }

    /// <summary>
    /// Documents that a <see cref="UTF8Encoding"/> created with the identifier disabled stores no BOM,
    /// so the file bytes match the encoded string exactly.
    /// </summary>
    [Test]
    public static void Utf8_BOM_Excluded()
    {
        var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
        var expectedString = "abc";
        var expectedBytes = encoding.GetBytes(expectedString);

        var (storedString, storedBytes) = WriteAndReadBack(expectedString, encoding);

        Assert.AreEqual(expectedString, storedString);
        Assert.AreEqual(expectedBytes, storedBytes);
    }

    /// <summary>
    /// Writes <paramref name="content"/> to a fresh temporary file using <paramref name="encoding"/>,
    /// reads it back as text (with the same encoding) and as raw bytes, and always deletes the file.
    /// </summary>
    /// <param name="content">Text to store in the temporary file.</param>
    /// <param name="encoding">Encoding used for both writing and reading the text.</param>
    /// <returns>The text and the raw bytes read back from the temporary file.</returns>
    private static (string Text, byte[] Bytes) WriteAndReadBack(string content, Encoding encoding)
    {
        // Path.GetTempFileName() creates the file on disk, so it must be removed explicitly.
        var filePath = Path.GetTempFileName();
        try
        {
            WriteFile(filePath, content, encoding);
            return (File.ReadAllText(filePath, encoding), File.ReadAllBytes(filePath));
        }
        finally
        {
            File.Delete(filePath);
        }
    }

    /// <summary>
    /// Overwrites the file at <paramref name="filePath"/> with <paramref name="content"/>
    /// through a <see cref="StreamWriter"/> configured with <paramref name="encoding"/>.
    /// </summary>
    private static void WriteFile(string filePath, string content, Encoding encoding)
    {
        using (StreamWriter writer = new StreamWriter(filePath, false, encoding))
        {
            writer.Write(content);
        }
    }
}

0 comments on commit 2e03e72

Please sign in to comment.