
Commit a0a969b

Initial tests.
1 parent 3c50da4 commit a0a969b

2 files changed: +120 -1 lines changed

lib/iris/fileformats/cf.py

Lines changed: 8 additions & 1 deletion
@@ -836,7 +836,14 @@ def string_from_1d_bytearray(array, encoding):
     bytelist = [b"\0" if byte == b"" else byte for byte in array]
     bytes = b"".join(bytelist)
     assert len(bytes) == array.shape[0]
-    string = bytes.decode(encoding=encoding)
+    try:
+        string = bytes.decode(encoding=encoding)
+    except UnicodeDecodeError:
+        # if encoding == "ascii":
+        #     print("\n\n*** FIX !!")
+        #     string = bytes.decode("utf-8")
+        # else:
+        raise
     result = string.strip()
     return result

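For reference, a minimal standalone sketch (plain Python, not part of the commit) of the failure mode the new try/except in string_from_1d_bytearray reacts to: the joined bytes can be perfectly valid UTF-8 yet refuse to decode under a stricter declared encoding such as "ascii", which is exactly what the tests below provoke.

raw = "Münster".encode("utf-8")      # b'M\xc3\xbcnster' -- "ü" becomes two bytes
print(raw.decode(encoding="utf-8"))  # 'Münster'
try:
    raw.decode(encoding="ascii")
except UnicodeDecodeError as err:
    print(err)  # 'ascii' codec can't decode byte 0xc3 in position 1: ...

The commented-out branch in the hunk hints at a possible fallback (retry as UTF-8 when the declared encoding is "ascii"), but as committed the exception is simply re-raised.
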
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
+import netCDF4 as nc
+import numpy as np
+import pytest
+
+import iris
+
+NX, N_STRLEN = 3, 64
+TEST_STRINGS = ["Münster", "London", "Amsterdam"]
+TEST_COORD_VALS = ["bun", "éclair", "sandwich"]
+
+
+def convert_chararray(string_array_1d, maxlen, encoding="utf-8"):
+    bbytes = [text.encode(encoding) for text in string_array_1d]
+    pad = b"\0" * maxlen
+    bbytes = [(x + pad)[:maxlen] for x in bbytes]
+    chararray = np.array([[bb[i : i + 1] for i in range(maxlen)] for bb in bbytes])
+    return chararray
+
+
+INCLUDE_COORD = True
+# INCLUDE_COORD = False
+
+
+def make_testfile(filepath, chararray, coordarray, encoding_str=None):
+    with nc.Dataset(filepath, "w") as ds:
+        ds.createDimension("x", NX)
+        ds.createDimension("nstr", N_STRLEN)
+        vx = ds.createVariable("x", int, dimensions=("x"))
+        vx[:] = np.arange(NX)
+        if INCLUDE_COORD:
+            ds.createDimension("nstr2", N_STRLEN)
+            v_co = ds.createVariable(
+                "v_co",
+                "S1",
+                dimensions=(
+                    "x",
+                    "nstr2",
+                ),
+            )
+            v_co[:] = coordarray
+            if encoding_str is not None:
+                v_co._Encoding = encoding_str
+        v = ds.createVariable(
+            "v",
+            "S1",
+            dimensions=(
+                "x",
+                "nstr",
+            ),
+        )
+        v[:] = chararray
+        if encoding_str is not None:
+            v._Encoding = encoding_str
+        if INCLUDE_COORD:
+            v.coordinates = "v_co"
+
+
+def show_result(filepath):
+    from pp_utils import ncdump
+
+    print(f"File {filepath}")
+    print("NCDUMP:")
+    ncdump(filepath, "")
+    # with nc.Dataset(filepath, "r") as ds:
+    #     v = ds.variables["v"]
+    #     print("\n----\nNetcdf data readback (basic)")
+    #     try:
+    #         print(repr(v[:]))
+    #     except UnicodeDecodeError as err:
+    #         print(repr(err))
+    #         print("..raw:")
+    #         v.set_auto_chartostring(False)
+    #         print(repr(v[:]))
+    print("\nAs iris cube..")
+    try:
+        cube = iris.load_cube(filepath)
+        print(cube)
+        if iris.loading.LOAD_PROBLEMS._problems:
+            print(iris.loading.LOAD_PROBLEMS)
+            print(
+                "\n".join(iris.loading.LOAD_PROBLEMS._problems[0].stack_trace.format())
+            )
+        print("-data-")
+        print(repr(cube.data))
+        if INCLUDE_COORD:
+            print("-coord data-")
+            try:
+                print(repr(cube.coord("v_co").points))
+            except Exception as err2:
+                print(repr(err2))
+    except UnicodeDecodeError as err:
+        print(repr(err))
+
+
+# tsts = (None, "ascii", "utf-8", "utf-32",)
+# tsts = ("utf-8",)
+# tsts = ("utf-8", "utf-32",)
+# tsts = ("utf-32",)
+tsts = ("utf-8", "ascii", "utf-8")
+
+
+@pytest.mark.parametrize("encoding", tsts)
+def test_encodings(encoding):
+    print(f"\n=========\nTesting encoding: {encoding}")
+    filepath = f"tmp_{str(encoding)}.nc"
+    do_as = encoding
+    if encoding != "utf-32":
+        do_as = "utf-8"
+    TEST_CHARARRAY = convert_chararray(TEST_STRINGS, N_STRLEN, encoding=do_as)
+    TEST_COORDARRAY = convert_chararray(TEST_COORD_VALS, N_STRLEN, encoding=do_as)
+    make_testfile(filepath, TEST_CHARARRAY, TEST_COORDARRAY, encoding_str=encoding)
+    show_result(filepath)

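As a sanity check on the helper above, a small hypothetical snippet (standalone, numpy only; the names text, maxlen, raw and arr are illustrative) showing what convert_chararray builds for a single UTF-8 string: the string is encoded, NUL-padded to the fixed width, and split into an array of single-byte elements with dtype S1.

import numpy as np

text = "Münster"
maxlen = 8
raw = (text.encode("utf-8") + b"\0" * maxlen)[:maxlen]  # b'M\xc3\xbcnster' -- exactly 8 bytes
arr = np.array([[raw[i : i + 1] for i in range(maxlen)]])
print(arr.shape, arr.dtype)  # (1, 8) |S1

Because the test functions rely on print output rather than assertions at this stage, running them under pytest with the -s option keeps that output visible.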