Skip to content

Commit

Permalink
make use of the caching mechanism
Browse files (browse the repository at this point in the history)
  • Loading branch information
keewis committed Aug 1, 2023
1 parent ffaf1d5 commit 3ab7802
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 17 deletions.
45 changes: 28 additions & 17 deletions ceos_alos2/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,37 @@ def read_summary(mapper, path):
return parse_summary(bytes_.decode())


def read_image(fs, path, chunks):
def read_image(mapper, path, chunks, *, use_cache=True, create_cache=False):
dims = ["rows", "columns"]
chunksizes = tuple(chunks.get(dim, -1) for dim in dims)

with fs.open(path, mode="rb") as f:
header, metadata = sar_image.read_metadata(f, chunksizes[0])
try:
fs = mapper.dirfs
except AttributeError:
fs = DirFileSystem(fs=mapper.fs, path=mapper.root)

byte_ranges = [(m["data"]["start"], m["data"]["stop"]) for m in metadata]
type_code = sar_image.extract_format_type(header)
parser = curry(sar_image.parse_data, type_code=type_code)
try:
if not use_cache:
# don't use the cache
raise sar_image.CachingError()
metadata = sar_image.read_cache(mapper, path)
except sar_image.CachingError:
with fs.open(path, mode="rb") as f:
metadata = sar_image.read_metadata(f, chunksizes[0])
if create_cache:
sar_image.create_cache(mapper, path, metadata)

parser = curry(sar_image.parse_data, type_code=metadata["type_code"])

dtype = sar_image.dtypes.get(metadata["type_code"])
if dtype is None:
raise ValueError(f"unknown type code: {metadata['type_code']}")

shape = sar_image.extract_shape(header)
dtype = sar_image.extract_dtype(header)
image_data = Array(
fs=fs,
url=path,
byte_ranges=byte_ranges,
shape=shape,
byte_ranges=metadata["byte_ranges"],
shape=metadata["shape"],
dtype=dtype,
parse_bytes=parser,
chunks=chunksizes,
Expand All @@ -53,23 +66,20 @@ def read_image(fs, path, chunks):
# - group attrs
# - coords
# - image variable attrs
header_attrs = sar_image.extract_attrs(header)
coords, attrs = sar_image.transform_metadata(metadata)
image_variable = (dims, image_data, {})

raw_variables = coords | {"data": image_variable}
raw_variables = metadata["variables"] | {"data": image_variable}
variables = {name: Variable(*var) for name, var in raw_variables.items()}

group_name = sar_image.filename_to_groupname(path)

group_attrs = attrs | header_attrs | {"coordinates": list(coords)}
group_attrs = metadata["attrs"]

return Group(path=group_name, data=variables, attrs=group_attrs, url=None)


def open(path, chunks=None, storage_options={}):
def open(path, chunks=None, *, storage_options={}, create_cache=False, use_cache=True):
mapper = fsspec.get_mapper(path, **storage_options)
dirfs = DirFileSystem(fs=mapper.fs, path=mapper.root)

# the default is to read 1024 records at once
if chunks is None:
Expand All @@ -85,7 +95,8 @@ def open(path, chunks=None, storage_options={}):
# read sar leader
# read actual imagery
imagery_groups = [
read_image(dirfs, path, chunks, **storage_options) for path in filenames["sar_imagery"]
read_image(mapper, path, chunks, create_cache=create_cache, use_cache=use_cache)
for path in filenames["sar_imagery"]
]
imagery = Group(
"/imagery", url=mapper.root, data={group.name: group for group in imagery_groups}, attrs={}
Expand Down
5 changes: 5 additions & 0 deletions ceos_alos2/sar_image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
from tlz.itertoolz import partition_all

from ceos_alos2.common import record_preamble
from ceos_alos2.sar_image.caching import ( # noqa: F401
CachingError,
create_cache,
read_cache,
)
from ceos_alos2.sar_image.file_descriptor import file_descriptor_record
from ceos_alos2.sar_image.metadata import transform # noqa: F401
from ceos_alos2.sar_image.processed_data import processed_data_record
Expand Down

0 comments on commit 3ab7802

Please sign in to comment.