From 406fa470b064c48c074c5a7afcecbeebb67af6b1 Mon Sep 17 00:00:00 2001 From: Axel Bocciarelli Date: Mon, 29 Jan 2024 09:33:18 +0100 Subject: [PATCH] Improve dtype parsing with h5wasm --- .../src/__snapshots__/h5wasm-api.test.ts.snap | 28 +++- packages/h5wasm/src/guards.ts | 63 +------- packages/h5wasm/src/models.ts | 19 +-- packages/h5wasm/src/utils.ts | 137 +++++++++--------- packages/shared/src/hdf5-models.ts | 54 ++++++- packages/shared/src/hdf5-utils.ts | 64 +++++++- 6 files changed, 205 insertions(+), 160 deletions(-) diff --git a/packages/h5wasm/src/__snapshots__/h5wasm-api.test.ts.snap b/packages/h5wasm/src/__snapshots__/h5wasm-api.test.ts.snap index 20686f09e..16a48e77c 100644 --- a/packages/h5wasm/src/__snapshots__/h5wasm-api.test.ts.snap +++ b/packages/h5wasm/src/__snapshots__/h5wasm-api.test.ts.snap @@ -876,7 +876,8 @@ exports[`test file matches snapshot 1`] = ` }, "shape": [], "type": { - "class": "Unknown", + "class": "Opaque", + "tag": "", }, "value": Uint8Array [ 0, @@ -897,7 +898,8 @@ exports[`test file matches snapshot 1`] = ` }, "shape": [], "type": { - "class": "Unknown", + "class": "Opaque", + "tag": "", }, "value": Uint8Array [ 150, @@ -923,7 +925,8 @@ exports[`test file matches snapshot 1`] = ` }, "shape": [], "type": { - "class": "Unknown", + "class": "Opaque", + "tag": "", }, "value": Uint8Array [ 0, @@ -1724,7 +1727,10 @@ exports[`test file matches snapshot 1`] = ` ], }, "vlen": { - "class": "Unknown", + "base": { + "class": "Unknown", + }, + "class": "Array (variable length)", }, }, }, @@ -1792,7 +1798,7 @@ exports[`test file matches snapshot 1`] = ` }, "shape": [], "type": { - "class": "Unknown", + "class": "Reference", }, "value": Uint8Array [ 214, @@ -1818,7 +1824,7 @@ exports[`test file matches snapshot 1`] = ` }, "shape": [], "type": { - "class": "Unknown", + "class": "Reference", }, "value": Uint8Array [ 34, @@ -2028,7 +2034,10 @@ exports[`test file matches snapshot 1`] = ` }, "shape": [], "type": { - "class": "Unknown", + "base": { + "class": "Unknown", + }, + "class": "Array (variable length)", }, "value": Uint8Array [ 2, @@ -2056,7 +2065,10 @@ exports[`test file matches snapshot 1`] = ` 3, ], "type": { - "class": "Unknown", + "base": { + "class": "Unknown", + }, + "class": "Array (variable length)", }, "value": Uint8Array [ 1, diff --git a/packages/h5wasm/src/guards.ts b/packages/h5wasm/src/guards.ts index 17e20371a..74d4cce70 100644 --- a/packages/h5wasm/src/guards.ts +++ b/packages/h5wasm/src/guards.ts @@ -1,73 +1,18 @@ import { isCompoundType } from '@h5web/shared/guards'; import type { Dataset, DType } from '@h5web/shared/hdf5-models'; import { DTypeClass } from '@h5web/shared/hdf5-models'; -import type { Metadata } from 'h5wasm'; import { Dataset as H5WasmDataset } from 'h5wasm'; -import type { - CompoundMetadata, - EnumMetadata, - H5WasmEntity, - NumericMetadata, -} from './models'; +import type { H5WasmEntity } from './models'; export function assertH5WasmDataset( - entity: H5WasmEntity, -): asserts entity is H5WasmDataset { - if (!(entity instanceof H5WasmDataset)) { + h5wEntity: NonNullable, +): asserts h5wEntity is H5WasmDataset { + if (!(h5wEntity instanceof H5WasmDataset)) { throw new TypeError('Expected H5Wasm entity to be dataset'); } } -// See H5T_class_t in https://github.com/usnistgov/h5wasm/blob/main/src/hdf5_util_helpers.d.ts -export function isIntegerMetadata(metadata: Metadata) { - return metadata.type === 0; -} - -export function isFloatMetadata(metadata: Metadata) { - return metadata.type === 1; -} - -export function isNumericMetadata( - metadata: Metadata, -): metadata is NumericMetadata { - return isIntegerMetadata(metadata) || isFloatMetadata(metadata); -} - -export function isStringMetadata(metadata: Metadata) { - return metadata.type === 3; -} - -export function isArrayMetadata(metadata: Metadata) { - return metadata.type === 10; -} - -export function isCompoundMetadata( - metadata: Metadata, -): metadata is CompoundMetadata { - return metadata.type === 6; -} - -export function isEnumMetadata(metadata: Metadata): metadata is EnumMetadata { - return metadata.type === 8; -} - -export function assertCompoundMetadata( - metadata: Metadata, -): asserts metadata is CompoundMetadata { - if (!isCompoundMetadata(metadata)) { - throw new Error('Expected H5Wasm compound metadata'); - } -} - -export function assertNumericMetadata( - metadata: Metadata, -): asserts metadata is NumericMetadata { - if (!isNumericMetadata(metadata)) { - throw new Error('Expected H5Wasm numeric metadata'); - } -} - function isInt64Type(type: DType): boolean { return ( (type.class === DTypeClass.Integer || type.class === DTypeClass.Unsigned) && diff --git a/packages/h5wasm/src/models.ts b/packages/h5wasm/src/models.ts index 6e2ddd701..d6e6fe3e8 100644 --- a/packages/h5wasm/src/models.ts +++ b/packages/h5wasm/src/models.ts @@ -1,22 +1,5 @@ -import type { - CompoundTypeMetadata, - EnumTypeMetadata, - Group as H5WasmGroup, - Metadata, -} from 'h5wasm'; +import type { Group as H5WasmGroup } from 'h5wasm'; export type H5WasmEntity = ReturnType; export type H5WasmAttributes = H5WasmGroup['attrs']; - -export interface CompoundMetadata extends Metadata { - compound_type: CompoundTypeMetadata; -} - -export interface NumericMetadata extends Metadata { - type: 0 | 1; -} - -export interface EnumMetadata extends Metadata { - enum_type: EnumTypeMetadata; -} diff --git a/packages/h5wasm/src/utils.ts b/packages/h5wasm/src/utils.ts index 752abe520..b5bbbfa1b 100644 --- a/packages/h5wasm/src/utils.ts +++ b/packages/h5wasm/src/utils.ts @@ -1,26 +1,26 @@ -import { assertDefined } from '@h5web/shared/guards'; +import { assertDefined, isNumericType } from '@h5web/shared/guards'; import type { Attribute, ChildEntity, DType, Group, - NumericType, ProvidedEntity, Shape, } from '@h5web/shared/hdf5-models'; -import { Endianness, EntityKind } from '@h5web/shared/hdf5-models'; +import { Endianness, EntityKind, H5TClass } from '@h5web/shared/hdf5-models'; import { arrayType, - boolType, + bitfieldType, buildEntityPath, - compoundType, - cplxType, - enumType, + compoundOrCplxType, + enumOrBoolType, floatType, - intType, - isBoolEnumType, + intOrUintType, + opaqueType, + referenceType, strType, - uintType, + timeType, + toCharSet, unknownType, } from '@h5web/shared/hdf5-utils'; import type { Metadata } from 'h5wasm'; @@ -32,17 +32,7 @@ import { Group as H5WasmGroup, } from 'h5wasm'; -import { - assertNumericMetadata, - isArrayMetadata, - isCompoundMetadata, - isEnumMetadata, - isFloatMetadata, - isIntegerMetadata, - isNumericMetadata, - isStringMetadata, -} from './guards'; -import type { H5WasmAttributes, H5WasmEntity, NumericMetadata } from './models'; +import type { H5WasmAttributes, H5WasmEntity } from './models'; // https://github.com/h5wasm/h5wasm-plugins#included-plugins // https://support.hdfgroup.org/services/contributions.html @@ -162,83 +152,86 @@ function parseAttributes(h5wAttrs: H5WasmAttributes): Attribute[] { }); } -export function parseDTypeFromNumericMetadata( - metadata: NumericMetadata, -): NumericType { - const { signed, size: length, littleEndian } = metadata; - const size = length * 8; - const endianness = littleEndian ? Endianness.LE : Endianness.BE; +export function parseDType(metadata: Metadata): DType { + const { type: h5tClass, size } = metadata; - if (isIntegerMetadata(metadata)) { - return signed ? intType(size, endianness) : uintType(size, endianness); + if (h5tClass === H5TClass.Integer) { + const { signed, littleEndian } = metadata; + return intOrUintType(signed, size * 8, toEndianness(littleEndian)); } - - if (isFloatMetadata(metadata)) { - return floatType(size, endianness); + if (h5tClass === H5TClass.Float) { + const { littleEndian } = metadata; + return floatType(size * 8, toEndianness(littleEndian)); } - throw new Error('Expected numeric metadata'); -} - -export function parseDType(metadata: Metadata): DType { - if (isNumericMetadata(metadata)) { - return parseDTypeFromNumericMetadata(metadata); + if (h5tClass === H5TClass.Time) { + return timeType(); } - if (isStringMetadata(metadata)) { - const { size, cset, vlen } = metadata; - - return strType( - cset === 1 ? 'UTF-8' : 'ASCII', - // For variable-length string datatypes, the returned value is the size of the pointer to the actual string and - // not the size of actual variable-length string data (https://portal.hdfgroup.org/display/HDF5/H5T_GET_SIZE) - vlen ? undefined : size, - ); + if (h5tClass === H5TClass.String) { + const { cset, vlen } = metadata; + return strType(toCharSet(cset), vlen ? undefined : size); } - if (isArrayMetadata(metadata)) { - const { array_type } = metadata; - assertDefined(array_type); + if (h5tClass === H5TClass.Bitfield) { + return bitfieldType(); + } - return arrayType(parseDType(array_type), array_type.shape); + if (h5tClass === H5TClass.Opaque) { + return opaqueType(); } - if (isCompoundMetadata(metadata)) { + if (h5tClass === H5TClass.Compound) { const { compound_type } = metadata; - const { members, nmembers } = compound_type; - - if (nmembers === 2 && members[0].name === 'r' && members[1].name === 'i') { - const [realTypeMetadata, imagTypeMetadata] = members; - assertNumericMetadata(realTypeMetadata); - assertNumericMetadata(imagTypeMetadata); + assertDefined(compound_type); - return cplxType( - parseDTypeFromNumericMetadata(realTypeMetadata), - parseDTypeFromNumericMetadata(imagTypeMetadata), - ); - } - - return compoundType( + return compoundOrCplxType( Object.fromEntries( - members.map((member) => [member.name, parseDType(member)]), + compound_type.members.map((member) => [ + member.name, + parseDType(member), + ]), ), ); } - if (isEnumMetadata(metadata)) { + if (h5tClass === H5TClass.Reference) { + return referenceType(); + } + + if (h5tClass === H5TClass.Enum) { const { enum_type } = metadata; - const { members: mapping, type: baseType } = enum_type; + assertDefined(enum_type); + const { members, type } = enum_type; - const baseMetadata = { ...metadata, type: baseType }; - assertNumericMetadata(baseMetadata); + const baseType = parseDType({ ...metadata, type }); + if (!isNumericType(baseType)) { + throw new Error('Expected enum type to have numeric base type'); + } - const type = enumType(parseDTypeFromNumericMetadata(baseMetadata), mapping); - return isBoolEnumType(type) ? boolType() : type; // booleans stored as enums by h5py + return enumOrBoolType(baseType, members); + } + + if (h5tClass === H5TClass.Vlen) { + // Not currently provided, so unable to know base type + // const { array_type } = metadata; + // assertDefined(array_type); + return arrayType(unknownType()); + } + + if (h5tClass === H5TClass.Array) { + const { array_type } = metadata; + assertDefined(array_type); + return arrayType(parseDType(array_type), array_type.shape); } return unknownType(); } +function toEndianness(littleEndian: boolean): Endianness { + return littleEndian ? Endianness.LE : Endianness.BE; +} + export function convertSelectionToRanges( dataset: H5WasmDataset, selection: string, diff --git a/packages/shared/src/hdf5-models.ts b/packages/shared/src/hdf5-models.ts index 04dfed054..0a92b449b 100644 --- a/packages/shared/src/hdf5-models.ts +++ b/packages/shared/src/hdf5-models.ts @@ -90,6 +90,10 @@ export enum DTypeClass { Array = 'Array', VLen = 'Array (variable length)', Enum = 'Enumeration', + Time = 'Time', + Bitfield = 'Bitfield', + Opaque = 'Opaque', + Reference = 'Reference', Unknown = 'Unknown', } @@ -98,11 +102,17 @@ export enum Endianness { BE = 'big-endian', } +export type CharSet = 'UTF-8' | 'ASCII'; + export type DType = | PrintableType | CompoundType | ArrayType | EnumType + | TimeType + | BitfieldType + | OpaqueType + | ReferenceType | UnknownType; export type PrintableType = @@ -131,7 +141,7 @@ export interface ComplexType { export interface StringType { class: DTypeClass.String; - charSet: 'UTF-8' | 'ASCII'; + charSet: CharSet; length?: number; } @@ -156,6 +166,24 @@ export interface EnumType { mapping: Record; } +export interface TimeType { + class: DTypeClass.Time; +} + +export interface BitfieldType { + class: DTypeClass.Bitfield; + endianness?: Endianness; +} + +export interface ReferenceType { + class: DTypeClass.Reference; +} + +export interface OpaqueType { + class: DTypeClass.Opaque; + tag: string; +} + export interface UnknownType { class: DTypeClass.Unknown; } @@ -196,3 +224,27 @@ export interface Filter { id: number; name: string; } + +/* ------------------- */ +/* ---- H5T ENUMS ---- */ + +// https://docs.hdfgroup.org/hdf5/develop/_h5_tpublic_8h.html#title3 + +export enum H5TClass { + Integer = 0, + Float = 1, + Time = 2, + String = 3, + Bitfield = 4, + Opaque = 5, + Compound = 6, + Reference = 7, + Enum = 8, + Vlen = 9, + Array = 10, +} + +export enum H5TCharSet { + ASCII = 0, + UTF8 = 1, +} diff --git a/packages/shared/src/hdf5-utils.ts b/packages/shared/src/hdf5-utils.ts index 96f25b35e..705fedfd1 100644 --- a/packages/shared/src/hdf5-utils.ts +++ b/packages/shared/src/hdf5-utils.ts @@ -1,6 +1,9 @@ +import { isNumericType } from './guards'; import type { ArrayType, + BitfieldType, BooleanType, + CharSet, ChildEntity, ComplexType, CompoundType, @@ -9,11 +12,14 @@ import type { GroupWithChildren, H5WebComplex, NumericType, + OpaqueType, PrintableType, + ReferenceType, StringType, + TimeType, UnknownType, } from './hdf5-models'; -import { DTypeClass, Endianness } from './hdf5-models'; +import { DTypeClass, Endianness, H5TCharSet } from './hdf5-models'; export function getChildEntity( group: GroupWithChildren, @@ -41,12 +47,21 @@ export function uintType(size = 32, endianness = Endianness.LE): NumericType { return { class: DTypeClass.Unsigned, endianness, size }; } +export function intOrUintType( + isSigned: boolean, + size = 32, + endianness = Endianness.LE, +) { + const func = isSigned ? intType : uintType; + return func(size, endianness); +} + export function floatType(size = 32, endianness = Endianness.LE): NumericType { return { class: DTypeClass.Float, endianness, size }; } export function strType( - charSet: StringType['charSet'] = 'ASCII', + charSet: CharSet = 'ASCII', length?: number, ): StringType { return { @@ -77,6 +92,17 @@ export const printableCompoundType = compoundType< Record >; +export function compoundOrCplxType>( + fields: F, +): CompoundType | ComplexType { + const { r, i } = fields; + if (r && isNumericType(r) && i && isNumericType(i)) { + return cplxType(r, i); + } + + return compoundType(fields); +} + export function arrayType( baseType: T, dims?: number[], @@ -104,6 +130,33 @@ export function isBoolEnumType(type: EnumType): boolean { ); } +export function enumOrBoolType( + baseType: NumericType, + mapping: Record, +): EnumType | BooleanType { + if (mapping.FALSE === 0 && mapping.TRUE === 1) { + return boolType(); + } + + return enumType(baseType, mapping); +} + +export function timeType(): TimeType { + return { class: DTypeClass.Time }; +} + +export function bitfieldType(endianness = Endianness.LE): BitfieldType { + return { class: DTypeClass.Bitfield, endianness }; +} + +export function opaqueType(tag = ''): OpaqueType { + return { class: DTypeClass.Opaque, tag }; +} + +export function referenceType(): ReferenceType { + return { class: DTypeClass.Reference }; +} + export function unknownType(): UnknownType { return { class: DTypeClass.Unknown }; } @@ -114,3 +167,10 @@ export function unknownType(): UnknownType { export function cplx(real: number, imag: number): H5WebComplex { return [real, imag]; } + +/* ------------------------- */ +/* --- HDF5 ENUM HELPERS --- */ + +export function toCharSet(h5tCharSet: number): CharSet { + return h5tCharSet === H5TCharSet.ASCII ? 'ASCII' : 'UTF-8'; +}