Skip to content

Commit

Permalink
Merge pull request #1556 from silx-kit/dtype-h5wasm
Browse files Browse the repository at this point in the history
Improve dtype parsing with h5wasm
  • Loading branch information
axelboc authored Jan 29, 2024
2 parents 1bdd035 + 406fa47 commit a4cc8de
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 160 deletions.
28 changes: 20 additions & 8 deletions packages/h5wasm/src/__snapshots__/h5wasm-api.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,8 @@ exports[`test file matches snapshot 1`] = `
},
"shape": [],
"type": {
"class": "Unknown",
"class": "Opaque",
"tag": "",
},
"value": Uint8Array [
0,
Expand All @@ -897,7 +898,8 @@ exports[`test file matches snapshot 1`] = `
},
"shape": [],
"type": {
"class": "Unknown",
"class": "Opaque",
"tag": "",
},
"value": Uint8Array [
150,
Expand All @@ -923,7 +925,8 @@ exports[`test file matches snapshot 1`] = `
},
"shape": [],
"type": {
"class": "Unknown",
"class": "Opaque",
"tag": "",
},
"value": Uint8Array [
0,
Expand Down Expand Up @@ -1724,7 +1727,10 @@ exports[`test file matches snapshot 1`] = `
],
},
"vlen": {
"class": "Unknown",
"base": {
"class": "Unknown",
},
"class": "Array (variable length)",
},
},
},
Expand Down Expand Up @@ -1792,7 +1798,7 @@ exports[`test file matches snapshot 1`] = `
},
"shape": [],
"type": {
"class": "Unknown",
"class": "Reference",
},
"value": Uint8Array [
214,
Expand All @@ -1818,7 +1824,7 @@ exports[`test file matches snapshot 1`] = `
},
"shape": [],
"type": {
"class": "Unknown",
"class": "Reference",
},
"value": Uint8Array [
34,
Expand Down Expand Up @@ -2028,7 +2034,10 @@ exports[`test file matches snapshot 1`] = `
},
"shape": [],
"type": {
"class": "Unknown",
"base": {
"class": "Unknown",
},
"class": "Array (variable length)",
},
"value": Uint8Array [
2,
Expand Down Expand Up @@ -2056,7 +2065,10 @@ exports[`test file matches snapshot 1`] = `
3,
],
"type": {
"class": "Unknown",
"base": {
"class": "Unknown",
},
"class": "Array (variable length)",
},
"value": Uint8Array [
1,
Expand Down
63 changes: 4 additions & 59 deletions packages/h5wasm/src/guards.ts
Original file line number Diff line number Diff line change
@@ -1,73 +1,18 @@
import { isCompoundType } from '@h5web/shared/guards';
import type { Dataset, DType } from '@h5web/shared/hdf5-models';
import { DTypeClass } from '@h5web/shared/hdf5-models';
import type { Metadata } from 'h5wasm';
import { Dataset as H5WasmDataset } from 'h5wasm';

import type {
CompoundMetadata,
EnumMetadata,
H5WasmEntity,
NumericMetadata,
} from './models';
import type { H5WasmEntity } from './models';

export function assertH5WasmDataset(
entity: H5WasmEntity,
): asserts entity is H5WasmDataset {
if (!(entity instanceof H5WasmDataset)) {
h5wEntity: NonNullable<H5WasmEntity>,
): asserts h5wEntity is H5WasmDataset {
if (!(h5wEntity instanceof H5WasmDataset)) {
throw new TypeError('Expected H5Wasm entity to be dataset');
}
}

// See H5T_class_t in https://github.com/usnistgov/h5wasm/blob/main/src/hdf5_util_helpers.d.ts
/**
 * Tells whether the given h5wasm metadata has datatype class `0`
 * (`H5T_INTEGER` in `H5T_class_t`).
 */
export function isIntegerMetadata(metadata: Metadata) {
  const { type: h5tClass } = metadata;
  return h5tClass === 0;
}

/**
 * Tells whether the given h5wasm metadata has datatype class `1`
 * (`H5T_FLOAT` in `H5T_class_t`).
 */
export function isFloatMetadata(metadata: Metadata) {
  const { type: h5tClass } = metadata;
  return h5tClass === 1;
}

/**
 * Type guard: metadata is numeric when it passes either the integer check
 * or the float check (integer is tested first).
 */
export function isNumericMetadata(
  metadata: Metadata,
): metadata is NumericMetadata {
  if (isIntegerMetadata(metadata)) {
    return true;
  }

  return isFloatMetadata(metadata);
}

/**
 * Tells whether the given h5wasm metadata has datatype class `3`
 * (`H5T_STRING` in `H5T_class_t`).
 */
export function isStringMetadata(metadata: Metadata) {
  const { type: h5tClass } = metadata;
  return h5tClass === 3;
}

/**
 * Tells whether the given h5wasm metadata has datatype class `10`
 * (`H5T_ARRAY` in `H5T_class_t`).
 */
export function isArrayMetadata(metadata: Metadata) {
  const { type: h5tClass } = metadata;
  return h5tClass === 10;
}

/**
 * Type guard for metadata whose datatype class is `6`
 * (`H5T_COMPOUND` in `H5T_class_t`).
 */
export function isCompoundMetadata(
  metadata: Metadata,
): metadata is CompoundMetadata {
  const { type: h5tClass } = metadata;
  return h5tClass === 6;
}

/**
 * Type guard for metadata whose datatype class is `8`
 * (`H5T_ENUM` in `H5T_class_t`).
 */
export function isEnumMetadata(metadata: Metadata): metadata is EnumMetadata {
  const { type: h5tClass } = metadata;
  return h5tClass === 8;
}

/**
 * Assertion counterpart of `isCompoundMetadata`.
 *
 * @throws {Error} when the metadata does not pass `isCompoundMetadata`.
 */
export function assertCompoundMetadata(
  metadata: Metadata,
): asserts metadata is CompoundMetadata {
  const passesGuard = isCompoundMetadata(metadata);
  if (passesGuard) {
    return;
  }

  throw new Error('Expected H5Wasm compound metadata');
}

/**
 * Assertion counterpart of `isNumericMetadata`.
 *
 * @throws {Error} when the metadata does not pass `isNumericMetadata`.
 */
export function assertNumericMetadata(
  metadata: Metadata,
): asserts metadata is NumericMetadata {
  const passesGuard = isNumericMetadata(metadata);
  if (passesGuard) {
    return;
  }

  throw new Error('Expected H5Wasm numeric metadata');
}

function isInt64Type(type: DType): boolean {
return (
(type.class === DTypeClass.Integer || type.class === DTypeClass.Unsigned) &&
Expand Down
19 changes: 1 addition & 18 deletions packages/h5wasm/src/models.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,5 @@
import type {
CompoundTypeMetadata,
EnumTypeMetadata,
Group as H5WasmGroup,
Metadata,
} from 'h5wasm';
import type { Group as H5WasmGroup } from 'h5wasm';

/** Whatever h5wasm's `Group#get` returns (derived from its return type). */
export type H5WasmEntity = ReturnType<H5WasmGroup['get']>;

/** Type of the `attrs` property of an h5wasm `Group`. */
export type H5WasmAttributes = H5WasmGroup['attrs'];

/** h5wasm `Metadata` whose `compound_type` field is declared non-optional. */
export interface CompoundMetadata extends Metadata {
compound_type: CompoundTypeMetadata;
}

/** h5wasm `Metadata` whose `type` is restricted to the literals `0` or `1`. */
export interface NumericMetadata extends Metadata {
type: 0 | 1;
}

/** h5wasm `Metadata` whose `enum_type` field is declared non-optional. */
export interface EnumMetadata extends Metadata {
enum_type: EnumTypeMetadata;
}
137 changes: 65 additions & 72 deletions packages/h5wasm/src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
import { assertDefined } from '@h5web/shared/guards';
import { assertDefined, isNumericType } from '@h5web/shared/guards';
import type {
Attribute,
ChildEntity,
DType,
Group,
NumericType,
ProvidedEntity,
Shape,
} from '@h5web/shared/hdf5-models';
import { Endianness, EntityKind } from '@h5web/shared/hdf5-models';
import { Endianness, EntityKind, H5TClass } from '@h5web/shared/hdf5-models';
import {
arrayType,
boolType,
bitfieldType,
buildEntityPath,
compoundType,
cplxType,
enumType,
compoundOrCplxType,
enumOrBoolType,
floatType,
intType,
isBoolEnumType,
intOrUintType,
opaqueType,
referenceType,
strType,
uintType,
timeType,
toCharSet,
unknownType,
} from '@h5web/shared/hdf5-utils';
import type { Metadata } from 'h5wasm';
Expand All @@ -32,17 +32,7 @@ import {
Group as H5WasmGroup,
} from 'h5wasm';

import {
assertNumericMetadata,
isArrayMetadata,
isCompoundMetadata,
isEnumMetadata,
isFloatMetadata,
isIntegerMetadata,
isNumericMetadata,
isStringMetadata,
} from './guards';
import type { H5WasmAttributes, H5WasmEntity, NumericMetadata } from './models';
import type { H5WasmAttributes, H5WasmEntity } from './models';

// https://github.com/h5wasm/h5wasm-plugins#included-plugins
// https://support.hdfgroup.org/services/contributions.html
Expand Down Expand Up @@ -162,83 +152,86 @@ function parseAttributes(h5wAttrs: H5WasmAttributes): Attribute[] {
});
}

export function parseDTypeFromNumericMetadata(
metadata: NumericMetadata,
): NumericType {
const { signed, size: length, littleEndian } = metadata;
const size = length * 8;
const endianness = littleEndian ? Endianness.LE : Endianness.BE;
export function parseDType(metadata: Metadata): DType {
const { type: h5tClass, size } = metadata;

if (isIntegerMetadata(metadata)) {
return signed ? intType(size, endianness) : uintType(size, endianness);
if (h5tClass === H5TClass.Integer) {
const { signed, littleEndian } = metadata;
return intOrUintType(signed, size * 8, toEndianness(littleEndian));
}

if (isFloatMetadata(metadata)) {
return floatType(size, endianness);
if (h5tClass === H5TClass.Float) {
const { littleEndian } = metadata;
return floatType(size * 8, toEndianness(littleEndian));
}

throw new Error('Expected numeric metadata');
}

export function parseDType(metadata: Metadata): DType {
if (isNumericMetadata(metadata)) {
return parseDTypeFromNumericMetadata(metadata);
if (h5tClass === H5TClass.Time) {
return timeType();
}

if (isStringMetadata(metadata)) {
const { size, cset, vlen } = metadata;

return strType(
cset === 1 ? 'UTF-8' : 'ASCII',
// For variable-length string datatypes, the returned value is the size of the pointer to the actual string and
// not the size of actual variable-length string data (https://portal.hdfgroup.org/display/HDF5/H5T_GET_SIZE)
vlen ? undefined : size,
);
if (h5tClass === H5TClass.String) {
const { cset, vlen } = metadata;
return strType(toCharSet(cset), vlen ? undefined : size);
}

if (isArrayMetadata(metadata)) {
const { array_type } = metadata;
assertDefined(array_type);
if (h5tClass === H5TClass.Bitfield) {
return bitfieldType();
}

return arrayType(parseDType(array_type), array_type.shape);
if (h5tClass === H5TClass.Opaque) {
return opaqueType();
}

if (isCompoundMetadata(metadata)) {
if (h5tClass === H5TClass.Compound) {
const { compound_type } = metadata;
const { members, nmembers } = compound_type;

if (nmembers === 2 && members[0].name === 'r' && members[1].name === 'i') {
const [realTypeMetadata, imagTypeMetadata] = members;
assertNumericMetadata(realTypeMetadata);
assertNumericMetadata(imagTypeMetadata);
assertDefined(compound_type);

return cplxType(
parseDTypeFromNumericMetadata(realTypeMetadata),
parseDTypeFromNumericMetadata(imagTypeMetadata),
);
}

return compoundType(
return compoundOrCplxType(
Object.fromEntries(
members.map((member) => [member.name, parseDType(member)]),
compound_type.members.map((member) => [
member.name,
parseDType(member),
]),
),
);
}

if (isEnumMetadata(metadata)) {
if (h5tClass === H5TClass.Reference) {
return referenceType();
}

if (h5tClass === H5TClass.Enum) {
const { enum_type } = metadata;
const { members: mapping, type: baseType } = enum_type;
assertDefined(enum_type);
const { members, type } = enum_type;

const baseMetadata = { ...metadata, type: baseType };
assertNumericMetadata(baseMetadata);
const baseType = parseDType({ ...metadata, type });
if (!isNumericType(baseType)) {
throw new Error('Expected enum type to have numeric base type');
}

const type = enumType(parseDTypeFromNumericMetadata(baseMetadata), mapping);
return isBoolEnumType(type) ? boolType() : type; // booleans stored as enums by h5py
return enumOrBoolType(baseType, members);
}

if (h5tClass === H5TClass.Vlen) {
// Not currently provided, so unable to know base type
// const { array_type } = metadata;
// assertDefined(array_type);
return arrayType(unknownType());
}

if (h5tClass === H5TClass.Array) {
const { array_type } = metadata;
assertDefined(array_type);
return arrayType(parseDType(array_type), array_type.shape);
}

return unknownType();
}

/**
 * Maps a little-endian flag to the matching `Endianness` member:
 * `Endianness.LE` when `true`, `Endianness.BE` otherwise.
 */
function toEndianness(littleEndian: boolean): Endianness {
  if (littleEndian) {
    return Endianness.LE;
  }

  return Endianness.BE;
}

export function convertSelectionToRanges(
dataset: H5WasmDataset,
selection: string,
Expand Down
Loading

0 comments on commit a4cc8de

Please sign in to comment.