Skip to content

Commit aa7903e

Browse files
committed
Add StyleHint::Common, refactor BaseKind
1 parent 1756162 commit aa7903e

File tree

13 files changed

+222
-58
lines changed

13 files changed

+222
-58
lines changed

doodle-formats/src/format/opentype.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5590,7 +5590,7 @@ pub(crate) mod alt {
55905590
"axis_value_offsets",
55915591
with_view(
55925592
ViewExpr::var("axis_value_scope"),
5593-
read_array(count, BaseKind::U16),
5593+
read_array(count, BaseKind::U16BE),
55945594
),
55955595
), // TODO - ForEach(offset: u16) -> offsetu16(offset, axis_value_table)
55965596
]),

src/alt.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use std::{
77
};
88

99
use crate::{
10-
BaseKind, BaseType, ByteSet, DynFormat, Expr, Format, FormatModule, FormatRef, IntoLabel,
11-
Label, Pattern, StyleHint, TypeScope, ValueKind, ValueType, ViewExpr,
10+
BaseKind, BaseType, ByteSet, DynFormat, Endian, Expr, Format, FormatModule, FormatRef,
11+
IntoLabel, Label, Pattern, StyleHint, TypeScope, ValueKind, ValueType, ViewExpr,
1212
typecheck::UnificationError, valuetype::Container,
1313
};
1414
use anyhow::{Result as AResult, anyhow};
@@ -129,7 +129,7 @@ pub enum ViewFormatExt {
129129
/// Captures a byte-slice of a View, given an expression for the byte-length of the slice
130130
CaptureBytes(Box<Expr>),
131131
/// Captures a scoped ReadArray of the given unit, given an expression for element-count (*NOT* byte-length)
132-
ReadArray(Box<Expr>, BaseKind),
132+
ReadArray(Box<Expr>, BaseKind<Endian>),
133133
/// Constructs a View-object in the value layer
134134
ReifyView,
135135
}

src/codegen/mod.rs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ use resolve::Resolvable;
1212
pub use rust_ast::ToFragment;
1313

1414
use crate::{
15-
Arith, BaseType, DynFormat, Expr, Format, FormatModule, IntRel, IntoLabel, Label, MatchTree,
16-
Pattern, StyleHint, UnaryOp, ViewExpr, ViewFormat,
15+
Arith, BaseKind, BaseType, CommonOp, DynFormat, Endian, Expr, Format, FormatModule, IntRel,
16+
IntoLabel, Label, MatchTree, Pattern, StyleHint, UnaryOp, ViewExpr, ViewFormat,
1717
byte_set::ByteSet,
1818
decoder::extract_pair,
1919
parser::error::TraceHash,
@@ -2702,7 +2702,7 @@ impl ToAst for SimpleLogic<GTExpr> {
27022702
enum ViewLogic<ExprT> {
27032703
LetView(Label, Box<CaseLogic<ExprT>>),
27042704
CaptureBytes(RustExpr, RustExpr),
2705-
ReadArray(RustExpr, RustExpr, crate::BaseKind),
2705+
ReadArray(RustExpr, RustExpr, BaseKind<Endian>),
27062706
ReifyView(RustExpr),
27072707
}
27082708

@@ -3293,11 +3293,14 @@ where
32933293
}
32943294
OtherLogic::Hint(_hint, inner) => {
32953295
let inner_block = inner.to_ast(ctxt);
3296-
32973296
match _hint {
32983297
// REVIEW - do we want to perform any local modifications?
32993298
StyleHint::Record { .. } => inner_block,
33003299
StyleHint::AsciiStr => inner_block,
3300+
StyleHint::Common(CommonOp::EndianParse(_kind_endian)) => {
3301+
// REVIEW - do we want to swap-in particular endian parses instead?
3302+
inner_block
3303+
}
33013304
}
33023305
}
33033306
}
@@ -4401,7 +4404,25 @@ impl<'a> Elaborator<'a> {
44014404
}
44024405
}
44034406
}
4404-
StyleHint::AsciiStr => (),
4407+
StyleHint::AsciiStr => {
4408+
// REVIEW - should we check for Seq(u8)-like types?
4409+
}
4410+
StyleHint::Common(common_op) => match common_op {
4411+
CommonOp::EndianParse(base_kind) => {
4412+
// double-check the base kind against the type
4413+
let ty = gt.to_rust_type();
4414+
let prim1 = PrimType::from(BaseType::from(*base_kind));
4415+
let Some(prim0) = ty.try_as_prim() else {
4416+
unreachable!(
4417+
"found non-primitive type for common format elaboration: {ty:?} @ {index} (expected {prim1:?})"
4418+
);
4419+
};
4420+
assert_eq!(
4421+
prim0, prim1,
4422+
"CommonOp: actual inner-parse type ({prim0:?}) does not match claimed type ({prim1:?})"
4423+
);
4424+
}
4425+
},
44054426
}
44064427
TypedFormat::Hint(gt, style_hint.clone(), Box::new(t_inner))
44074428
}

src/codegen/model.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::{BaseKind, Label};
1+
use crate::{BaseKind, Endian, Label};
22

33
use super::rust_ast::*;
44
use super::{GenBlock, GenExpr, GenStmt};
@@ -343,13 +343,16 @@ pub fn read_from_view(view: RustExpr, len: RustExpr) -> RustExpr {
343343
}
344344

345345
/// Model RustExpr for handling `ViewFormat::ReadArray(len, kind)` in the Parser (View) model.
346-
pub fn read_array_from_view(view: RustExpr, len: RustExpr, kind: BaseKind) -> RustExpr {
346+
pub fn read_array_from_view(view: RustExpr, len: RustExpr, kind: BaseKind<Endian>) -> RustExpr {
347347
// NOTE - we need these separate methods because RustExpr::MethodCall doesn't allow turbo-fish type-parameters
348348
match kind {
349349
BaseKind::U8 => try_call!(view, read_array_u8, len),
350-
BaseKind::U16 => try_call!(view, read_array_u16be, len),
351-
BaseKind::U32 => try_call!(view, read_array_u32be, len),
352-
BaseKind::U64 => try_call!(view, read_array_u64be, len),
350+
BaseKind::U16BE => try_call!(view, read_array_u16be, len),
351+
BaseKind::U32BE => try_call!(view, read_array_u32be, len),
352+
BaseKind::U64BE => try_call!(view, read_array_u64be, len),
353+
BaseKind::U16LE | BaseKind::U32LE | BaseKind::U64LE => {
354+
unimplemented!("little-endian read-array parses not yet implemented")
355+
}
353356
}
354357
}
355358

src/codegen/typed_decoder.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use crate::byte_set::ByteSet;
2-
use crate::{BaseKind, Format, FormatModule, Label, MatchTree, MaybeTyped, Next, StyleHint};
2+
use crate::{
3+
BaseKind, Endian, Format, FormatModule, Label, MatchTree, MaybeTyped, Next, StyleHint,
4+
};
35
use anyhow::{Result as AResult, anyhow};
46
use std::borrow::Cow;
57
use std::collections::HashMap;
@@ -231,7 +233,7 @@ pub(crate) enum TypedDecoder<TypeRep> {
231233
TypeRep,
232234
TypedViewExpr<TypeRep>,
233235
Box<TypedExpr<TypeRep>>,
234-
BaseKind,
236+
BaseKind<Endian>,
235237
),
236238
ReifyView(TypeRep, TypedViewExpr<TypeRep>),
237239
}

src/codegen/typed_format.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use super::{AtomType, LocalType};
77
use crate::bounds::Bounds;
88
use crate::byte_set::ByteSet;
99
use crate::codegen::rust_ast::{RustLt, RustParams, UseParams};
10-
use crate::{Arith, BaseKind, IntRel, Label, StyleHint, TypeHint, UnaryOp};
10+
use crate::{Arith, BaseKind, Endian, IntRel, Label, StyleHint, TypeHint, UnaryOp};
1111

1212
pub(crate) mod variables;
1313

@@ -538,7 +538,7 @@ impl<TypeRep> std::hash::Hash for TypedDynFormat<TypeRep> {
538538
#[derive(Clone, Debug, PartialEq, Eq)]
539539
pub enum TypedViewFormat<TypeRep> {
540540
CaptureBytes(Box<TypedExpr<TypeRep>>),
541-
ReadArray(Box<TypedExpr<TypeRep>>, BaseKind),
541+
ReadArray(Box<TypedExpr<TypeRep>>, BaseKind<Endian>),
542542
ReifyView,
543543
}
544544

src/decoder.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
Arith, DynFormat, Expr, Format, FormatModule, IntRel, MatchTree, Next, TypeScope, ValueType,
66
ViewExpr, pattern::Pattern,
77
};
8-
use crate::{BaseKind, IntoLabel, Label, MaybeTyped, TypeHint, UnaryOp, ViewFormat};
8+
use crate::{BaseKind, Endian, IntoLabel, Label, MaybeTyped, TypeHint, UnaryOp, ViewFormat};
99
use anyhow::{Result as AResult, anyhow};
1010
use serde::Serialize;
1111
use std::borrow::Cow;
@@ -965,7 +965,7 @@ pub enum Decoder {
965965
LiftedOption(Option<Box<Decoder>>),
966966
LetView(Label, Box<Decoder>),
967967
CaptureBytes(ViewExpr, Box<Expr>),
968-
ReadArray(ViewExpr, Box<Expr>, BaseKind),
968+
ReadArray(ViewExpr, Box<Expr>, BaseKind<Endian>),
969969
ReifyView(ViewExpr),
970970
}
971971

@@ -1959,32 +1959,38 @@ impl Decoder {
19591959
}
19601960
}
19611961

1962-
fn read_base(buf: ReadCtxt<'_>, kind: BaseKind) -> Result<(Value, ReadCtxt<'_>), DecodeError> {
1962+
fn read_base(
1963+
buf: ReadCtxt<'_>,
1964+
kind: BaseKind<Endian>,
1965+
) -> Result<(Value, ReadCtxt<'_>), DecodeError> {
19631966
match kind {
19641967
BaseKind::U8 => {
19651968
let Some((byte, new_buf)) = buf.read_byte() else {
19661969
return Err(DecodeError::overbyte(buf.offset));
19671970
};
19681971
Ok((Value::U8(byte), new_buf))
19691972
}
1970-
BaseKind::U16 => {
1973+
BaseKind::U16BE => {
19711974
let Some((val, new_buf)) = buf.read_u16be() else {
19721975
return Err(DecodeError::overrun(kind.size(), buf.offset));
19731976
};
19741977
Ok((Value::U16(val), new_buf))
19751978
}
1976-
BaseKind::U32 => {
1979+
BaseKind::U32BE => {
19771980
let Some((val, new_buf)) = buf.read_u32be() else {
19781981
return Err(DecodeError::overrun(kind.size(), buf.offset));
19791982
};
19801983
Ok((Value::U32(val), new_buf))
19811984
}
1982-
BaseKind::U64 => {
1985+
BaseKind::U64BE => {
19831986
let Some((val, new_buf)) = buf.read_u64be() else {
19841987
return Err(DecodeError::overrun(kind.size(), buf.offset));
19851988
};
19861989
Ok((Value::U64(val), new_buf))
19871990
}
1991+
BaseKind::U16LE | BaseKind::U32LE | BaseKind::U64LE => {
1992+
unimplemented!("little-endian read-base parses not yet implemented")
1993+
}
19881994
}
19891995
}
19901996

src/helper.rs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ use std::collections::BTreeSet;
22

33
use num_traits::{ToPrimitive, Zero};
44

5-
use crate::bounds::Bounds;
65
use crate::byte_set::ByteSet;
76
pub use crate::marker::BaseKind;
87
use crate::{
98
Arith, BaseType, Expr, Format, IntRel, IntoLabel, Label, Pattern, StyleHint, TypeHint, UnaryOp,
109
ValueType, ViewExpr, ViewFormat,
1110
};
11+
use crate::{Endian, bounds::Bounds};
1212

1313
#[derive(Debug, Clone, Copy, PartialEq)]
1414
pub enum BitFieldKind {
@@ -1478,6 +1478,44 @@ pub fn capture_bytes(len: Expr) -> ViewFormat {
14781478
}
14791479

14801480
/// Helper for [`ViewFormat::ReadArray`]
1481-
pub fn read_array(len: Expr, kind: BaseKind) -> ViewFormat {
1481+
pub fn read_array(len: Expr, kind: BaseKind<Endian>) -> ViewFormat {
14821482
ViewFormat::ReadArray(Box::new(len), kind)
14831483
}
1484+
1485+
pub mod base {
1486+
use super::*;
1487+
use crate::CommonOp;
1488+
1489+
macro_rules! endian {
1490+
( $( $fname:ident, $kind_endian:ident, $size:expr, $op:ident );* $(;)? ) => {
1491+
$(
1492+
pub fn $fname() -> Format {
1493+
Format::Hint(
1494+
StyleHint::Common(CommonOp::EndianParse(BaseKind::$kind_endian)),
1495+
Box::new(map(
1496+
tuple_repeat($size, Format::ANY_BYTE),
1497+
lambda("x", Expr::$op(Box::new(var("x")))),
1498+
))
1499+
)
1500+
}
1501+
)*
1502+
};
1503+
}
1504+
1505+
pub fn u8() -> Format {
1506+
Format::Hint(
1507+
StyleHint::Common(CommonOp::EndianParse(BaseKind::U8)),
1508+
Box::new(Format::ANY_BYTE),
1509+
)
1510+
}
1511+
1512+
endian! {
1513+
u16be, U16BE, 2, U16Be;
1514+
u16le, U16LE, 2, U16Le;
1515+
u32be, U32BE, 4, U32Be;
1516+
u32le, U32LE, 4, U32Le;
1517+
u64be, U64BE, 8, U64Be;
1518+
u64le, U64LE, 8, U64Le;
1519+
}
1520+
}
1521+
pub use base::*;

src/lib.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pub mod error;
2424
pub mod helper;
2525
pub mod loc_decoder;
2626
pub mod marker;
27-
pub use marker::BaseKind;
27+
pub use marker::{BaseKind, Endian};
2828
pub mod output;
2929
pub mod parser;
3030
mod precedence;
@@ -690,7 +690,7 @@ pub enum ViewFormat {
690690
/// CaptureBytes(N): captures a slice of N bytes from the start of the View
691691
CaptureBytes(Box<Expr>),
692692
/// ReadArray(M, Kind): captures an array of M elements of the indicated Kind
693-
ReadArray(Box<Expr>, BaseKind),
693+
ReadArray(Box<Expr>, BaseKind<Endian>),
694694
/// ReifyView: produces a value-element that encapsulates the View-object
695695
ReifyView,
696696
}
@@ -758,6 +758,15 @@ impl ViewExpr {
758758
}
759759
}
760760

761+
/// Operations we want to treat as semi-first-class in downstream processing,
762+
/// without forcing us to add new primitives into the Format layer.
763+
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
764+
#[serde(tag = "tag", content = "args")]
765+
pub enum CommonOp {
766+
EndianParse(BaseKind<Endian>),
767+
}
768+
769+
/// Hints that can be attached to an inner format (via `Format::Hint`) to inform downstream processing
761770
// NOTE - as currently defined, StyleHint could easily be Copy, but it would be a breaking change if we later had to remove that trait
762771
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
763772
#[serde(tag = "tag", content = "args")]
@@ -768,6 +777,7 @@ pub enum StyleHint {
768777
old_style: bool,
769778
},
770779
AsciiStr,
780+
Common(CommonOp),
771781
}
772782

773783
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]

src/loc_decoder.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ use crate::decoder::{
88
};
99
use crate::error::{DecodeError, LocDecodeError};
1010
use crate::read::ReadCtxt;
11-
use crate::{Arith, BaseKind, DynFormat, Expr, Format, IntRel, Label, Pattern, UnaryOp, ViewExpr};
11+
use crate::{
12+
Arith, BaseKind, DynFormat, Endian, Expr, Format, IntRel, Label, Pattern, UnaryOp, ViewExpr,
13+
};
1214
use std::borrow::Cow;
1315
use std::cmp::Ordering;
1416

@@ -1859,7 +1861,7 @@ impl Decoder {
18591861

18601862
fn read_base(
18611863
buf: ReadCtxt<'_>,
1862-
kind: BaseKind,
1864+
kind: BaseKind<Endian>,
18631865
) -> Result<(ParsedValue, ReadCtxt<'_>), DecodeError<ParsedValue>> {
18641866
let (val, new_buf) = match kind {
18651867
BaseKind::U8 => {
@@ -1868,24 +1870,27 @@ fn read_base(
18681870
};
18691871
(Value::U8(byte), new_buf)
18701872
}
1871-
BaseKind::U16 => {
1873+
BaseKind::U16BE => {
18721874
let Some((val, new_buf)) = buf.read_u16be() else {
18731875
return Err(DecodeError::overrun(kind.size(), buf.offset));
18741876
};
18751877
(Value::U16(val), new_buf)
18761878
}
1877-
BaseKind::U32 => {
1879+
BaseKind::U32BE => {
18781880
let Some((val, new_buf)) = buf.read_u32be() else {
18791881
return Err(DecodeError::overrun(kind.size(), buf.offset));
18801882
};
18811883
(Value::U32(val), new_buf)
18821884
}
1883-
BaseKind::U64 => {
1885+
BaseKind::U64BE => {
18841886
let Some((val, new_buf)) = buf.read_u64be() else {
18851887
return Err(DecodeError::overrun(kind.size(), buf.offset));
18861888
};
18871889
(Value::U64(val), new_buf)
18881890
}
1891+
BaseKind::U16LE | BaseKind::U32LE | BaseKind::U64LE => {
1892+
unimplemented!("little-endian read-base parses not yet implemented")
1893+
}
18891894
};
18901895
Ok((ParsedValue::new_flat(val, buf.offset, kind.size()), new_buf))
18911896
}

0 commit comments

Comments
 (0)