[Cider 2] Memory data dump format & serialization/deserialization (#1988)

* bad placeholder
* data dump and testing tweaks
* Add some documentation and todos
* rename the method to make clippy less angry
* fun with memories and such
calyxir · Mar 27, 2024 · 6be8d86 · 6be8d86
1 parent 7a2a236
commit 6be8d86
Show file tree

Hide file tree

Showing 8 changed files with 409 additions and 6 deletions.
diff --git a/interp/src/flatten/mod.rs b/interp/src/flatten/mod.rs
@@ -1,6 +1,6 @@
 pub(crate) mod flat_ir;
 pub mod primitives;
-mod structures;
+pub(crate) mod structures;
 pub(crate) mod text_utils;
 
 use structures::environment::{Environment, Simulator};

diff --git a/interp/src/flatten/primitives/stateful/memories.rs b/interp/src/flatten/primitives/stateful/memories.rs
@@ -1,3 +1,5 @@
+use itertools::Itertools;
+
 use crate::{
     errors::InterpreterError,
     flatten::{
@@ -247,7 +249,7 @@ impl CombMem {
         T: Into<Shape>,
     {
         let shape = size.into();
-        let internal_state = vec![Value::zeroes(width); shape.len()];
+        let internal_state = vec![Value::zeroes(width); shape.size()];
 
         Self {
             base_port: base,
@@ -258,6 +260,47 @@ impl CombMem {
             done_is_high: false,
         }
     }
+
+    /// Builds a memory whose contents come from a raw byte image.
+    ///
+    /// `data` is cut into consecutive groups of `width.div_ceil(8)` bytes and
+    /// each group becomes one entry via `Value::from_bytes_le`.
+    ///
+    /// # Panics
+    /// Panics if `width` is zero, if the number of entries found in `data`
+    /// differs from `size.size()`, or if `data` has trailing bytes that do
+    /// not form a whole entry.
+    pub fn new_with_init<T>(
+        base_port: GlobalPortIdx,
+        width: u32,
+        allow_invalid: bool,
+        size: T,
+        data: &[u8],
+    ) -> Self
+    where
+        T: Into<Shape>,
+    {
+        // `chunks_exact` panics with an unrelated message when handed a zero
+        // chunk size, so reject zero-width memories explicitly.
+        assert!(width > 0, "memory width must be non-zero");
+        let bytes_per_entry = width.div_ceil(8) as usize;
+        let size = size.into();
+
+        // Chunk the data once; the remainder is read before the iterator is
+        // consumed by `map`.
+        let chunks = data.chunks_exact(bytes_per_entry);
+        let leftover = chunks.remainder().len();
+        let internal_state = chunks
+            .map(|x| Value::from_bytes_le(x, width as usize))
+            .collect_vec();
+
+        assert_eq!(
+            internal_state.len(),
+            size.size(),
+            "initial data holds a different number of entries than the memory"
+        );
+        assert_eq!(
+            leftover, 0,
+            "initial data is not a whole number of {bytes_per_entry}-byte entries"
+        );
+
+        Self {
+            base_port,
+            internal_state,
+            _allow_invalid_access: allow_invalid,
+            _width: width,
+            addresser: MemDx::new(size),
+            done_is_high: false,
+        }
+    }
+
+    /// Flattens the memory contents into a single byte buffer by appending
+    /// the `to_bytes` output of every entry, in storage order.
+    pub fn dump_data(&self) -> Vec<u8> {
+        let mut bytes = Vec::new();
+        for entry in &self.internal_state {
+            bytes.extend(entry.to_bytes());
+        }
+        bytes
+    }
 }
 
 impl Primitive for CombMem {
@@ -365,7 +408,7 @@ impl SeqMem {
         size: T,
     ) -> Self {
         let shape = size.into();
-        let internal_state = vec![Value::zeroes(width); shape.len()];
+        let internal_state = vec![Value::zeroes(width); shape.size()];
 
         Self {
             base_port: base,
@@ -378,6 +421,41 @@ impl SeqMem {
         }
     }
 
+    /// Builds a memory whose contents come from a raw byte image.
+    ///
+    /// `data` is cut into consecutive groups of `width.div_ceil(8)` bytes and
+    /// each group becomes one entry via `Value::from_bytes_le`. The read
+    /// output starts out undefined.
+    ///
+    /// # Panics
+    /// Panics if `width` is zero, if the number of entries found in `data`
+    /// differs from `size.size()`, or if `data` has trailing bytes that do
+    /// not form a whole entry.
+    pub fn new_with_init<T>(
+        base_port: GlobalPortIdx,
+        width: u32,
+        allow_invalid: bool,
+        size: T,
+        data: &[u8],
+    ) -> Self
+    where
+        T: Into<Shape>,
+    {
+        // `chunks_exact` panics with an unrelated message when handed a zero
+        // chunk size, so reject zero-width memories explicitly.
+        assert!(width > 0, "memory width must be non-zero");
+        let bytes_per_entry = width.div_ceil(8) as usize;
+        let size = size.into();
+
+        // Chunk the data once; the remainder is read before the iterator is
+        // consumed by `map`.
+        let chunks = data.chunks_exact(bytes_per_entry);
+        let leftover = chunks.remainder().len();
+        let internal_state = chunks
+            .map(|x| Value::from_bytes_le(x, width as usize))
+            .collect_vec();
+
+        assert_eq!(
+            internal_state.len(),
+            size.size(),
+            "initial data holds a different number of entries than the memory"
+        );
+        assert_eq!(
+            leftover, 0,
+            "initial data is not a whole number of {bytes_per_entry}-byte entries"
+        );
+
+        Self {
+            base_port,
+            internal_state,
+            _allow_invalid_access: allow_invalid,
+            _width: width,
+            addresser: MemDx::new(size),
+            done_is_high: false,
+            read_out: PortValue::new_undef(),
+        }
+    }
+
     declare_ports![
         _CLK: 0,
         RESET: 1,
@@ -408,6 +486,13 @@ impl SeqMem {
     /// Global index of the reset port: the base port index offset by
     /// `Self::RESET`.
     pub fn reset(&self) -> GlobalPortIdx {
         let port_index = self.base_port.index() + Self::RESET;
         port_index.into()
     }
+
+    /// Flattens the memory contents into a single byte buffer by appending
+    /// the `to_bytes` output of every entry, in storage order.
+    pub fn dump_data(&self) -> Vec<u8> {
+        let mut bytes = Vec::new();
+        for entry in &self.internal_state {
+            bytes.extend(entry.to_bytes());
+        }
+        bytes
+    }
 }
 
 impl Primitive for SeqMem {

diff --git a/interp/src/lib.rs b/interp/src/lib.rs
@@ -1,6 +1,6 @@
 pub mod interpreter;
 pub mod primitives;
-mod serialization;
+pub mod serialization;
 pub use utils::MemoryMap;
 pub mod configuration;
 pub mod debugger;

diff --git a/interp/src/logging.rs b/interp/src/logging.rs
@@ -10,7 +10,7 @@ use slog::{Drain, Level};
 static ROOT_LOGGER: OnceCell<Logger> = OnceCell::new();
 
 pub fn initialize_default_logger() {
-    initialize_logger(false);
+    initialize_logger(true);
 }
 
 pub fn initialize_logger(quiet: bool) {

diff --git a/interp/src/serialization/data_dump.rs b/interp/src/serialization/data_dump.rs
@@ -0,0 +1,241 @@
+use std::num::NonZeroUsize;
+
+use serde::{Deserialize, Serialize};
+
+/// Describes one memory in a data dump: its name, the bit width of each
+/// entry, and the number of entries.
+#[derive(Serialize, Debug, Deserialize, PartialEq, Clone)]
+pub struct MemoryDeclaration {
+    /// Name of the memory.
+    pub name: String,
+    /// Width of a single entry, in bits.
+    pub width: NonZeroUsize,
+    /// Number of entries.
+    pub size: NonZeroUsize,
+}
+
+impl MemoryDeclaration {
+    /// Creates a declaration from plain integers.
+    ///
+    /// # Panics
+    /// Panics if `width` or `size` is zero.
+    pub fn new(name: String, width: usize, size: usize) -> Self {
+        let width = NonZeroUsize::new(width).expect("width must be non-zero");
+        let size = NonZeroUsize::new(size).expect("size must be non-zero");
+
+        Self { name, width, size }
+    }
+
+    /// Total number of bytes this memory occupies in a dump. Each entry is
+    /// rounded up to a whole number of bytes.
+    pub fn byte_count(&self) -> usize {
+        let bytes_per_entry = self.width.get().div_ceil(8);
+        bytes_per_entry * self.size.get()
+    }
+}
+
+/// Header of a data dump: the name of the top-level component and the
+/// declarations of the memories whose contents follow.
+#[derive(Serialize, Debug, Deserialize, PartialEq, Clone)]
+pub struct DataHeader {
+    pub top_level: String,
+    pub memories: Vec<MemoryDeclaration>,
+}
+
+impl DataHeader {
+    /// Creates a header from its two parts.
+    pub fn new(top_level: String, memories: Vec<MemoryDeclaration>) -> Self {
+        Self {
+            top_level,
+            memories,
+        }
+    }
+
+    /// Sum of the byte counts of all declared memories.
+    pub fn data_size(&self) -> usize {
+        self.memories
+            .iter()
+            .map(MemoryDeclaration::byte_count)
+            .sum()
+    }
+}
+
+/// A header together with the raw bytes of the memories it declares, stored
+/// back to back in declaration order.
+#[derive(Debug, PartialEq)]
+pub struct DataDump {
+    pub header: DataHeader,
+    pub data: Vec<u8>,
+}
+
+impl DataDump {
+    /// Writes the dump as an 8-byte little-endian header length, the header
+    /// as JSON text, and then the raw data bytes.
+    ///
+    /// # Panics
+    /// Panics if the header cannot be encoded as JSON or if the writer
+    /// reports an error.
+    // TODO Griffin: handle the errors properly
+    pub fn serialize(&self, writer: &mut dyn std::io::Write) {
+        let header_str = serde_json::to_string(&self.header)
+            .expect("failed to encode the data header as JSON");
+        // The length prefix is always a u64 so the on-disk format does not
+        // depend on the pointer width of the machine producing the file.
+        let header_len = u64::try_from(header_str.len())
+            .expect("header length does not fit in 64 bits");
+
+        // `write` is allowed to stop after a partial write; `write_all` keeps
+        // going until every byte is written or an error occurs.
+        writer
+            .write_all(&header_len.to_le_bytes())
+            .expect("failed to write the header length");
+        writer
+            .write_all(header_str.as_bytes())
+            .expect("failed to write the header");
+        writer
+            .write_all(&self.data)
+            .expect("failed to write the memory data");
+    }
+
+    /// Reads a dump in the layout produced by [`DataDump::serialize`].
+    ///
+    /// # Panics
+    /// Panics on read errors, if the header is not valid UTF-8 JSON, or if
+    /// the number of data bytes differs from `header.data_size()`.
+    // TODO Griffin: handle the errors properly
+    pub fn deserialize(reader: &mut dyn std::io::Read) -> Self {
+        let mut raw_header_len = [0u8; 8];
+        reader
+            .read_exact(&mut raw_header_len)
+            .expect("failed to read the header length");
+        // Decode as u64 (the width used when writing) and only then narrow to
+        // usize, so this also builds on targets where usize is not 8 bytes.
+        let header_len = usize::try_from(u64::from_le_bytes(raw_header_len))
+            .expect("header length does not fit in usize");
+
+        let mut raw_header = vec![0u8; header_len];
+        reader
+            .read_exact(&mut raw_header)
+            .expect("failed to read the header");
+        let header_str =
+            String::from_utf8(raw_header).expect("header is not valid UTF-8");
+        let header: DataHeader = serde_json::from_str(&header_str)
+            .expect("header is not a valid JSON data header");
+        let mut data: Vec<u8> = Vec::with_capacity(header.data_size());
+
+        // we could do a read_exact here instead but I opted for read_to_end
+        // instead to avoid allowing incorrect/malformed data files
+        let amount_read = reader
+            .read_to_end(&mut data)
+            .expect("failed to read the memory data");
+        assert_eq!(
+            amount_read,
+            header.data_size(),
+            "data section size does not match the size declared in the header"
+        );
+
+        DataDump { header, data }
+    }
+
+    /// Returns the bytes belonging to the memory named `mem_name`.
+    ///
+    /// Memories are laid out in header order, so the start offset is the sum
+    /// of the byte counts of all memories declared before the match.
+    ///
+    /// # Panics
+    /// Panics if no declared memory has that name.
+    // TODO Griffin: Replace the panic with a proper error and the standard
+    // handling
+    pub fn get_data(&self, mem_name: &str) -> &[u8] {
+        let mut current_base = 0_usize;
+        for mem in &self.header.memories {
+            let end = current_base + mem.byte_count();
+            if mem.name == mem_name {
+                return &self.data[current_base..end];
+            }
+            current_base = end;
+        }
+        panic!("Memory not found")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Fixed-input round trip: a dump that is serialized and then read back
+    // must compare equal to the original.
+    #[test]
+    fn test_data_dump() {
+        let memories = vec![
+            MemoryDeclaration::new("mem0".to_string(), 32, 16), // 64 bytes
+            MemoryDeclaration::new("mem1".to_string(), 4, 17),  // 17 bytes
+            MemoryDeclaration::new("mem2".to_string(), 3, 2),   // 2 bytes
+        ];
+        // 83 bytes in total
+        let header = DataHeader {
+            top_level: "test".to_string(),
+            memories,
+        };
+
+        // This was generated from random.org
+        let data = vec![
+            230, 165, 232, 82, 9, 111, 146, 146, 243, 18, 26, 100, 23, 45, 22,
+            34, 229, 70, 32, 185, 21, 160, 237, 107, 227, 253, 174, 96, 238,
+            118, 182, 23, 167, 67, 5, 76, 82, 223, 205, 190, 109, 177, 75, 15,
+            216, 40, 93, 111, 231, 205, 136, 231, 193, 155, 217, 192, 120, 235,
+            81, 15, 214, 225, 113, 246, 98, 212, 51, 120, 17, 112, 83, 126,
+            218, 136, 0, 16, 116, 139, 213, 255, 83, 107, 112,
+        ];
+
+        let original = DataDump { header, data };
+
+        let mut encoded = Vec::new();
+        original.serialize(&mut encoded);
+
+        let mut remaining = encoded.as_slice();
+        let decoded = DataDump::deserialize(&mut remaining);
+        assert_eq!(decoded, original);
+    }
+
+    use proptest::prelude::*;
+
+    // Strategy for a single memory declaration: any name, a width of 1..=256
+    // bits and 1..=500 entries.
+    prop_compose! {
+        fn arb_memory_declaration()(name in any::<String>(), width in 1_usize..=256, size in 1_usize..=500) -> MemoryDeclaration {
+            // `name` is already an owned String, so it is moved in directly
+            // rather than cloned through `to_string`.
+            MemoryDeclaration::new(name, width, size)
+        }
+    }
+
+    // Strategy for a header with one or two memories and an arbitrary
+    // top-level name.
+    prop_compose! {
+        fn arb_data_header()(
+            top_level in any::<String>(),
+            mut memories in prop::collection::vec(arb_memory_declaration(), 1..3)
+        ) -> DataHeader {
+            // Suffix every name with its position so that no two memories
+            // share a name, whatever strings were generated.
+            memories.iter_mut().enumerate().for_each(|(idx, decl)| {
+                decl.name = format!("{}_{idx}", decl.name);
+            });
+
+            DataHeader { top_level, memories }
+        }
+    }
+
+    // Strategy for exactly `size` arbitrary bytes.
+    prop_compose! {
+        fn arb_data(size: usize)(
+            bytes in prop::collection::vec(u8::MIN..=u8::MAX, size)
+        )  -> Vec<u8> {
+            bytes
+        }
+    }
+
+    /// Strategy for a complete dump: a generated header plus exactly
+    /// `header.data_size()` bytes, with the padding bits of every value
+    /// cleared.
+    fn arb_data_dump() -> impl Strategy<Value = DataDump> {
+        let header_and_bytes = arb_data_header().prop_flat_map(|header| {
+            let byte_strategy = arb_data(header.data_size());
+            (Just(header), byte_strategy)
+        });
+
+        header_and_bytes.prop_map(|(header, mut data)| {
+            // The top byte of each value may contain bits beyond the declared
+            // width. Those must be zero, otherwise the bytes dumped back out
+            // of a memory primitive would not match the bytes loaded into it.
+            let mut start = 0_usize;
+            for mem in &header.memories {
+                let bytes_per_val = mem.width.get().div_ceil(8);
+                let mask = match mem.width.get() % 8 {
+                    0 => u8::MAX,
+                    used_bits => u8::MAX >> (8 - used_bits),
+                };
+                let end = start + mem.byte_count();
+
+                let mut values = data[start..end].chunks_exact_mut(bytes_per_val);
+                for value in values.by_ref() {
+                    value[bytes_per_val - 1] &= mask;
+                }
+                // Every memory region must split evenly into whole values.
+                assert!(values.into_remainder().is_empty());
+
+                start = end;
+            }
+
+            DataDump { header, data }
+        })
+    }
+
+    proptest! {
+        // Serializing and then deserializing any generated dump must give
+        // back an equal dump.
+        #[test]
+        fn prop_roundtrip(dump in arb_data_dump()) {
+            let mut encoded = Vec::new();
+            dump.serialize(&mut encoded);
+
+            let mut remaining = encoded.as_slice();
+            let decoded = DataDump::deserialize(&mut remaining);
+            prop_assert_eq!(dump, decoded);
+        }
+    }
+
+    use crate::flatten::{
+        flat_ir::prelude::GlobalPortIdx,
+        primitives::stateful::{CombMemD1, SeqMemD1},
+        structures::index_trait::IndexRef,
+    };
+
+    proptest! {
+        // Loading a memory's bytes into a combinational memory and dumping
+        // them again must reproduce the same bytes.
+        #[test]
+        fn comb_roundtrip(dump in arb_data_dump()) {
+            for mem in &dump.header.memories {
+                let initial = dump.get_data(&mem.name);
+                let memory_prim = CombMemD1::new_with_init(
+                    GlobalPortIdx::new(0),
+                    mem.width.get() as u32,
+                    false,
+                    mem.size.get(),
+                    initial,
+                );
+                let data = memory_prim.dump_data();
+                prop_assert_eq!(initial, data);
+            }
+        }
+
+        // Same check for the sequential memory.
+        #[test]
+        fn seq_roundtrip(dump in arb_data_dump()) {
+            for mem in &dump.header.memories {
+                let initial = dump.get_data(&mem.name);
+                let memory_prim = SeqMemD1::new_with_init(
+                    GlobalPortIdx::new(0),
+                    mem.width.get() as u32,
+                    false,
+                    mem.size.get(),
+                    initial,
+                );
+                let data = memory_prim.dump_data();
+                prop_assert_eq!(initial, data);
+            }
+        }
+    }
+}
diff --git a/interp/src/serialization/mod.rs b/interp/src/serialization/mod.rs
@@ -0,0 +1,3 @@
+pub mod data_dump;
+mod old;
+pub use old::*;