diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 0bbd1fa43..789a42248 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -27,8 +27,7 @@ members = [ "examples/dwarf/dwarf_import", "examples/dwarf/dwarfdump", "examples/dwarf/shared", - "examples/idb/idb_import", - "examples/idb/shared", + "examples/idb_import", "examples/flowgraph", "examples/minidump", "examples/mlil_visitor", diff --git a/rust/examples/idb/shared/Cargo.toml b/rust/examples/idb/shared/Cargo.toml deleted file mode 100644 index 0a1921071..000000000 --- a/rust/examples/idb/shared/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "idb-rs" -version = "0.1.0" -authors = ["Rubens Brandao "] -edition = "2021" - -[dependencies] -anyhow = "1.0.86" -bincode = "1.3.3" -flate2 = "1.0.31" -serde = { version = "1.0.205", features = ["derive"] } -serde_repr = "0.1.19" diff --git a/rust/examples/idb/shared/resources/Readme.md b/rust/examples/idb/shared/resources/Readme.md deleted file mode 100644 index c9411d7d3..000000000 --- a/rust/examples/idb/shared/resources/Readme.md +++ /dev/null @@ -1,2 +0,0 @@ -Put here the `*.idb` `*.i64` in `idbs` folder and `*.til` files in `tils` folder. -Those files will be used to test the parser by the `cargo test` command. diff --git a/rust/examples/idb/shared/resources/idbs/idb_and_i64_files_here b/rust/examples/idb/shared/resources/idbs/idb_and_i64_files_here deleted file mode 100644 index e69de29bb..000000000 diff --git a/rust/examples/idb/shared/resources/tils/til_files_here b/rust/examples/idb/shared/resources/tils/til_files_here deleted file mode 100644 index e69de29bb..000000000 diff --git a/rust/examples/idb/shared/src/lib.rs b/rust/examples/idb/shared/src/lib.rs deleted file mode 100644 index 1fc17c924..000000000 --- a/rust/examples/idb/shared/src/lib.rs +++ /dev/null @@ -1,434 +0,0 @@ -#[cfg(test)] -mod test; - -pub mod til; -pub use til::{TILSection, TILTypeInfo}; - -use std::fmt::Debug; -use std::io::{BufRead, Read, Seek, SeekFrom}; -use std::num::NonZeroU64; - -use serde::Deserialize; - -use anyhow::{anyhow, ensure, Result}; - -#[derive(Debug, Clone, Copy)] -pub struct IDBParser { - input: I, - header: IDBHeader, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct TILOffset(NonZeroU64); - -impl IDBParser { - pub fn new(mut input: I) -> Result { - let header = IDBHeader::read(&mut input)?; - Ok(Self { input, header }) - } - - pub fn til_section(&self) -> Option { - self.header.til_offset.map(TILOffset) - } - - pub fn read_til_section(&mut self, til: TILOffset) -> Result { - self.input.seek(SeekFrom::Start(til.0.get()))?; - let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; - // makes sure the reader doesn't go out-of-bounds - let mut input = Read::take(&mut self.input, section_header.len); - let result = TILSection::read(&mut input, section_header.compress)?; - - // TODO seems its normal to have a few extra bytes at the end of the sector, maybe - // because of the compressions stuff, anyway verify that - ensure!( - input.limit() <= 16, - "Sector have more data then expected, left {} bytes", - input.limit() - ); - Ok(result) - } - - #[cfg(test)] - pub(crate) fn decompress_til_section( - &mut self, - til: TILOffset, - output: &mut impl std::io::Write, - ) -> Result<()> { - self.input.seek(SeekFrom::Start(til.0.get()))?; - let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; - // makes sure the reader doesn't go out-of-bounds - let mut input = Read::take(&mut self.input, section_header.len); - TILSection::decompress(&mut input, output, section_header.compress) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum IDBMagic { - IDA0, - IDA1, - IDA2, -} - -impl TryFrom<[u8; 4]> for IDBMagic { - type Error = anyhow::Error; - - fn try_from(value: [u8; 4]) -> Result { - match &value { - b"IDA0" => Ok(IDBMagic::IDA0), - b"IDA1" => Ok(IDBMagic::IDA1), - b"IDA2" => Ok(IDBMagic::IDA2), - _ => Err(anyhow!("Invalid IDB Magic number")), - } - } -} -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum IDBVersion { - V1, - V4, - V5, - V6, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -struct IDBHeader { - version: IDBVersion, - id0_offset: Option, - id1_offset: Option, - nam_offset: Option, - til_offset: Option, - checksums: [u32; 3], - unk0_checksum: u32, - data: IDBHeaderVersion, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum IDBHeaderVersion { - V1 { - seg_offset: Option, - }, - V4 { - seg_offset: Option, - }, - V5 { - unk16: u32, - unk1_checksum: u32, - }, - V6 { - unk16: u32, - id2_offset: Option, - unk1_checksum: u32, - }, -} - -#[derive(Debug, Clone, Copy)] -struct IDBSectionHeader { - compress: IDBSectionCompression, - len: u64, -} - -#[derive(Debug, Clone, Copy)] -#[repr(u8)] -enum IDBSectionCompression { - None = 0, - Zlib = 2, -} - -impl TryFrom for IDBSectionCompression { - type Error = (); - - fn try_from(value: u8) -> std::result::Result { - match value { - 0 => Ok(Self::None), - 2 => Ok(Self::Zlib), - _ => Err(()), - } - } -} - -#[derive(Debug, Deserialize)] -struct IDBHeaderRaw { - magic: [u8; 4], - _padding_0: u16, - offsets: [u32; 5], - signature: u32, - version: u16, - // more, depending on the version -} - -impl IDBHeader { - pub fn read(input: &mut I) -> Result { - let header_raw: IDBHeaderRaw = bincode::deserialize_from(&mut *input)?; - let _magic = IDBMagic::try_from(header_raw.magic)?; - ensure!( - header_raw.signature == 0xAABB_CCDD, - "Invalid header signature {:#x}", - header_raw.signature - ); - match header_raw.version { - 1 => Self::read_v1(&header_raw, input), - 4 => Self::read_v4(&header_raw, input), - 5 => Self::read_v5(&header_raw, input), - 6 => Self::read_v6(&header_raw, input), - v => return Err(anyhow!("Unable to parse version `{v}`")), - } - } - - fn read_v1(header_raw: &IDBHeaderRaw, input: I) -> Result { - #[derive(Debug, Deserialize)] - struct V1Raw { - id2_offset: u32, - checksums: [u32; 3], - unk30_zeroed: u32, - unk33_checksum: u32, - unk38_zeroed: [u8; 6], - } - - let v1_raw: V1Raw = bincode::deserialize_from(input)?; - ensure!(v1_raw.unk30_zeroed == 0, "unk30 not zeroed"); - ensure!(v1_raw.id2_offset == 0, "id2 in V1 is not zeroed"); - ensure!(v1_raw.unk38_zeroed == [0; 6], "unk38 is not zeroed"); - - Ok(Self { - version: IDBVersion::V1, - id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), - id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), - nam_offset: NonZeroU64::new(header_raw.offsets[2].into()), - til_offset: NonZeroU64::new(header_raw.offsets[4].into()), - checksums: v1_raw.checksums, - unk0_checksum: v1_raw.unk33_checksum, - data: IDBHeaderVersion::V1 { - seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), - }, - }) - } - - fn read_v4(header_raw: &IDBHeaderRaw, input: I) -> Result { - #[derive(Debug, Deserialize)] - struct V4Raw { - id2_offset: u32, - checksums: [u32; 3], - unk30_zeroed: u32, - unk33_checksum: u32, - unk38_zeroed: [u8; 8], - unk40_v5c: u32, - unk44_zeroed: [u8; 8], - _unk4c: [u8; 16], - unk5c_zeroed: [[u8; 16]; 8], - } - - let v4_raw: V4Raw = bincode::deserialize_from(input)?; - - ensure!(v4_raw.unk30_zeroed == 0, "unk30 not zeroed"); - ensure!(v4_raw.id2_offset == 0, "id2 in V4 is not zeroed"); - ensure!(v4_raw.unk38_zeroed == [0; 8], "unk38 is not zeroed"); - ensure!(v4_raw.unk40_v5c == 0x5c, "unk40 is not 0x5C"); - ensure!(v4_raw.unk44_zeroed == [0; 8], "unk44 is not zeroed"); - ensure!(v4_raw.unk5c_zeroed == [[0; 16]; 8], "unk5c is not zeroed"); - - Ok(Self { - version: IDBVersion::V4, - id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), - id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), - nam_offset: NonZeroU64::new(header_raw.offsets[2].into()), - til_offset: NonZeroU64::new(header_raw.offsets[4].into()), - checksums: v4_raw.checksums, - unk0_checksum: v4_raw.unk33_checksum, - data: IDBHeaderVersion::V4 { - seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), - }, - }) - } - - fn read_v5(header_raw: &IDBHeaderRaw, input: impl Read) -> Result { - #[derive(Debug, Deserialize)] - struct V5Raw { - nam_offset: u64, - seg_offset_zeroed: u64, - til_offset: u64, - initial_checksums: [u32; 3], - unk4_zeroed: u32, - unk_checksum: u32, - id2_offset_zeroed: u64, - final_checksum: u32, - unk0_v7c: u32, - unk1_zeroed: [u8; 16], - _unk2: [u8; 16], - unk3_zeroed: [[u8; 16]; 8], - } - let v5_raw: V5Raw = bincode::deserialize_from(input)?; - let id0_offset = - u64::from_le(u64::from(header_raw.offsets[1]) << 32 | u64::from(header_raw.offsets[0])); - let id1_offset = - u64::from_le(u64::from(header_raw.offsets[3]) << 32 | u64::from(header_raw.offsets[2])); - - // TODO Final checksum is always zero on v5? - - ensure!(v5_raw.unk4_zeroed == 0, "unk4 not zeroed"); - ensure!(v5_raw.id2_offset_zeroed == 0, "id2 in V5 is not zeroed"); - ensure!(v5_raw.seg_offset_zeroed == 0, "seg in V5 is not zeroed"); - ensure!(v5_raw.unk0_v7c == 0x7C, "unk0 not 0x7C"); - ensure!(v5_raw.unk1_zeroed == [0; 16], "unk1 is not zeroed"); - ensure!(v5_raw.unk3_zeroed == [[0; 16]; 8], "unk3 is not zeroed"); - - Ok(Self { - version: IDBVersion::V5, - id0_offset: NonZeroU64::new(id0_offset), - id1_offset: NonZeroU64::new(id1_offset), - nam_offset: NonZeroU64::new(v5_raw.nam_offset), - til_offset: NonZeroU64::new(v5_raw.til_offset), - checksums: v5_raw.initial_checksums, - unk0_checksum: v5_raw.unk_checksum, - data: IDBHeaderVersion::V5 { - unk16: header_raw.offsets[4], - unk1_checksum: v5_raw.final_checksum, - }, - }) - } - - fn read_v6(header_raw: &IDBHeaderRaw, input: impl Read) -> Result { - #[derive(Debug, Deserialize)] - struct V6Raw { - nam_offset: u64, - seg_offset_zeroed: u64, - til_offset: u64, - initial_checksums: [u32; 3], - unk4_zeroed: [u8; 4], - unk5_checksum: u32, - id2_offset: u64, - final_checksum: u32, - unk0_v7c: u32, - unk1_zeroed: [u8; 16], - _unk2: [u8; 16], - unk3_zeroed: [[u8; 16]; 8], - } - let v6_raw: V6Raw = bincode::deserialize_from(input)?; - let id0_offset = - u64::from_le(u64::from(header_raw.offsets[1]) << 32 | u64::from(header_raw.offsets[0])); - let id1_offset = - u64::from_le(u64::from(header_raw.offsets[3]) << 32 | u64::from(header_raw.offsets[2])); - - ensure!(v6_raw.unk4_zeroed == [0; 4], "unk4 not zeroed"); - ensure!(v6_raw.seg_offset_zeroed == 0, "seg in V6 is not zeroed"); - ensure!(v6_raw.unk0_v7c == 0x7C, "unk0 not 0x7C"); - ensure!(v6_raw.unk1_zeroed == [0; 16], "unk1 is not zeroed"); - ensure!(v6_raw.unk3_zeroed == [[0; 16]; 8], "unk3 is not zeroed"); - - Ok(Self { - version: IDBVersion::V6, - id0_offset: NonZeroU64::new(id0_offset), - id1_offset: NonZeroU64::new(id1_offset), - nam_offset: NonZeroU64::new(v6_raw.nam_offset), - til_offset: NonZeroU64::new(v6_raw.til_offset), - checksums: v6_raw.initial_checksums, - unk0_checksum: v6_raw.unk5_checksum, - data: IDBHeaderVersion::V6 { - unk16: header_raw.offsets[4], - id2_offset: NonZeroU64::new(v6_raw.id2_offset), - unk1_checksum: v6_raw.final_checksum, - }, - }) - } -} - -impl IDBSectionHeader { - pub fn read(header: &IDBHeader, input: I) -> Result { - match header.version { - crate::IDBVersion::V1 | crate::IDBVersion::V4 => { - #[derive(Debug, Deserialize)] - struct Section32Raw { - compress: u8, - len: u32, - } - let header: Section32Raw = bincode::deserialize_from(input)?; - Ok(IDBSectionHeader { - compress: header - .compress - .try_into() - .map_err(|_| anyhow!("Invalid compression code"))?, - len: header.len.into(), - }) - } - crate::IDBVersion::V5 | crate::IDBVersion::V6 => { - #[derive(Debug, Deserialize)] - struct Section64Raw { - compress: u8, - len: u64, - } - let header: Section64Raw = bincode::deserialize_from(input)?; - Ok(IDBSectionHeader { - compress: header - .compress - .try_into() - .map_err(|_| anyhow!("Invalid compression code"))?, - len: header.len, - }) - } - } - } -} - -fn read_bytes_len_u8(mut input: I) -> Result> { - let mut len = [0]; - input.read_exact(&mut len)?; - let mut bytes = vec![0u8; len[0].into()]; - input.read_exact(&mut bytes)?; - Ok(bytes) -} - -fn read_string_len_u8(input: I) -> Result { - let bytes = read_bytes_len_u8(input)?; - Ok(String::from_utf8(bytes)?) -} - -#[cfg(test)] -fn write_string_len_u8(mut output: O, value: &str) -> Result<()> { - output.write_all(&[u8::try_from(value.len()).unwrap()])?; - Ok(output.write_all(value.as_bytes())?) -} - -fn read_c_string_raw(mut input: I) -> std::io::Result> { - let mut buf = vec![]; - input.read_until(b'\x00', &mut buf)?; - // last char need to be \x00 or we found a EoF - if buf.pop() != Some(b'\x00') { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on CStr", - )); - } - Ok(buf) -} - -fn read_c_string(input: &mut I) -> std::io::Result { - let buf = read_c_string_raw(input)?; - Ok(String::from_utf8_lossy(&buf).to_string()) -} - -fn read_c_string_vec(input: &mut I) -> std::io::Result> { - let buf = read_c_string_raw(input)?; - if buf.is_empty() { - return Ok(vec![]); - } - - let mut result = vec![]; - // NOTE never 0 because this came from a CStr - let mut len = buf[0] - 1; - // NOTE zero len (buf[0] == 1) string is allowed - let mut current = &buf[1..]; - loop { - if usize::from(len) > current.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Invalid len on Vec of CStr", - )); - } - let (value, rest) = current.split_at(len.into()); - result.push(String::from_utf8_lossy(value).to_string()); - if rest.is_empty() { - break; - } - len = rest[0] - 1; - current = &rest[1..]; - } - Ok(result) -} diff --git a/rust/examples/idb/shared/src/test.rs b/rust/examples/idb/shared/src/test.rs deleted file mode 100644 index 9db0feeaa..000000000 --- a/rust/examples/idb/shared/src/test.rs +++ /dev/null @@ -1,102 +0,0 @@ -use std::ffi::OsStr; -use std::fs::File; -use std::io::{BufReader, BufWriter, Seek}; -use std::path::{Path, PathBuf}; - -use anyhow::ensure; - -use crate::{IDBParser, IDBSectionCompression, TILSection}; - -#[test] -fn parse_idbs() { - let files = find_all("resources/idbs".as_ref(), &["idb".as_ref(), "i64".as_ref()]).unwrap(); - for filename in files { - println!("{}", filename.to_str().unwrap()); - let file = BufReader::new(File::open(&filename).unwrap()); - let mut parser = IDBParser::new(file).unwrap(); - let til = parser.read_til_section(parser.til_section().unwrap()); - - // if success, parse next file - let error = match til { - Ok(_til) => continue, - Err(e) => e, - }; - - //otherwise create a decompress version of the file for more testing - let mut output = BufWriter::new(std::fs::File::create("/tmp/lasterror.til").unwrap()); - parser - .decompress_til_section(parser.til_section().unwrap(), &mut output) - .unwrap(); - panic!("{error:?}") - } -} - -#[test] -fn parse_tils() { - let files = find_all("resources/tils".as_ref(), &["til".as_ref()]).unwrap(); - let results = files - .into_iter() - .map(|x| parse_til_file(&x).map_err(|e| (x, e))) - .collect::>(); - let Err((file, error)) = results else { - // if success, finish the test - return; - }; - println!("Unable to parse {}", file.to_str().unwrap()); - //otherwise create a decompress version of the file for more testing - let mut input = BufReader::new(std::fs::File::open(&file).unwrap()); - let mut output = BufWriter::new(std::fs::File::create("/tmp/lasterror.til").unwrap()); - TILSection::decompress_inner(&mut input, &mut output).unwrap(); - panic!( - "Unable to parse file `{}`: {error:?}", - file.to_str().unwrap() - ); -} - -fn parse_til_file(file: &Path) -> anyhow::Result<()> { - println!("TIL file: {}", file.to_str().unwrap()); - // makes sure it don't read out-of-bounds - let mut input = BufReader::new(std::fs::File::open(file).unwrap()); - // TODO make a SmartReader - match TILSection::read(&mut input, IDBSectionCompression::None) { - Ok(_til) => { - let current = input.seek(std::io::SeekFrom::Current(0))?; - let end = input.seek(std::io::SeekFrom::End(0))?; - ensure!( - current == end, - "unable to consume the entire TIL file, {current} != {end}" - ); - Ok(()) - } - Err(e) => Err(e), - } -} - -fn find_all(path: &Path, exts: &[&OsStr]) -> anyhow::Result> { - fn inner_find_all(path: &Path, exts: &[&OsStr], buf: &mut Vec) -> anyhow::Result<()> { - for entry in std::fs::read_dir(path).unwrap().map(Result::unwrap) { - let entry_type = entry.metadata().unwrap().file_type(); - if entry_type.is_dir() { - inner_find_all(&entry.path(), exts, buf)?; - continue; - } - - if !entry_type.is_file() { - continue; - } - - let filename = entry.file_name(); - let Some(ext) = Path::new(&filename).extension() else { - continue; - }; - - if exts.contains(&ext) { - buf.push(entry.path()) - } - } - Ok(()) - } - let mut result = vec![]; - inner_find_all(path, exts, &mut result)?; - Ok(result) -} diff --git a/rust/examples/idb/shared/src/til/flag.rs b/rust/examples/idb/shared/src/til/flag.rs deleted file mode 100644 index d23a51ef3..000000000 --- a/rust/examples/idb/shared/src/til/flag.rs +++ /dev/null @@ -1,432 +0,0 @@ -/// byte sequence used to describe a type in IDA -type TypeT = u8; -/// Enum type flags -type BteT = u8; - -/// multi-use -pub const RESERVED_BYTE: TypeT = 0xFF; - -/// Masks -pub mod tf_mask { - use super::TypeT; - /// the low 4 bits define the basic type - pub const TYPE_BASE_MASK: TypeT = 0x0F; - /// type flags - they have different meaning depending on the basic type - pub const TYPE_FLAGS_MASK: TypeT = 0x30; - /// modifiers. - /// for [super::tf_array::BT_ARRAY] see [super::tf_array] - /// ::BT_VOID can have them ONLY in 'void *' - pub const TYPE_MODIF_MASK: TypeT = 0xC0; - /// basic type with type flags - pub const TYPE_FULL_MASK: TypeT = TYPE_BASE_MASK | TYPE_FLAGS_MASK; -} - -/// Basic type: unknown & void -/// [BT_UNK] and [BT_VOID] with non-zero type flags can be used in function -/// (and struct) declarations to describe the function arguments or structure -/// fields if only their size is known. They may be used in ida to describe -/// the user input. -/// -/// In general BT_... bits should not be used alone to describe types. -/// Use BTF_... constants instead. -/// -/// For struct used also as 'single-field-alignment-suffix' -/// [__declspec(align(x))] with [tf_mask::TYPE_MODIF_MASK] == [tf_mask::TYPE_FULL_MASK] -pub mod tf_unk { - use super::TypeT; - /// unknown - pub const BT_UNK: TypeT = 0x00; - /// void - pub const BT_VOID: TypeT = 0x01; - /// [BT_VOID] - normal void; [BT_UNK] - don't use - pub const BTMT_SIZE0: TypeT = 0x00; - /// size = 1 byte if [BT_VOID]; 2 if [BT_UNK] - pub const BTMT_SIZE12: TypeT = 0x10; - /// size = 4 bytes if [BT_VOID]; 8 if [BT_UNK] - pub const BTMT_SIZE48: TypeT = 0x20; - /// size = 16 bytes if [BT_VOID]; unknown if [BT_UNK] (IN struct alignment - see below) - pub const BTMT_SIZE128: TypeT = 0x30; -} - -/// Basic type: integer -pub mod tf_int { - use super::TypeT; - /// __int8 - pub const BT_INT8: TypeT = 0x02; - /// __int16 - pub const BT_INT16: TypeT = 0x03; - /// __int32 - pub const BT_INT32: TypeT = 0x04; - /// __int64 - pub const BT_INT64: TypeT = 0x05; - /// __int128 (for alpha & future use) - pub const BT_INT128: TypeT = 0x06; - /// natural int. (size provided by idp module) - pub const BT_INT: TypeT = 0x07; - /// unknown signedness - pub const BTMT_UNKSIGN: TypeT = 0x00; - /// signed - pub const BTMT_SIGNED: TypeT = 0x10; - /// unsigned - pub const BTMT_UNSIGNED: TypeT = 0x20; - /// specify char or segment register - /// - [BT_INT8] - char - /// - [BT_INT] - segment register - /// - other [BT_INT]... - don't use - pub const BTMT_CHAR: TypeT = 0x30; -} - -/// Basic type: bool -pub mod tf_bool { - use super::TypeT; - /// bool - pub const BT_BOOL: TypeT = 0x08; - /// bool size is model specific or unknown(?) - pub const BTMT_DEFBOOL: TypeT = 0x00; - /// bool sized 1byte - pub const BTMT_BOOL1: TypeT = 0x10; - /// bool sized 2bytes - !inf_is_64bit() - pub const BTMT_BOOL2: TypeT = 0x20; - /// bool sized 8bytes - inf_is_64bit() - pub const BTMT_BOOL8: TypeT = 0x20; - /// bool sized 4bytes - pub const BTMT_BOOL4: TypeT = 0x30; -} - -/// Basic type: float -pub mod tf_float { - use super::TypeT; - /// float - pub const BT_FLOAT: TypeT = 0x09; - /// float (4 bytes) - pub const BTMT_FLOAT: TypeT = 0x00; - /// double (8 bytes) - pub const BTMT_DOUBLE: TypeT = 0x10; - /// long double (compiler specific) - pub const BTMT_LNGDBL: TypeT = 0x20; - /// float (variable size). `if { use_tbyte } then { tbyte_size } else { 2 }`, - pub const BTMT_SPECFLT: TypeT = 0x30; -} - -/// Basic type: last -pub mod tf_last_basic { - /// the last basic type, all basic types may be followed by `tah-typeattrs` - pub const BT_LAST_BASIC: super::TypeT = super::tf_float::BT_FLOAT; -} - -/// Derived type: pointer -/// Pointers to undeclared yet [tf_complex::BT_COMPLEX] types are prohibited -pub mod tf_ptr { - use super::TypeT; - /// pointer - /// has the following format: - /// `[db sizeof(ptr)]; [tah-typeattrs]; type_t...` - pub const BT_PTR: TypeT = 0x0A; - /// default for model - pub const BTMT_DEFPTR: TypeT = 0x00; - /// near - pub const BTMT_NEAR: TypeT = 0x10; - /// far - pub const BTMT_FAR: TypeT = 0x20; - /// closure - /// - if ptr to [super::tf_func::BT_FUNC] - __closure. - /// in this case next byte MUST be - /// [super::RESERVED_BYTE], and after it [super::tf_func::BT_FUNC] - /// - else the next byte contains size_of::() - /// allowed values are 1 - `\varmem{ph,processor_t,max_ptr_size}` - /// - if value is bigger than `\varmem{ph,processor_t,max_ptr_size}`, - /// based_ptr_name_and_size() is called to - /// find out the typeinfo - pub const BTMT_CLOSURE: TypeT = 0x30; -} - -/// Derived type: array -/// For [tf_array::BT_ARRAY], the BTMT_... flags must be equivalent to the BTMT_... flags of its elements -pub mod tf_array { - use super::TypeT; - /// array - pub const BT_ARRAY: TypeT = 0x0B; - - /// code - /// ```custom,{class=text} - /// if set - /// array base==0 - /// format: dt num_elem; [tah-typeattrs]; type_t... - /// if num_elem==0 then the array size is unknown - /// else - /// format: da num_elem, base; [tah-typeattrs]; type_t... \endcode - /// ``` - /// used only for serialization - pub const BTMT_NONBASED: TypeT = 0x10; - /// reserved bit - pub const BTMT_ARRESERV: TypeT = 0x20; -} - -/// \defgroup tf_func Derived type: function -/// Ellipsis is not taken into account in the number of parameters// -/// The return type cannot be ::BT_ARRAY or ::BT_FUNC. -/// -pub mod tf_func { - use super::TypeT; - /// function. - /// format:
-    ///  optional:
-    /// ```custom,{class=text}
-    ///   ::CM_CC_SPOILED | num_of_spoiled_regs
-    ///   if num_of_spoiled_reg == BFA_FUNC_MARKER:
-    ///     ::bfa_byte
-    ///     if (bfa_byte & BFA_FUNC_EXT_FORMAT) != 0
-    ///      ::fti_bits (only low bits: FTI_SPOILED,...,FTI_VIRTUAL)
-    ///      num_of_spoiled_reg times: spoiled reg info (see extract_spoiledreg)
-    ///     else
-    ///       bfa_byte is function attribute byte (see \ref BFA_...)
-    ///   else:
-    ///     num_of_spoiled_reg times: spoiled reg info (see extract_spoiledreg)
-    /// ```
-    ///  ::cm_t ... calling convention and memory model
-    ///  [tah-typeattrs];
-    ///  ::type_t ... return type;
-    ///  [serialized argloc_t of returned value (if ::CM_CC_SPECIAL{PE} && !return void);
-    /// ```custom,{class=text}
-    ///  if !::CM_CC_VOIDARG:
-    ///    dt N (N=number of parameters)
-    ///    if ( N == 0 )
-    ///    if ::CM_CC_ELLIPSIS or ::CM_CC_SPECIALE
-    ///        func(...)
-    ///      else
-    ///        parameters are unknown
-    ///    else
-    ///      N records:
-    ///        ::type_t ... (i.e. type of each parameter)
-    ///        [serialized argloc_t (if ::CM_CC_SPECIAL{PE})] (i.e. place of each parameter)
-    ///        [#FAH_BYTE + de( \ref funcarg_t::flags )] 
- /// ``` - pub const BT_FUNC: TypeT = 0x0C; - - ///< call method - default for model or unknown - pub const BTMT_DEFCALL: TypeT = 0x00; - ///< function returns by retn - pub const BTMT_NEARCALL: TypeT = 0x10; - ///< function returns by retf - pub const BTMT_FARCALL: TypeT = 0x20; - ///< function returns by iret - ///< in this case cc MUST be 'unknown' - pub const BTMT_INTCALL: TypeT = 0x30; -} - -/// Derived type: complex -pub mod tf_complex { - use super::TypeT; - /// struct/union/enum/typedef. - /// format:
:
-    /// ```custom,{class=text}
-    ///   [dt N (N=field count) if !::BTMT_TYPEDEF]
-    ///   if N == 0:
-    ///     p_string name (unnamed types have names "anon_...")
-    ///     [sdacl-typeattrs];
-    ///   else, for struct & union:
-    ///     if N == 0x7FFE   // Support for high (i.e., > 4095) members count
-    ///       N = deserialize_de()
-    ///     ALPOW = N & 0x7
-    ///     MCNT = N >> 3
-    ///     if MCNT == 0
-    ///       empty struct
-    ///     if ALPOW == 0
-    ///       ALIGN = get_default_align()
-    ///     else
-    ///       ALIGN = (1 << (ALPOW - 1))
-    ///     [sdacl-typeattrs];
-    ///   else, for enums:
-    ///     if N == 0x7FFE   // Support for high enum entries count.
-    ///       N = deserialize_de()
-    ///     [tah-typeattrs]; 
- /// ``` - pub const BT_COMPLEX: TypeT = 0x0D; - /// struct - /// `MCNT records: type_t; [sdacl-typeattrs];` - pub const BTMT_STRUCT: TypeT = 0x00; - /// union - /// `MCNT records: type_t...` - pub const BTMT_UNION: TypeT = 0x10; - /// enum - /// ```custom,{class=text} - /// next byte bte_t (see below) - /// N records: de delta(s) - /// OR - /// blocks (see below) - /// ``` - pub const BTMT_ENUM: TypeT = 0x20; - /// named reference - /// `always p_string name` - pub const BTMT_TYPEDEF: TypeT = 0x30; - /// bitfield (only in struct) - /// ```custom,{class=text} - /// ['bitmasked' enum see below] - /// next byte is dt - /// ((size in bits << 1) | (unsigned ? 1 : 0)) - /// ``` - pub const BT_BITFIELD: TypeT = 0x0E; - /// __int8 - pub const BTMT_BFLDI8: TypeT = 0x00; - /// __int16 - pub const BTMT_BFLDI16: TypeT = 0x10; - /// __int32 - pub const BTMT_BFLDI32: TypeT = 0x20; - /// __int64 - pub const BTMT_BFLDI64: TypeT = 0x30; -} - -/// RESERVED -pub const BT_RESERVED: TypeT = 0x0F; - -/// Type modifiers -/// "pub const volatile" types are forbidden -pub mod tf_modifiers { - use super::TypeT; - /// const - pub const BTM_CONST: TypeT = 0x40; - /// volatile - pub const BTM_VOLATILE: TypeT = 0x80; -} - -/// Special enum definitions -pub mod tf_enum { - use super::BteT; - /// storage size. - /// - if == 0 then inf_get_cc_size_e() - /// - else 1 << (n -1) = 1,2,4...64 - pub const BTE_SIZE_MASK: BteT = 0x07; - /// must be 0, in order to distinguish from a tah-byte - pub const BTE_RESERVED: BteT = 0x08; - /// 'subarrays'. In this case ANY record - /// has the following format: - /// - 'de' mask (has name) - /// - 'dt' cnt - /// - cnt records of 'de' values - /// (cnt CAN be 0) - /// NOTE: delta for ALL subsegment is ONE - pub const BTE_BITFIELD: BteT = 0x10; - /// output style mask - pub const BTE_OUT_MASK: BteT = 0x60; - /// hex - pub const BTE_HEX: BteT = 0x00; - /// char or hex - pub const BTE_CHAR: BteT = 0x20; - /// signed decimal - pub const BTE_SDEC: BteT = 0x40; - /// unsigned decimal - pub const BTE_UDEC: BteT = 0x60; - /// this bit MUST be present - pub const BTE_ALWAYS: BteT = 0x80; -} - -/// Convenience definitions: segment register -pub mod tf_conv_segreg { - use super::{tf_int, TypeT}; - /// segment register - pub const BT_SEGREG: TypeT = tf_int::BT_INT | tf_int::BTMT_CHAR; -} - -/// Convenience definitions: unknown types -pub mod tf_conv_unk { - use super::{tf_unk, TypeT}; - /// 1 byte - pub const BT_UNK_BYTE: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE12; - /// 2 bytes - pub const BT_UNK_WORD: TypeT = tf_unk::BT_UNK | tf_unk::BTMT_SIZE12; - /// 4 bytes - pub const BT_UNK_DWORD: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE48; - /// 8 bytes - pub const BT_UNK_QWORD: TypeT = tf_unk::BT_UNK | tf_unk::BTMT_SIZE48; - /// 16 bytes - pub const BT_UNK_OWORD: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE128; - /// unknown size - for parameters - pub const BT_UNKNOWN: TypeT = tf_unk::BT_UNK | tf_unk::BTMT_SIZE128; -} - -/// Convenience definitions: shortcuts -pub mod tf_shortcuts { - use super::{tf_bool, tf_complex, tf_conv_unk, tf_float, tf_int, tf_unk, TypeT}; - /// byte - pub const BTF_BYTE: TypeT = tf_conv_unk::BT_UNK_BYTE; - /// unknown - pub const BTF_UNK: TypeT = tf_conv_unk::BT_UNKNOWN; - /// void - pub const BTF_VOID: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE0; - - /// signed byte - pub const BTF_INT8: TypeT = tf_int::BT_INT8 | tf_int::BTMT_SIGNED; - /// signed char - pub const BTF_CHAR: TypeT = tf_int::BT_INT8 | tf_int::BTMT_CHAR; - /// unsigned char - pub const BTF_UCHAR: TypeT = tf_int::BT_INT8 | tf_int::BTMT_UNSIGNED; - /// unsigned byte - pub const BTF_UINT8: TypeT = tf_int::BT_INT8 | tf_int::BTMT_UNSIGNED; - - /// signed short - pub const BTF_INT16: TypeT = tf_int::BT_INT16 | tf_int::BTMT_SIGNED; - /// unsigned short - pub const BTF_UINT16: TypeT = tf_int::BT_INT16 | tf_int::BTMT_UNSIGNED; - - /// signed int - pub const BTF_INT32: TypeT = tf_int::BT_INT32 | tf_int::BTMT_SIGNED; - /// unsigned int - pub const BTF_UINT32: TypeT = tf_int::BT_INT32 | tf_int::BTMT_UNSIGNED; - - /// signed long - pub const BTF_INT64: TypeT = tf_int::BT_INT64 | tf_int::BTMT_SIGNED; - /// unsigned long - pub const BTF_UINT64: TypeT = tf_int::BT_INT64 | tf_int::BTMT_UNSIGNED; - - /// signed 128-bit value - pub const BTF_INT128: TypeT = tf_int::BT_INT128 | tf_int::BTMT_SIGNED; - /// unsigned 128-bit value - pub const BTF_UINT128: TypeT = tf_int::BT_INT128 | tf_int::BTMT_UNSIGNED; - - /// int, unknown signedness - pub const BTF_INT: TypeT = tf_int::BT_INT | tf_int::BTMT_UNKSIGN; - /// unsigned int - pub const BTF_UINT: TypeT = tf_int::BT_INT | tf_int::BTMT_UNSIGNED; - /// singed int - pub const BTF_SINT: TypeT = tf_int::BT_INT | tf_int::BTMT_SIGNED; - - /// boolean - pub const BTF_BOOL: TypeT = tf_bool::BT_BOOL; - - /// float - pub const BTF_FLOAT: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_FLOAT; - /// double - pub const BTF_DOUBLE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_DOUBLE; - /// long double - pub const BTF_LDOUBLE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_LNGDBL; - /// see [tf_float::BTMT_SPECFLT] - pub const BTF_TBYTE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_SPECFLT; - - /// struct - pub const BTF_STRUCT: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_STRUCT; - /// union - pub const BTF_UNION: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_UNION; - /// enum - pub const BTF_ENUM: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_ENUM; - /// typedef - pub const BTF_TYPEDEF: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_TYPEDEF; -} - -/// pack buckets using zip -pub const TIL_ZIP: u32 = 0x0001; -/// til has macro table -pub const TIL_MAC: u32 = 0x0002; -/// extended sizeof info (short, long, longlong) -pub const TIL_ESI: u32 = 0x0004; -/// universal til for any compiler -pub const TIL_UNI: u32 = 0x0008; -/// type ordinal numbers are present -pub const TIL_ORD: u32 = 0x0010; -/// type aliases are present (this bit is used only on the disk) -pub const TIL_ALI: u32 = 0x0020; -/// til has been modified, should be saved -pub const TIL_MOD: u32 = 0x0040; -/// til has extra streams -pub const TIL_STM: u32 = 0x0080; -/// sizeof(long double) -pub const TIL_SLD: u32 = 0x0100; diff --git a/rust/examples/idb/shared/src/til/mod.rs b/rust/examples/idb/shared/src/til/mod.rs deleted file mode 100644 index 5e12dce63..000000000 --- a/rust/examples/idb/shared/src/til/mod.rs +++ /dev/null @@ -1,1837 +0,0 @@ -/// The u8 values used to describes the type information records in IDA. -/// -/// The recommended way of using type info is to use the [tinfo_t] class. -/// The type information is internally kept as an array of bytes terminated by 0. -/// -/// Items in brackets [] are optional and sometimes are omitted. -/// ::type_t... means a sequence of ::type_t bytes which defines a type. -/// -/// NOTE: to work with the types of instructions or data in the database, -/// use `get_tinfo()`/`set_tinfo()` and similar functions. -#[allow(unused)] -mod flag; - -use std::io::{BufRead, BufReader, Read}; -use std::num::NonZeroU8; - -use anyhow::{anyhow, ensure, Context, Result}; -use serde::{Deserialize, Serialize}; - -use crate::{read_c_string, read_c_string_vec, read_string_len_u8, IDBSectionCompression}; - -// TODO migrate this to flags -const TIL_SECTION_MAGIC: &[u8; 6] = b"IDATIL"; - -#[derive(Debug, Clone)] -pub struct TILSection { - pub format: u32, - /// short file name (without path and extension) - pub title: String, - /// human readable til description - pub description: String, - pub id: u8, - /// information about the target compiler - pub cm: u8, - pub def_align: u8, - pub symbols: Vec, - pub type_ordinal_numbers: Option, - pub types: Vec, - pub size_i: NonZeroU8, - pub size_b: NonZeroU8, - pub sizes: Option, - pub size_long_double: Option, - pub macros: Option>, - pub is_universal: bool, -} - -#[derive(Debug, Clone, Copy)] -pub struct TILSizes { - pub size_short: NonZeroU8, - pub size_long: NonZeroU8, - pub size_long_long: NonZeroU8, -} - -#[derive(Debug, Clone)] -pub(crate) struct TILSectionHeader { - format: u32, - flags: TILSectionFlag, - title: String, - description: String, - id: u8, - cm: u8, - size_enum: u8, - size_i: NonZeroU8, - size_b: NonZeroU8, - def_align: u8, - size_s_l_ll: Option, - size_long_double: Option, -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] -struct TILSectionHeader1 { - signature: [u8; 6], - format: u32, - flags: TILSectionFlag, -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] -struct TILSectionHeader2 { - id: u8, - cm: u8, - size_i: u8, - size_b: u8, - size_enum: u8, - def_align: u8, -} - -impl TILSection { - pub fn parse(mut input: I) -> Result { - Self::read_inner(&mut input) - } - - pub(crate) fn read(input: &mut I, compress: IDBSectionCompression) -> Result { - match compress { - IDBSectionCompression::None => Self::read_inner(input), - IDBSectionCompression::Zlib => { - let mut input = BufReader::new(flate2::read::ZlibDecoder::new(input)); - Self::read_inner(&mut input) - } - } - } - - fn read_inner(input: &mut I) -> Result { - let header = Self::read_header(&mut *input)?; - let symbols = Self::read_bucket(&mut *input, &header)?; - let type_ordinal_numbers = header - .flags - .has_ordinal() - .then(|| bincode::deserialize_from(&mut *input)) - .transpose()?; - let types = Self::read_bucket(&mut *input, &header)?; - let macros = header - .flags - .has_macro_table() - .then(|| Self::read_macros(&mut *input, &header)) - .transpose()?; - - // TODO verify that is always false? - let _mod = header.flags.is_mod(); - let _uni = header.flags.is_universal(); - let _ord = header.flags.has_ordinal(); - let _ali = header.flags.has_type_aliases(); - let _stm = header.flags.has_extra_stream(); - - Ok(TILSection { - format: header.format, - title: header.title, - description: header.description, - id: header.id, - cm: header.cm, - def_align: header.def_align, - size_long_double: header.size_long_double.map(|x| x.try_into()).transpose()?, - is_universal: header.flags.is_universal(), - size_b: header.size_b.try_into()?, - size_i: header.size_i.try_into()?, - sizes: header.size_s_l_ll, - symbols, - type_ordinal_numbers, - types, - macros, - }) - } - - fn read_header(input: &mut I) -> Result { - let header1: TILSectionHeader1 = bincode::deserialize_from(&mut *input)?; - ensure!( - header1.signature == *TIL_SECTION_MAGIC, - "Invalid TIL Signature" - ); - - let title = read_string_len_u8(&mut *input)?; - let description = read_string_len_u8(&mut *input)?; - - let header2: TILSectionHeader2 = bincode::deserialize_from(&mut *input)?; - let size_s_l_ll = header1 - .flags - .have_size_short_long_longlong() - .then(|| bincode::deserialize_from(&mut *input)) - .transpose()? - .map(|(s, l, ll): (u8, u8, u8)| -> anyhow::Result<_> { - Ok(TILSizes { - size_short: s.try_into()?, - size_long: l.try_into()?, - size_long_long: ll.try_into()?, - }) - }) - .transpose()?; - let size_long_double = header1 - .flags - .has_size_long_double() - .then(|| bincode::deserialize_from::<_, u8>(&mut *input)) - .transpose()? - .map(|size| size.try_into()) - .transpose()?; - Ok(TILSectionHeader { - format: header1.format, - flags: header1.flags, - title, - description, - id: header2.id, - size_enum: header2.size_enum, - size_i: header2.size_i.try_into()?, - size_b: header2.size_b.try_into()?, - cm: header2.cm, - def_align: header2.def_align, - size_s_l_ll, - size_long_double, - }) - } - - #[cfg(test)] - pub(crate) fn decompress( - input: &mut I, - output: &mut O, - compress: IDBSectionCompression, - ) -> Result<()> { - match compress { - IDBSectionCompression::Zlib => { - let mut input = BufReader::new(flate2::read::ZlibDecoder::new(input)); - Self::decompress_inner(&mut input, output) - } - IDBSectionCompression::None => Self::decompress_inner(input, output), - } - } - - #[cfg(test)] - pub(crate) fn decompress_inner( - input: &mut I, - output: &mut O, - ) -> Result<()> { - let mut header = Self::read_header(&mut *input)?; - let og_flags = header.flags; - // disable the zip flag - header.flags.set_zip(false); - let header1 = TILSectionHeader1 { - signature: *TIL_SECTION_MAGIC, - format: header.format, - flags: header.flags, - }; - let header2 = TILSectionHeader2 { - id: header.id, - cm: header.cm, - size_i: header.size_i.get(), - size_b: header.size_b.get(), - size_enum: header.size_enum, - def_align: header.def_align, - }; - bincode::serialize_into(&mut *output, &header1)?; - crate::write_string_len_u8(&mut *output, &header.title)?; - crate::write_string_len_u8(&mut *output, &header.description)?; - bincode::serialize_into(&mut *output, &header2)?; - header - .size_s_l_ll - .map(|value| { - bincode::serialize_into( - &mut *output, - &( - value.size_short.get(), - value.size_long.get(), - value.size_long_long.get(), - ), - ) - }) - .transpose()?; - header - .size_long_double - .map(|value| bincode::serialize_into(&mut *output, &value)) - .transpose()?; - - // if not zipped, just copy the rest of the data, there is no posible zip - // block inside a bucket - if !og_flags.is_zip() { - std::io::copy(&mut *input, output)?; - return Ok(()); - } - - // symbols - Self::decompress_bucket(&mut *input, &mut *output)?; - let _type_ordinal_numbers: Option = header - .flags - .has_ordinal() - .then(|| -> Result { - let result: u32 = bincode::deserialize_from(&mut *input)?; - bincode::serialize_into(&mut *output, &result)?; - Ok(result) - }) - .transpose()?; - // types - Self::decompress_bucket(&mut *input, &mut *output)?; - // macros - header - .flags - .has_macro_table() - .then(|| Self::decompress_bucket(&mut *input, &mut *output)) - .transpose()?; - - Ok(()) - } -} - -#[derive(Clone, Copy, Debug, Deserialize, Serialize)] -pub struct TILSectionFlag(u32); -impl TILSectionFlag { - pub fn is_zip(&self) -> bool { - self.0 & flag::TIL_ZIP != 0 - } - pub fn set_zip(&mut self, value: bool) { - if value { - self.0 |= flag::TIL_ZIP - } else { - self.0 &= !flag::TIL_ZIP - } - } - pub fn has_macro_table(&self) -> bool { - self.0 & flag::TIL_MAC != 0 - } - /// extended sizeof info (short, long, longlong) - pub fn have_size_short_long_longlong(&self) -> bool { - self.0 & flag::TIL_ESI != 0 - } - /// universal til for any compiler - pub fn is_universal(&self) -> bool { - self.0 & flag::TIL_UNI != 0 - } - /// type ordinal numbers are present - pub fn has_ordinal(&self) -> bool { - self.0 & flag::TIL_ORD != 0 - } - /// type aliases are present - pub fn has_type_aliases(&self) -> bool { - self.0 & flag::TIL_ALI != 0 - } - /// til has been modified, should be saved - pub fn is_mod(&self) -> bool { - self.0 & flag::TIL_MOD != 0 - } - /// til has extra streams - pub fn has_extra_stream(&self) -> bool { - self.0 & flag::TIL_STM != 0 - } - /// sizeof(long double) - pub fn has_size_long_double(&self) -> bool { - self.0 & flag::TIL_SLD != 0 - } -} - -#[derive(Debug, Deserialize, Serialize)] -struct TILBucketRaw { - ndefs: u32, - len: u32, -} - -impl TILSection { - fn read_bucket_header(input: &mut I) -> Result<(u32, u32)> { - let ndefs = bincode::deserialize_from(&mut *input)?; - let len = bincode::deserialize_from(&mut *input)?; - Ok((ndefs, len)) - } - - fn read_bucket_zip_header(input: &mut I) -> Result<(u32, u32, u32)> { - let (ndefs, len) = Self::read_bucket_header(&mut *input)?; - let compressed_len = bincode::deserialize_from(&mut *input)?; - Ok((ndefs, len, compressed_len)) - } - - fn read_bucket( - input: &mut I, - header: &TILSectionHeader, - ) -> Result> { - if header.flags.is_zip() { - Self::read_bucket_zip(&mut *input, &header) - } else { - Self::read_bucket_normal(&mut *input, &header) - } - } - - fn read_bucket_normal( - input: &mut I, - header: &TILSectionHeader, - ) -> Result> { - let (ndefs, len) = Self::read_bucket_header(&mut *input)?; - let mut input = input.take(len.into()); - let type_info = (0..ndefs) - .map(|_| TILTypeInfo::read(&mut input, header)) - .collect::>()?; - ensure!( - input.limit() == 0, - "TypeBucket total data is smaller then expected" - ); - Ok(type_info) - } - - fn read_bucket_zip( - input: &mut I, - header: &TILSectionHeader, - ) -> Result> { - let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; - // make sure the decompressor don't read out-of-bounds - let mut compressed_input = input.take(compressed_len.into()); - let inflate = BufReader::new(flate2::read::ZlibDecoder::new(&mut compressed_input)); - // make sure only the defined size is decompressed - let mut decompressed_input = inflate.take(len.into()); - let type_info = (0..ndefs.try_into().unwrap()) - .map(|_| TILTypeInfo::read(&mut decompressed_input, header)) - .collect::, _>>()?; - // make sure the input was fully consumed - ensure!( - decompressed_input.limit() == 0, - "TypeBucket data is smaller then expected" - ); - ensure!( - compressed_input.limit() == 0, - "TypeBucket compressed data is smaller then expected" - ); - Ok(type_info) - } - - fn read_macros(input: &mut I, header: &TILSectionHeader) -> Result> { - if header.flags.is_zip() { - Self::read_macros_zip(&mut *input) - } else { - Self::read_macros_normal(&mut *input) - } - } - - fn read_macros_normal(input: &mut I) -> Result> { - let (ndefs, len) = Self::read_bucket_header(&mut *input)?; - let mut input = input.take(len.into()); - let type_info = (0..ndefs) - .map(|_| TILMacro::read(&mut input)) - .collect::>()?; - ensure!( - input.limit() == 0, - "TypeBucket macro total data is smaller then expected" - ); - Ok(type_info) - } - - fn read_macros_zip(input: &mut I) -> Result> { - let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; - // make sure the decompressor don't read out-of-bounds - let mut compressed_input = input.take(compressed_len.into()); - let inflate = BufReader::new(flate2::read::ZlibDecoder::new(&mut compressed_input)); - // make sure only the defined size is decompressed - let mut decompressed_input = inflate.take(len.into()); - let type_info = (0..ndefs.try_into().unwrap()) - .map(|_| TILMacro::read(&mut decompressed_input)) - .collect::, _>>()?; - // make sure the input was fully consumed - ensure!( - decompressed_input.limit() == 0, - "TypeBucket macros data is smaller then expected" - ); - ensure!( - compressed_input.limit() == 0, - "TypeBucket macros compressed data is smaller then expected" - ); - Ok(type_info) - } - - #[cfg(test)] - fn decompress_bucket( - input: &mut I, - output: &mut O, - ) -> Result<()> { - let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; - bincode::serialize_into(&mut *output, &TILBucketRaw { len, ndefs })?; - // write the decompressed data - let mut compressed_input = input.take(compressed_len.into()); - let inflate = flate2::read::ZlibDecoder::new(&mut compressed_input); - let mut decompressed_input = inflate.take(len.into()); - std::io::copy(&mut decompressed_input, output)?; - ensure!( - decompressed_input.limit() == 0, - "TypeBucket data is smaller then expected" - ); - ensure!( - compressed_input.limit() == 0, - "TypeBucket compressed data is smaller then expected" - ); - Ok(()) - } -} - -#[derive(Debug, Clone)] -pub struct TILTypeInfo { - _flags: u32, - pub name: String, - pub ordinal: u64, - pub tinfo: Type, - _cmt: String, - _fieldcmts: String, - _sclass: u8, -} - -impl TILTypeInfo { - pub(crate) fn read(input: &mut I, til: &TILSectionHeader) -> Result { - let flags: u32 = bincode::deserialize_from(&mut *input)?; - let name = read_c_string(&mut *input)?; - let is_u64 = (flags >> 31) != 0; - let ordinal = match (til.format, is_u64) { - // formats below 0x12 doesn't have 64 bits ord - (0..=0x11, _) | (_, false) => bincode::deserialize_from::<_, u32>(&mut *input)?.into(), - (_, true) => bincode::deserialize_from(&mut *input)?, - }; - let tinfo_raw = TypeRaw::read(&mut *input, til).context("parsing `TILTypeInfo::tiinfo`")?; - let _info = read_c_string(&mut *input)?; - let cmt = read_c_string(&mut *input)?; - let fields = read_c_string_vec(&mut *input)?; - let fieldcmts = read_c_string(&mut *input)?; - let sclass: u8 = bincode::deserialize_from(&mut *input)?; - - let tinfo = Type::new(til, tinfo_raw, Some(fields))?; - - Ok(Self { - _flags: flags, - name, - ordinal, - tinfo, - _cmt: cmt, - _fieldcmts: fieldcmts, - _sclass: sclass, - }) - } -} - -#[derive(Debug, Clone)] -pub enum Type { - Basic(Basic), - Pointer(Pointer), - Function(Function), - Array(Array), - Typedef(Typedef), - Struct(Struct), - Union(Union), - Enum(Enum), - Bitfield(Bitfield), -} -impl Type { - fn new( - til: &TILSectionHeader, - tinfo_raw: TypeRaw, - fields: Option>, - ) -> Result { - match tinfo_raw { - TypeRaw::Basic(x) => Basic::new(til, x, fields).map(Type::Basic), - TypeRaw::Bitfield(x) => { - if matches!(fields, Some(f) if !f.is_empty()) { - return Err(anyhow!("fields in a Bitfield")); - } - Ok(Type::Bitfield(x)) - } - TypeRaw::Typedef(x) => { - if matches!(fields, Some(f) if !f.is_empty()) { - return Err(anyhow!("fields in a Typedef")); - } - Ok(Type::Typedef(x)) - } - TypeRaw::Pointer(x) => Pointer::new(til, x, fields).map(Type::Pointer), - TypeRaw::Function(x) => Function::new(til, x, fields).map(Type::Function), - TypeRaw::Array(x) => Array::new(til, x, fields).map(Type::Array), - TypeRaw::Struct(x) => Struct::new(til, x, fields).map(Type::Struct), - TypeRaw::Union(x) => Union::new(til, x, fields).map(Type::Union), - TypeRaw::Enum(x) => Enum::new(til, x, fields).map(Type::Enum), - } - } -} - -#[derive(Debug, Clone)] -enum TypeRaw { - Basic(TypeMetadata), - Pointer(PointerRaw), - Function(FunctionRaw), - Array(ArrayRaw), - Typedef(Typedef), - Struct(StructRaw), - Union(UnionRaw), - Enum(EnumRaw), - Bitfield(Bitfield), -} - -impl TypeRaw { - pub fn read(input: &mut I, header: &TILSectionHeader) -> Result { - let metadata = TypeMetadata::read(&mut *input)?; - if metadata.get_base_type_flag().is_typeid_last() - || metadata.get_base_type_flag().is_reserved() - { - return Ok(TypeRaw::Basic(metadata)); - } else if metadata.get_base_type_flag().is_pointer() { - Ok(TypeRaw::Pointer( - PointerRaw::read(input, metadata, header).context("Type::Pointer")?, - )) - } else if metadata.get_base_type_flag().is_function() { - Ok(TypeRaw::Function( - FunctionRaw::read(input, &metadata, header).context("Type::Function")?, - )) - } else if metadata.get_base_type_flag().is_array() { - Ok(TypeRaw::Array( - ArrayRaw::read(input, metadata, header).context("Type::Array")?, - )) - } else if metadata.get_full_type_flag().is_typedef() { - Ok(TypeRaw::Typedef( - Typedef::read(input).context("Type::Typedef")?, - )) - } else if metadata.get_full_type_flag().is_union() { - Ok(TypeRaw::Union( - UnionRaw::read(input, header).context("Type::Union")?, - )) - } else if metadata.get_full_type_flag().is_struct() { - Ok(TypeRaw::Struct( - StructRaw::read(input, header).context("Type::Struct")?, - )) - } else if metadata.get_full_type_flag().is_enum() { - Ok(TypeRaw::Enum( - EnumRaw::read(input, header).context("Type::Enum")?, - )) - } else if metadata.get_base_type_flag().is_bitfield() { - Ok(TypeRaw::Bitfield( - Bitfield::read(input, metadata).context("Type::Bitfield")?, - )) - } else { - todo!(); - //Ok(Type::Unknown(read_c_string_raw(input)?)) - } - } - - pub fn read_ref(input: &mut I, header: &TILSectionHeader) -> Result { - let mut bytes = read_dt_bytes(&mut *input)?; - - if !bytes.starts_with(b"=") { - let dt = serialize_dt(bytes.len().try_into().unwrap())?; - bytes = [b'='].into_iter().chain(dt).chain(bytes).collect(); - } - - let mut bytes = &bytes[..]; - let result = TypeRaw::read(&mut bytes, header)?; - if !bytes.is_empty() { - return Err(anyhow!("Unable to fully parser Type ref")); - } - Ok(result) - } -} - -#[derive(Debug, Clone, Copy)] -pub enum Basic { - Void, - // NOTE Unknown with 0 bytes is NOT the same as Void - Unknown { - bytes: u8, - }, - - Bool { - bytes: NonZeroU8, - }, - Char, - SegReg, - Int { - bytes: NonZeroU8, - is_signed: Option, - }, - Float { - bytes: NonZeroU8, - }, -} - -impl Basic { - fn new( - til: &TILSectionHeader, - mdata: TypeMetadata, - fields: Option>, - ) -> Result { - const fn bytes(bytes: u8) -> NonZeroU8 { - if bytes == 0 { - unreachable!() - } - unsafe { NonZeroU8::new_unchecked(bytes) } - } - if let Some(fields) = fields { - ensure!(fields.is_empty(), "Unset with fields"); - } - let bt = mdata.get_base_type_flag().0; - let btmt = mdata.get_type_flag().0; - use flag::{tf_bool::*, tf_float::*, tf_int::*, tf_unk::*}; - match bt { - BT_VOID => { - let bytes = match btmt { - // special case, void - BTMT_SIZE0 => return Ok(Self::Void), - BTMT_SIZE12 => 1, - BTMT_SIZE48 => 4, - BTMT_SIZE128 => 16, - _ => unreachable!(), - }; - Ok(Self::Unknown { bytes }) - } - BT_UNK => { - let bytes = match btmt { - BTMT_SIZE0 => return Err(anyhow!("forbidden use of BT_UNK")), - BTMT_SIZE12 => 2, - BTMT_SIZE48 => 8, - BTMT_SIZE128 => 0, - _ => unreachable!(), - }; - Ok(Self::Unknown { bytes }) - } - - bt_int @ BT_INT8..=BT_INT => { - let is_signed = match btmt { - BTMT_UNKSIGN => None, - BTMT_SIGNED => Some(true), - BTMT_UNSIGNED => Some(false), - // special case for char - BTMT_CHAR => match bt_int { - BT_INT8 => return Ok(Self::Char), - BT_INT => return Ok(Self::SegReg), - _ => { - return Err(anyhow!("Reserved use of tf_int::BTMT_CHAR {:x}", mdata.0)) - } - }, - _ => unreachable!(), - }; - let bytes = match bt_int { - BT_INT8 => bytes(1), - BT_INT16 => bytes(2), - BT_INT32 => bytes(4), - BT_INT64 => bytes(8), - BT_INT128 => bytes(16), - BT_INT => til.size_i, - _ => unreachable!(), - }; - Ok(Self::Int { bytes, is_signed }) - } - - BT_BOOL => { - let bytes = match btmt { - BTMT_DEFBOOL => til.size_b, - BTMT_BOOL1 => bytes(1), - BTMT_BOOL4 => bytes(4), - // TODO get the inf_is_64bit field - //BTMT_BOOL2 if !inf_is_64bit => Some(bytes(2)), - //BTMT_BOOL8 if inf_is_64bit => Some(bytes(8)), - BTMT_BOOL8 => bytes(2), // delete this - _ => unreachable!(), - }; - Ok(Self::Bool { bytes }) - } - - BT_FLOAT => { - let bytes = match btmt { - BTMT_FLOAT => bytes(4), - BTMT_DOUBLE => bytes(8), - // TODO error if none? - BTMT_LNGDBL => til.size_long_double.unwrap_or(bytes(8)), - // TODO find the tbyte_size field - //BTMT_SPECFLT if til.tbyte_size() => Some(bytes), - BTMT_SPECFLT => bytes(2), - _ => unreachable!(), - }; - Ok(Self::Float { bytes }) - } - _ => Err(anyhow!("Unkown Unset Type {}", mdata.0)), - } - } -} - -#[derive(Debug, Clone)] -pub struct Pointer { - pub closure: Option, - pub tah: TAH, - pub typ: Box, -} - -impl Pointer { - fn new(til: &TILSectionHeader, raw: PointerRaw, fields: Option>) -> Result { - Ok(Self { - closure: raw.closure.map(|x| Closure::new(til, x)).transpose()?, - tah: raw.tah, - typ: Type::new(til, *raw.typ, fields).map(Box::new)?, - }) - } -} - -#[derive(Debug, Clone)] -pub enum Closure { - Closure(Box), - PointerBased(u8), -} - -impl Closure { - fn new(til: &TILSectionHeader, raw: ClosureRaw) -> Result { - match raw { - ClosureRaw::Closure(c) => Type::new(til, *c, None).map(Box::new).map(Self::Closure), - ClosureRaw::PointerBased(p) => Ok(Self::PointerBased(p)), - } - } -} - -#[derive(Debug, Clone)] -struct PointerRaw { - pub closure: Option, - pub tah: TAH, - pub typ: Box, -} - -#[derive(Debug, Clone)] -enum ClosureRaw { - Closure(Box), - PointerBased(u8), -} - -impl PointerRaw { - fn read( - input: &mut I, - metadata: TypeMetadata, - header: &TILSectionHeader, - ) -> Result { - let closure = metadata - .get_type_flag() - .is_type_closure() - .then(|| ClosureRaw::read(&mut *input, header)) - .transpose()?; - let tah = TAH::read(&mut *input)?; - let typ = TypeRaw::read(&mut *input, header)?; - Ok(Self { - closure, - tah, - typ: Box::new(typ), - }) - } -} - -impl ClosureRaw { - fn read(input: &mut I, header: &TILSectionHeader) -> Result { - let closure_type: u8 = bincode::deserialize_from(&mut *input)?; - if closure_type == 0xFF { - let closure = TypeRaw::read(&mut *input, header)?; - Ok(Self::Closure(Box::new(closure))) - } else { - let closure_ptr = bincode::deserialize_from(&mut *input)?; - Ok(Self::PointerBased(closure_ptr)) - } - } -} - -#[derive(Debug, Clone)] -pub struct Function { - pub ret: Box, - pub args: Vec<(Option, Type, Option)>, - pub retloc: Option, -} -impl Function { - fn new( - til: &TILSectionHeader, - value: FunctionRaw, - fields: Option>, - ) -> Result { - let args = associate_field_name_and_member(fields, value.args) - .context("Function")? - .map(|(n, (t, a))| Type::new(til, t, None).map(|t| (n, t, a))) - .collect::>()?; - Ok(Self { - ret: Type::new(til, *value.ret, None).map(Box::new)?, - args, - retloc: value.retloc, - }) - } -} - -#[derive(Debug, Clone)] -struct FunctionRaw { - pub ret: Box, - pub args: Vec<(TypeRaw, Option)>, - pub retloc: Option, -} - -#[derive(Debug, Clone)] -pub enum ArgLoc { - // TODO add those to flags - // ::ALOC_STACK - // ::ALOC_STATIC - // ::ALOC_REG1 - // ::ALOC_REG2 - // ::ALOC_RREL - // ::ALOC_DIST - // ::ALOC_CUSTOM - /// 0 - None - None, - /// 1 - stack offset - Stack(u32), - /// 2 - distributed (scattered) - Dist(Vec), - /// 3 - one register (and offset within it) - Reg1(u32), - /// 4 - register pair - Reg2(u32), - /// 5 - register relative - RRel { reg: u16, off: u32 }, - /// 6 - global address - Static(u32), - // 7..=0xf custom - // TODO is possible to know the custom impl len? -} - -#[derive(Debug, Clone)] -pub struct ArgLocDist { - pub info: u16, - pub off: u16, - pub size: u16, -} - -impl FunctionRaw { - fn read( - input: &mut I, - metadata: &TypeMetadata, - header: &TILSectionHeader, - ) -> Result { - // TODO what is that? - let mut flags = metadata.get_type_flag().0 << 2; - - let cc = Self::read_cc(&mut *input, &mut flags)?; - - let _tah = TAH::read(&mut *input)?; - let ret = TypeRaw::read(&mut *input, header)?; - let have_retloc = cc.get_calling_convention().is_special_pe() - && !matches!(&ret, TypeRaw::Basic(mdata) if mdata.get_full_type_flag().is_void()); - let retloc = have_retloc.then(|| ArgLoc::read(&mut *input)).transpose()?; - if cc.get_calling_convention().is_void_arg() { - return Ok(Self { - ret: Box::new(ret), - args: vec![], - retloc, - }); - } - - let n = read_dt(&mut *input)?; - let is_special_pe = cc.get_calling_convention().is_special_pe(); - let args = (0..n) - .map(|_| -> Result<_> { - let tmp = input.fill_buf()?.get(0).copied(); - if tmp == Some(0xFF) { - // TODO what is this? - let _tmp: u8 = bincode::deserialize_from(&mut *input)?; - let _flags = read_de(&mut *input)?; - } - let tinfo = TypeRaw::read(&mut *input, header)?; - let argloc = is_special_pe - .then(|| ArgLoc::read(&mut *input)) - .transpose()?; - - Ok((tinfo, argloc)) - }) - .collect::>()?; - - Ok(Self { - ret: Box::new(ret), - args, - retloc, - }) - } - - fn read_cc(input: &mut I, flags: &mut u8) -> Result { - let mut cm = TypeMetadata::read(&mut *input)?; - if !cm.get_calling_convention().is_spoiled() { - return Ok(cm); - } - // TODO find what to do with this spoiled and flags stuff - let mut _spoiled = vec![]; - loop { - // TODO create flags::CM_CC_MASK - let nspoiled = cm.0 & !0xf0; - if nspoiled == 0xF { - let b: u8 = bincode::deserialize_from(&mut *input)?; - *flags |= (b & 0x1F) << 1; - } else { - for _ in 0..nspoiled { - let b: u8 = bincode::deserialize_from(&mut *input)?; - let (size, reg) = if b & 0x80 != 0 { - let size: u8 = bincode::deserialize_from(&mut *input)?; - let reg = b & 0x7F; - (size, reg) - } else { - ensure!(b > 1, "Unable to solve register from a spoiled function"); - let size = (b >> 4) + 1; - let reg = (b & 0xF) - 1; - (size, reg) - }; - _spoiled.push((size, reg)); - } - *flags |= 1; - } - - cm = TypeMetadata::read(&mut *input)?; - if !cm.get_calling_convention().is_spoiled() { - return Ok(cm); - } - } - } -} - -impl ArgLoc { - fn read(input: &mut I) -> Result { - let t: u8 = bincode::deserialize_from(&mut *input)?; - if t != 0xFF { - let b = t & 0x7F; - match (t, b) { - (0..=0x80, 1..) => Ok(Self::Reg1((b - 1).into())), - (0..=0x80, 0) => Ok(Self::Stack(0)), - _ => { - let c: u8 = bincode::deserialize_from(&mut *input)?; - if c == 0 { - Ok(Self::None) - } else { - Ok(Self::Reg2(u32::from(b) | u32::from(c - 1) << 16)) - } - } - } - } else { - let typ = read_dt(&mut *input)?; - match typ & 0xF { - 0 => Ok(Self::None), - 1 => { - let sval = read_de(&mut *input)?; - Ok(Self::Stack(sval)) - } - 2 => { - let n = (typ >> 5) & 0x7; - let dist: Vec<_> = (0..n) - .map(|_| { - let info = read_dt(&mut *input)?; - let off = read_dt(&mut *input)?; - let size = read_dt(&mut *input)?; - Ok(ArgLocDist { info, off, size }) - }) - .collect::>()?; - Ok(Self::Dist(dist)) - } - 3 => { - let reg_info = read_dt(&mut *input)?; - // TODO read other dt? - Ok(Self::Reg1(reg_info.into())) - } - 4 => { - let reg_info = read_dt(&mut *input)?; - // TODO read other dt? - Ok(Self::Reg2(reg_info.into())) - } - 5 => { - let reg = read_dt(&mut *input)?; - let off = read_de(&mut *input)?; - Ok(Self::RRel { reg, off }) - } - 6 => { - let sval = read_de(&mut *input)?; - Ok(Self::Static(sval)) - } - 0x7..=0xF => todo!("Custom implementation for ArgLoc"), - _ => unreachable!(), - } - } - } -} - -#[derive(Clone, Debug)] -pub struct Array { - pub base: u8, - pub nelem: u16, - pub tah: TAH, - pub elem_type: Box, -} -impl Array { - fn new(til: &TILSectionHeader, value: ArrayRaw, fields: Option>) -> Result { - if matches!(&fields, Some(f) if !f.is_empty()) { - return Err(anyhow!("fields in a Array")); - } - Ok(Self { - base: value.base, - nelem: value.nelem, - tah: value.tah, - elem_type: Type::new(til, *value.elem_type, None).map(Box::new)?, - }) - } -} - -#[derive(Clone, Debug)] -struct ArrayRaw { - pub base: u8, - pub nelem: u16, - pub tah: TAH, - pub elem_type: Box, -} - -impl ArrayRaw { - fn read( - input: &mut I, - metadata: TypeMetadata, - header: &TILSectionHeader, - ) -> Result { - let (base, nelem) = if metadata.get_type_flag().is_non_based() { - let nelem = read_dt(&mut *input)?; - (0, nelem) - } else { - let (base, nelem) = read_da(&mut *input)?; - (base, nelem.into()) - }; - let tah = TAH::read(&mut *input)?; - let elem_type = TypeRaw::read(&mut *input, header)?; - Ok(ArrayRaw { - base, - nelem, - tah, - elem_type: Box::new(elem_type), - }) - } -} - -#[derive(Clone, Debug)] -pub enum Typedef { - Ordinal(u32), - Name(String), -} - -impl Typedef { - fn read(input: &mut I) -> Result { - let buf = read_dt_bytes(&mut *input)?; - match &buf[..] { - [b'#', data @ ..] => { - let mut tmp = &data[..]; - let de = read_de(&mut tmp)?; - if !tmp.is_empty() { - return Err(anyhow!("Typedef Ordinal with more data then expected")); - } - Ok(Typedef::Ordinal(de)) - } - _ => Ok(Typedef::Name(String::from_utf8(buf)?)), - } - } -} - -#[derive(Clone, Debug)] -pub enum Struct { - Ref { - ref_type: Box, - taudt_bits: SDACL, - }, - NonRef { - effective_alignment: u16, - taudt_bits: SDACL, - members: Vec, - }, -} -impl Struct { - fn new(til: &TILSectionHeader, value: StructRaw, fields: Option>) -> Result { - match value { - StructRaw::Ref { - ref_type, - taudt_bits, - } => { - if matches!(&fields, Some(f) if !f.is_empty()) { - return Err(anyhow!("fields in a Ref Struct")); - } - Ok(Struct::Ref { - ref_type: Type::new(til, *ref_type, None).map(Box::new)?, - taudt_bits, - }) - } - StructRaw::NonRef { - effective_alignment, - taudt_bits, - members, - } => { - let members = associate_field_name_and_member(fields, members) - .context("Struct")? - .map(|(n, m)| StructMember::new(til, n, m)) - .collect::>()?; - Ok(Struct::NonRef { - effective_alignment, - taudt_bits, - members, - }) - } - } - } -} - -#[derive(Clone, Debug)] -enum StructRaw { - Ref { - ref_type: Box, - taudt_bits: SDACL, - }, - NonRef { - effective_alignment: u16, - taudt_bits: SDACL, - members: Vec, - }, -} - -impl StructRaw { - fn read(input: &mut I, header: &TILSectionHeader) -> Result { - let Some(n) = read_dt_de(&mut *input)? else { - // simple reference - let ref_type = TypeRaw::read_ref(&mut *input, header)?; - let taudt_bits = SDACL::read(&mut *input)?; - return Ok(Self::Ref { - ref_type: Box::new(ref_type), - taudt_bits, - }); - }; - - let alpow = n & 7; - let mem_cnt = n >> 3; - let effective_alignment = if alpow == 0 { 0 } else { 1 << (alpow - 1) }; - let taudt_bits = SDACL::read(&mut *input)?; - let members = (0..mem_cnt) - .map(|_| StructMemberRaw::read(&mut *input, header)) - .collect::>()?; - Ok(Self::NonRef { - effective_alignment, - taudt_bits, - members, - }) - } -} - -#[derive(Clone, Debug)] -pub enum Union { - Ref { - ref_type: Box, - taudt_bits: SDACL, - }, - NonRef { - taudt_bits: SDACL, - effective_alignment: u16, - members: Vec<(Option, Type)>, - }, -} -impl Union { - fn new(til: &TILSectionHeader, value: UnionRaw, fields: Option>) -> Result { - match value { - UnionRaw::Ref { - ref_type, - taudt_bits, - } => { - if matches!(fields, Some(f) if !f.is_empty()) { - return Err(anyhow!("fields in a Ref Union")); - } - Ok(Union::Ref { - ref_type: Type::new(til, *ref_type, None).map(Box::new)?, - taudt_bits, - }) - } - UnionRaw::NonRef { - taudt_bits, - effective_alignment, - members, - } => { - let members = associate_field_name_and_member(fields, members) - .context("Union")? - .map(|(n, m)| Type::new(til, m, None).map(|m| (n, m))) - .collect::>()?; - Ok(Union::NonRef { - taudt_bits, - effective_alignment, - members, - }) - } - } - } -} - -// TODO struct and union are basically identical, the diff is that member in union don't have SDACL, -// merge both -#[derive(Clone, Debug)] -enum UnionRaw { - Ref { - ref_type: Box, - taudt_bits: SDACL, - }, - NonRef { - taudt_bits: SDACL, - effective_alignment: u16, - members: Vec, - }, -} - -impl UnionRaw { - fn read(input: &mut I, header: &TILSectionHeader) -> Result { - let Some(n) = read_dt_de(&mut *input)? else { - // is ref - let ref_type = TypeRaw::read_ref(&mut *input, header)?; - let taudt_bits = SDACL::read(&mut *input)?; - return Ok(Self::Ref { - ref_type: Box::new(ref_type), - taudt_bits, - }); - }; - let alpow = n & 7; - let mem_cnt = n >> 3; - let effective_alignment = if alpow == 0 { 0 } else { 1 << (alpow - 1) }; - let taudt_bits = SDACL::read(&mut *input)?; - let members = (0..mem_cnt) - .map(|_| TypeRaw::read(&mut *input, header)) - .collect::>()?; - Ok(Self::NonRef { - effective_alignment, - taudt_bits, - members, - }) - } -} - -#[derive(Clone, Debug)] -pub enum Enum { - Ref { - ref_type: Box, - taenum_bits: TypeAttribute, - }, - NonRef { - group_sizes: Vec, - taenum_bits: TypeAttribute, - bte: u8, - members: Vec<(Option, u64)>, - bytesize: u64, - }, -} -impl Enum { - fn new(til: &TILSectionHeader, value: EnumRaw, fields: Option>) -> Result { - match value { - EnumRaw::Ref { - ref_type, - taenum_bits, - } => { - if matches!(&fields, Some(f) if !f.is_empty()) { - return Err(anyhow!("fields in a Ref Enum")); - } - Ok(Enum::Ref { - ref_type: Type::new(til, *ref_type, None).map(Box::new)?, - taenum_bits, - }) - } - EnumRaw::NonRef { - group_sizes, - taenum_bits, - bte, - members, - bytesize, - } => { - let members = associate_field_name_and_member(fields, members) - .context("Enum")? - .map(|(n, f)| (n, f)) - .collect(); - Ok(Enum::NonRef { - group_sizes, - taenum_bits, - bte, - members, - bytesize, - }) - } - } - } -} - -#[derive(Clone, Debug)] -enum EnumRaw { - Ref { - ref_type: Box, - taenum_bits: TypeAttribute, - }, - NonRef { - group_sizes: Vec, - taenum_bits: TypeAttribute, - bte: u8, - members: Vec, - bytesize: u64, - }, -} - -impl EnumRaw { - fn read(input: &mut I, header: &TILSectionHeader) -> Result { - let Some(n) = read_dt_de(&mut *input)? else { - // is ref - let ref_type = TypeRaw::read_ref(&mut *input, header)?; - let taenum_bits = SDACL::read(&mut *input)?.0; - return Ok(EnumRaw::Ref { - ref_type: Box::new(ref_type), - taenum_bits, - }); - }; - - let taenum_bits = TAH::read(&mut *input)?.0; - let bte = bincode::deserialize_from(&mut *input)?; - let mut cur: u64 = 0; - let emsize = bte & flag::tf_enum::BTE_SIZE_MASK; - let bytesize: u32 = match emsize { - 0 if header.size_enum != 0 => header.size_enum.into(), - 0 => return Err(anyhow!("BTE emsize is 0 without header")), - 5 | 6 | 7 => return Err(anyhow!("BTE emsize with reserved values")), - _ => 1u32 << (emsize - 1), - }; - - let mask: u64 = if bytesize >= 16 { - // is saturating valid? - //u64::MAX - return Err(anyhow!("Bytes size is too big")); - } else { - u64::MAX >> (u64::BITS - (bytesize * 8)) - }; - - let mut group_sizes = vec![]; - let mut members = vec![]; - for _ in 0..n { - let lo: u64 = read_de(&mut *input)?.into(); - let is_64 = (taenum_bits.0 & 0x0020) != 0; - let step = if is_64 { - let hi: u64 = read_de(&mut *input)?.into(); - (lo | (hi << 32)) & mask - } else { - lo & mask - }; - // TODO: subarrays - // https://www.hex-rays.com/products/ida/support/sdkdoc/group__tf__enum.html#ga9ae7aa54dbc597ec17cbb17555306a02 - if (bte & flag::tf_enum::BTE_BITFIELD) != 0 { - let group_size = read_dt(&mut *input)?; - group_sizes.push(group_size); - } - // TODO check is this is wrapping by default - let next_step = cur.wrapping_add(step); - cur = next_step; - members.push(cur); - } - return Ok(EnumRaw::NonRef { - group_sizes, - taenum_bits, - bte, - members, - bytesize: bytesize.into(), - }); - } -} - -#[derive(Debug, Clone)] -pub struct Bitfield { - pub unsigned: bool, - pub width: u16, - pub nbytes: i32, -} - -impl Bitfield { - fn read(input: &mut I, metadata: TypeMetadata) -> Result { - let nbytes = 1 << (metadata.get_type_flag().0 >> 4); - let dt = read_dt(&mut *input)?; - let width = dt >> 1; - let unsigned = (dt & 1) > 0; - let _tag = TAH::read(&mut *input)?; - Ok(Self { - unsigned, - width, - nbytes, - }) - } -} - -#[derive(Clone, Debug)] -pub struct StructMember { - pub name: Option, - pub member_type: Type, - pub sdacl: SDACL, -} - -impl StructMember { - fn new(til: &TILSectionHeader, name: Option, m: StructMemberRaw) -> Result { - Ok(Self { - name, - member_type: Type::new(til, m.0, None)?, - sdacl: m.1, - }) - } -} -#[derive(Clone, Debug)] -struct StructMemberRaw(pub TypeRaw, pub SDACL); -impl StructMemberRaw { - fn read(input: &mut I, header: &TILSectionHeader) -> Result { - let member_type = TypeRaw::read(&mut *input, header)?; - let sdacl = SDACL::read(&mut *input)?; - Ok(Self(member_type, sdacl)) - } -} - -#[derive(Debug, Clone)] -pub struct TILMacro { - pub name: String, - pub value: String, -} - -impl TILMacro { - fn read(input: &mut I) -> Result { - let name = read_c_string(&mut *input)?; - // TODO find what this is - let _flag: u16 = bincode::deserialize_from(&mut *input)?; - let value = read_c_string(&mut *input)?; - Ok(Self { name, value }) - } -} - -#[derive(Clone, Default, Debug)] -pub struct TypeMetadata(pub u8); -impl TypeMetadata { - fn new(value: u8) -> Self { - // TODO check for invalid values - Self(value) - } - fn read(input: I) -> Result { - Ok(Self::new(bincode::deserialize_from(input)?)) - } -} - -// TODO make those inner fields into enums or private -#[derive(Clone, Copy, Debug)] -pub struct BaseTypeFlag(pub u8); -#[derive(Clone, Copy, Debug)] -pub struct FullTypeFlag(pub u8); -#[derive(Clone, Copy, Debug)] -pub struct TypeFlag(pub u8); -#[derive(Clone, Copy, Debug)] -pub struct CallingConventionFlag(pub u8); - -#[derive(Clone, Copy, Debug)] -pub struct TypeAttribute(pub u16); -impl TypeAttribute { - fn read(input: &mut I) -> Result { - let mut val: u16 = 0; - let tah: u8 = bincode::deserialize_from(&mut *input)?; - let tmp = ((tah & 1) | ((tah >> 3) & 6)) + 1; - if tah == 0xFE || tmp == 8 { - if tmp == 8 { - val = tmp as u16; - } - let mut shift = 0; - loop { - let next_byte: u8 = bincode::deserialize_from(&mut *input)?; - if next_byte == 0 { - return Err(anyhow!("Failed to parse TypeAttribute")); - } - val |= ((next_byte & 0x7F) as u16) << shift; - if next_byte & 0x80 == 0 { - break; - } - shift += 7; - } - } - if (val & 0x0010) > 0 { - val = read_dt(&mut *input)?; - for _ in 0..val { - let _string = read_dt_string(&mut *input)?; - let another_de = read_dt(&mut *input)?; - let mut other_string = vec![0; another_de.into()]; - input.read_exact(&mut other_string)?; - } - } - Ok(TypeAttribute(val)) - } -} - -#[derive(Clone, Copy, Debug)] -pub struct TAH(pub TypeAttribute); -impl TAH { - fn read(input: &mut I) -> Result { - let Some(tah) = input.fill_buf()?.get(0).copied() else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - }; - if tah == 0xFE { - Ok(Self(TypeAttribute::read(input)?)) - } else { - Ok(Self(TypeAttribute(0))) - } - } -} - -#[derive(Clone, Copy, Debug)] -pub struct SDACL(pub TypeAttribute); -impl SDACL { - fn read(input: &mut I) -> Result { - let Some(sdacl) = input.fill_buf()?.get(0).copied() else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on SDACL" - ))); - }; - if ((sdacl & !0x30) ^ 0xC0) <= 0x01 { - Ok(Self(TypeAttribute::read(input)?)) - } else { - Ok(Self(TypeAttribute(0))) - } - } -} - -impl CallingConventionFlag { - fn is_spoiled(&self) -> bool { - self.0 == 0xA0 - } - - fn is_void_arg(&self) -> bool { - self.0 == 0x20 - } - - fn is_special_pe(&self) -> bool { - self.0 == 0xD0 || self.0 == 0xE0 || self.0 == 0xF0 - } -} - -impl TypeMetadata { - pub fn get_base_type_flag(&self) -> BaseTypeFlag { - BaseTypeFlag(self.0 & flag::tf_mask::TYPE_BASE_MASK) - } - - pub fn get_full_type_flag(&self) -> FullTypeFlag { - FullTypeFlag(self.0 & flag::tf_mask::TYPE_FULL_MASK) - } - - pub fn get_type_flag(&self) -> TypeFlag { - TypeFlag(self.0 & flag::tf_mask::TYPE_FLAGS_MASK) - } - - pub fn get_calling_convention(&self) -> CallingConventionFlag { - CallingConventionFlag(self.0 & 0xF0) - } -} - -impl TypeFlag { - fn is_non_based(&self) -> bool { - self.0 == 0x10 - } - - pub fn is_unsigned(&self) -> bool { - self.0 == 0x20 - } - - pub fn is_signed(&self) -> bool { - !self.is_unsigned() - } - - fn is_type_closure(&self) -> bool { - self.0 == flag::tf_ptr::BTMT_CLOSURE - } -} - -impl FullTypeFlag { - fn is_enum(&self) -> bool { - self.0 == flag::tf_shortcuts::BTF_ENUM - } - - fn is_void(&self) -> bool { - self.0 == flag::tf_shortcuts::BTF_VOID - } - - fn is_struct(&self) -> bool { - self.0 == flag::tf_shortcuts::BTF_STRUCT - } - - fn is_union(&self) -> bool { - self.0 == flag::tf_shortcuts::BTF_UNION - } - - fn is_typedef(&self) -> bool { - self.0 == flag::tf_shortcuts::BTF_TYPEDEF - } -} - -impl BaseTypeFlag { - fn is_pointer(&self) -> bool { - self.0 == flag::tf_ptr::BT_PTR - } - - fn is_function(&self) -> bool { - self.0 == flag::tf_func::BT_FUNC - } - - fn is_array(&self) -> bool { - self.0 == flag::tf_array::BT_ARRAY - } - - fn is_bitfield(&self) -> bool { - self.0 == flag::tf_complex::BT_BITFIELD - } - - fn is_typeid_last(&self) -> bool { - self.0 <= flag::tf_last_basic::BT_LAST_BASIC - } - - fn is_reserved(&self) -> bool { - self.0 == flag::BT_RESERVED - } -} - -fn read_dt_bytes(input: &mut I) -> Result> { - let buf_len = read_dt(&mut *input)?; - let mut buf = vec![0; buf_len.into()]; - input.read_exact(&mut buf)?; - Ok(buf) -} - -fn read_dt_string(input: &mut I) -> Result { - let buf = read_dt_bytes(input)?; - Ok(String::from_utf8(buf)?) -} - -/// Reads 1 to 5 bytes -/// Value Range: 0-0xFFFFFFFF -/// Usage: Enum Deltas -fn read_de(input: &mut I) -> std::io::Result { - let mut val: u32 = 0; - for _ in 0..5 { - let mut hi = val << 6; - let mut b = [0; 1]; - input.read_exact(&mut b)?; - let b: u32 = b[0].into(); - let sign = b & 0x80; - if sign == 0 { - let lo = b & 0x3F; - val = lo | hi; - return Ok(val); - } else { - let lo = 2 * hi; - hi = b & 0x7F; - val = lo | hi; - } - } - Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Can't find the end of DE", - )) -} - -/// Reads 1 or 2 bytes. -/// Value Range: 0-0xFFFE -/// Usage: 16bit numbers -fn read_dt(input: &mut I) -> std::io::Result { - let mut value = [0u8; 1]; - input.read_exact(&mut value)?; - let value = value[0].into(); - - let value = match value { - 0 => { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "DT can't have 0 value", - )) - } - //SEG = 2 - value if value & 0x80 != 0 => { - let mut iter = [0u8; 1]; - input.read_exact(&mut iter)?; - let inter: u16 = iter[0].into(); - value & 0x7F | inter << 7 - } - //SEG = 1 - _ => value, - }; - Ok(value - 1) -} - -fn serialize_dt(value: u16) -> Result> { - if value > 0x7FFE { - return Err(anyhow!("Invalid value for DT")); - } - let lo = value + 1; - let mut hi = value + 1; - let mut result: Vec = Vec::with_capacity(2); - if lo > 127 { - result.push((lo & 0x7F | 0x80) as u8); - hi = (lo >> 7) & 0xFF; - } - result.push(hi as u8); - Ok(result) -} - -/// Reads 1 to 9 bytes. -/// ValueRange: 0-0x7FFFFFFF, 0-0xFFFFFFFF -/// Usage: Arrays -fn read_da(input: &mut I) -> Result<(u8, u8)> { - let mut a = 0; - let mut b = 0; - let mut da = 0; - let mut base = 0; - let mut nelem = 0; - // TODO check no more then 9 bytes are read - loop { - let Some(typ) = input.fill_buf()?.get(0).copied() else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - }; - if typ & 0x80 == 0 { - break; - } - input.consume(1); - - da = (da << 7) | typ & 0x7F; - b += 1; - if b >= 4 { - let z: u8 = bincode::deserialize_from(&mut *input)?; - if z != 0 { - base = 0x10 * da | z & 0xF - } - nelem = (z >> 4) & 7; - loop { - let Some(y) = input.fill_buf()?.get(0).copied() else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - }; - if (y & 0x80) == 0 { - break; - } - input.consume(1); - nelem = (nelem << 7) | y & 0x7F; - a += 1; - if a >= 4 { - return Ok((nelem, base)); - } - } - } - } - return Ok((nelem, base)); -} - -/// Reads 2 to 7 bytes. -/// Value Range: Nothing or 0-0xFFFF_FFFF -/// Usage: some kind of size -fn read_dt_de(input: &mut I) -> std::io::Result> { - match read_dt(&mut *input)? { - 0 => Ok(None), - 0x7FFE => read_de(&mut *input).map(Some), - n => Ok(Some(n.into())), - } -} - -fn associate_field_name_and_member( - fields: Option>, - members: Vec, -) -> Result, T)>> { - let fields_len: usize = fields.iter().filter(|t| !t.is_empty()).count(); - ensure!(fields_len <= members.len(), "More fields then members"); - // allow to have less fields then members, first fields will have names, others not - Ok(fields - .into_iter() - .flat_map(Vec::into_iter) - .map(Option::Some) - .chain(std::iter::repeat(None)) - .into_iter() - .zip(members)) -} diff --git a/rust/examples/idb/idb_import/Cargo.toml b/rust/examples/idb_import/Cargo.toml similarity index 67% rename from rust/examples/idb/idb_import/Cargo.toml rename to rust/examples/idb_import/Cargo.toml index dd3e7550b..c54ba93bd 100644 --- a/rust/examples/idb/idb_import/Cargo.toml +++ b/rust/examples/idb_import/Cargo.toml @@ -9,6 +9,6 @@ crate-type = ["cdylib"] [dependencies] anyhow = "1.0.86" -binaryninja = { path = "../../../" } -idb-rs = { path = "../shared" } +binaryninja = { path = "../../" } +idb-rs = { git = "https://github.com/Vector35/idb-rs" } log = "0.4.20" diff --git a/rust/examples/idb/idb_import/src/lib.rs b/rust/examples/idb_import/src/lib.rs similarity index 98% rename from rust/examples/idb/idb_import/src/lib.rs rename to rust/examples/idb_import/src/lib.rs index e8fa6d066..7b9026fc3 100644 --- a/rust/examples/idb/idb_import/src/lib.rs +++ b/rust/examples/idb_import/src/lib.rs @@ -190,21 +190,11 @@ fn translate_enum(members: &[(Option, u64)], bytesize: u64) -> Ref fn translate_basic(mdata: &idb_rs::til::Basic) -> Ref { match *mdata { idb_rs::til::Basic::Void => Type::void(), - idb_rs::til::Basic::Unknown { bytes } => { - if bytes != 0 { - Type::array(&Type::char(), bytes.into()) - } else { - Type::void() - } - } - idb_rs::til::Basic::Bool { bytes } => { - if bytes.get() > 1 { - // NOTE Binja don't have any representation for bool other then the default - Type::int(bytes.get().into(), false) - } else { - Type::bool() - } - } + idb_rs::til::Basic::Unknown { bytes: 0 } => Type::void(), + idb_rs::til::Basic::Unknown { bytes } => Type::array(&Type::char(), bytes.into()), + idb_rs::til::Basic::Bool { bytes } if bytes.get() == 1 => Type::bool(), + // NOTE Binja don't have any representation for bool other then the default + idb_rs::til::Basic::Bool { bytes } => Type::int(bytes.get().into(), false), idb_rs::til::Basic::Char => Type::char(), // TODO what exacly is Segment Register? idb_rs::til::Basic::SegReg => Type::char(),