From 694bbc793f82cd5c867f830f822dbf9cba27fa37 Mon Sep 17 00:00:00 2001 From: Rubens Brandao Date: Wed, 14 Aug 2024 18:12:52 -0300 Subject: [PATCH] implement rust idb_dump --- rust/Cargo.lock | 65 +- rust/Cargo.toml | 2 + rust/examples/idb/idb_import/Cargo.toml | 14 + rust/examples/idb/idb_import/src/lib.rs | 777 ++++++++ rust/examples/idb/shared/Cargo.toml | 12 + rust/examples/idb/shared/resources/Readme.md | 2 + .../resources/idbs/idb_and_i64_files_here | 0 .../idb/shared/resources/tils/til_files_here | 0 rust/examples/idb/shared/src/lib.rs | 434 +++++ rust/examples/idb/shared/src/main.rs | 11 + rust/examples/idb/shared/src/test.rs | 92 + rust/examples/idb/shared/src/til/flag.rs | 432 +++++ rust/examples/idb/shared/src/til/mod.rs | 1707 +++++++++++++++++ 13 files changed, 3536 insertions(+), 12 deletions(-) create mode 100644 rust/examples/idb/idb_import/Cargo.toml create mode 100644 rust/examples/idb/idb_import/src/lib.rs create mode 100644 rust/examples/idb/shared/Cargo.toml create mode 100644 rust/examples/idb/shared/resources/Readme.md create mode 100644 rust/examples/idb/shared/resources/idbs/idb_and_i64_files_here create mode 100644 rust/examples/idb/shared/resources/tils/til_files_here create mode 100644 rust/examples/idb/shared/src/lib.rs create mode 100644 rust/examples/idb/shared/src/main.rs create mode 100644 rust/examples/idb/shared/src/test.rs create mode 100644 rust/examples/idb/shared/src/til/flag.rs create mode 100644 rust/examples/idb/shared/src/til/mod.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 2b1afe26c..ce0aa6c99 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -79,9 +79,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.82" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] name = "autocfg" @@ -114,6 +114,15 @@ 
dependencies = [ "bindgen", ] +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bindgen" version = "0.69.4" @@ -398,9 +407,9 @@ checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" dependencies = [ "crc32fast", "miniz_oxide", @@ -468,6 +477,27 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "idb-rs" +version = "0.1.0" +dependencies = [ + "anyhow", + "bincode", + "flate2", + "serde", + "serde_repr", +] + +[[package]] +name = "idb_import" +version = "0.1.0" +dependencies = [ + "anyhow", + "binaryninja", + "idb-rs", + "log", +] + [[package]] name = "indexmap" version = "2.2.6" @@ -535,9 +565,9 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lzxd" @@ -875,18 +905,29 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.197" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.205" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", +] + +[[package]] +name = "serde_repr" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", @@ -924,9 +965,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 5ffc5ba43..0bbd1fa43 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -27,6 +27,8 @@ members = [ "examples/dwarf/dwarf_import", "examples/dwarf/dwarfdump", "examples/dwarf/shared", + "examples/idb/idb_import", + "examples/idb/shared", "examples/flowgraph", "examples/minidump", "examples/mlil_visitor", diff --git a/rust/examples/idb/idb_import/Cargo.toml b/rust/examples/idb/idb_import/Cargo.toml new file mode 100644 index 000000000..dd3e7550b --- /dev/null +++ b/rust/examples/idb/idb_import/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "idb_import" +version = "0.1.0" +authors = ["Rubens Brandao "] +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +anyhow = "1.0.86" +binaryninja = { path = "../../../" } +idb-rs = { path = "../shared" } +log = "0.4.20" diff --git a/rust/examples/idb/idb_import/src/lib.rs b/rust/examples/idb/idb_import/src/lib.rs new file mode 100644 index 000000000..2b452352a --- /dev/null +++ b/rust/examples/idb/idb_import/src/lib.rs @@ -0,0 +1,777 @@ +use 
std::collections::HashMap; + +use binaryninja::architecture::{Architecture, CoreArchitecture}; +use binaryninja::binaryninjacore_sys::{BNMemberAccess, BNMemberScope}; +use binaryninja::binaryview::{BinaryView, BinaryViewBase, BinaryViewExt}; +use binaryninja::debuginfo::{CustomDebugInfoParser, DebugInfo, DebugInfoParser}; +use binaryninja::logger; +use binaryninja::rc::Ref; +use binaryninja::types::{ + Conf, EnumerationBuilder, FunctionParameter, NamedTypeReferenceClass, StructureBuilder, + StructureType, Type, +}; + +use idb_rs::{TILSection, TILTypeInfo}; + +use log::{error, trace, warn, LevelFilter}; + +use anyhow::Result; + +#[derive(Debug, Clone)] +enum BnTypeError { + BitField, + // TODO delete this and make this verification during the TIL/IDB parsing, translating the ordinal + // into a kind of type_idx + OrdinalNotFound(u32), + NameNotFound(String), + + //TypedefNameNotFound(String), + Pointer(Box), + FunctionReturn(Box), + FunctionArg(Box, usize), + Array(Box), + StructMember(Box, usize), + UnionMember(Box, usize), +} + +impl std::fmt::Display for BnTypeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BnTypeError::BitField => write!(f, "BitFields are not supported"), + BnTypeError::OrdinalNotFound(i) => write!(f, "Reference to non existing Ordinal {i}"), + BnTypeError::NameNotFound(name) => write!(f, "Reference to non existing name {name}"), + BnTypeError::Pointer(error) => { + write!(f, "Pointer: {error}") + } + BnTypeError::FunctionReturn(error) => { + write!(f, "Function return: {error}") + } + BnTypeError::FunctionArg(error, i) => { + write!(f, "Function argument {i}: {error}") + } + BnTypeError::Array(error) => write!(f, "Array: {error}"), + BnTypeError::StructMember(error, i) => { + write!(f, "StructMember {i}: {error}") + } + BnTypeError::UnionMember(error, i) => { + write!(f, "Union member {i}: {error}") + } + } + } +} + +struct IDBDebugInfoParser; +impl CustomDebugInfoParser for IDBDebugInfoParser { + 
fn is_valid(&self, view: &BinaryView) -> bool { + view.file().filename().as_str().ends_with(".i64") + || view.file().filename().as_str().ends_with(".idb") + } + + fn parse_info( + &self, + debug_info: &mut DebugInfo, + _bv: &BinaryView, + debug_file: &BinaryView, + progress: Box Result<(), ()>>, + ) -> bool { + match parse_idb_info(debug_info, debug_file, progress) { + Ok(()) => true, + Err(error) => { + error!("Unable to parse IDB file: {error}"); + false + } + } + } +} + +struct TILDebugInfoParser; +impl CustomDebugInfoParser for TILDebugInfoParser { + fn is_valid(&self, view: &BinaryView) -> bool { + view.file().filename().as_str().ends_with(".til") + } + + fn parse_info( + &self, + debug_info: &mut DebugInfo, + _bv: &BinaryView, + debug_file: &BinaryView, + progress: Box Result<(), ()>>, + ) -> bool { + match parse_til_info(debug_info, debug_file, progress) { + Ok(()) => true, + Err(error) => { + error!("Unable to parse TIL file: {error}"); + false + } + } + } +} + +struct BinaryViewReader<'a> { + bv: &'a BinaryView, + offset: u64, +} +impl std::io::Read for BinaryViewReader<'_> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + if !self.bv.offset_valid(self.offset) { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "")); + } + let len = self.bv.read(buf, self.offset); + self.offset += u64::try_from(len).unwrap(); + Ok(len) + } +} + +impl std::io::Seek for BinaryViewReader<'_> { + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + let new_offset = match pos { + std::io::SeekFrom::Start(offset) => Some(offset), + std::io::SeekFrom::End(end) => u64::try_from(self.bv.len()) + .unwrap() + .checked_add_signed(end), + std::io::SeekFrom::Current(next) => self.offset.checked_add_signed(next), + }; + let new_offset = + new_offset.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::UnexpectedEof, ""))?; + if !self.bv.offset_valid(new_offset) { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "")); + } + 
self.offset = new_offset; + Ok(new_offset) + } +} + +fn parse_idb_info( + debug_info: &mut DebugInfo, + debug_file: &BinaryView, + progress: Box Result<(), ()>>, +) -> Result<()> { + trace!("Opening a IDB file"); + let file = BinaryViewReader { + bv: debug_file, + offset: 0, + }; + trace!("Parsing a IDB file"); + let file = std::io::BufReader::new(file); + let mut parser = idb_rs::IDBParser::new(file)?; + let Some(til_section) = parser.til_section() else { + return Ok(()); + }; + trace!("Parsing the TIL section"); + let til = parser.read_til_section(til_section)?; + parse_til_section_info(debug_info, debug_file, &til, progress) +} + +fn translate_enum(members: &[(Option, u64)], bytesize: u64) -> Ref { + let eb = EnumerationBuilder::new(); + for (i, (name, bytesize)) in members.iter().enumerate() { + let name = name.to_owned().unwrap_or_else(|| format!("member_{i}")); + eb.insert(name, *bytesize); + } + Type::enumeration( + &eb.finalize(), + usize::try_from(bytesize).unwrap(), + Conf::new(false, 0), + ) +} + +fn translate_basic(mdata: &idb_rs::til::Basic, arch: CoreArchitecture) -> Ref { + match mdata { + idb_rs::til::Basic::Void => Type::void(), + idb_rs::til::Basic::Unknown { bytes } => { + if let Some(bytes) = bytes { + Type::array(&Type::char(), bytes.get().into()) + } else { + Type::void() + } + } + idb_rs::til::Basic::Bool { bytes } => { + if let Some(bytes) = bytes { + // NOTE Binja don't have any representation for bool other then the default + Type::int(bytes.get().into(), false) + } else { + Type::bool() + } + } + idb_rs::til::Basic::Char => Type::char(), + // TODO what exacly is Segment Register? 
+ idb_rs::til::Basic::SegReg => Type::char(), + idb_rs::til::Basic::Int { bytes, is_signed } => { + // default into signed + let is_signed = is_signed.as_ref().copied().unwrap_or(true); + let bytes = bytes + .map(|x| x.get().into()) + .unwrap_or_else(|| arch.default_integer_size()); + Type::int(bytes, is_signed) + } + idb_rs::til::Basic::Float { bytes } => { + // TODO find a beter way to define the default float size + let bytes = bytes + .map(|x| x.get().into()) + .unwrap_or_else(|| arch.default_integer_size()); + Type::float(bytes) + } + } +} + +fn parse_til_info( + debug_info: &mut DebugInfo, + debug_file: &BinaryView, + progress: Box Result<(), ()>>, +) -> Result<()> { + trace!("Opening a TIL file"); + let file = BinaryViewReader { + bv: debug_file, + offset: 0, + }; + let file = std::io::BufReader::new(file); + trace!("Parsing the TIL section"); + let til = idb_rs::TILSection::parse(file)?; + parse_til_section_info(debug_info, debug_file, &til, progress) +} + +#[derive(Default)] +enum TranslateTypeResult { + #[default] + NotYet, + Error(BnTypeError), + // a type that is not final, but equivalent to the final type + PartialyTranslated(Ref), + Translated(Ref), +} + +impl From, BnTypeError>> for TranslateTypeResult { + fn from(value: Result, BnTypeError>) -> Self { + match value { + Ok(ty) => Self::Translated(ty), + Err(error) => Self::Error(error), + } + } +} + +struct TranslatesIDBType<'a> { + // sanitized name form IDB + name: String, + // class, just to make easy to create named_type + _class: NamedTypeReferenceClass, + // the result, if converted + ty: TranslateTypeResult, + og_ty: &'a TILTypeInfo, + is_symbol: bool, +} + +struct TranslateIDBTypes<'a> { + debug_info: &'a mut DebugInfo, + _debug_file: &'a BinaryView, + arch: CoreArchitecture, + progress: Box Result<(), ()>>, + _til: &'a TILSection, + // note it's mapped 1:1 with the same index from til types.chain(symbols) + types: Vec>, + // ordinals with index to types + types_by_ord: HashMap, + // original 
names with index to types + types_by_name: HashMap, +} + +impl TranslateIDBTypes<'_> { + fn find_typedef_by_ordinal(&self, ord: u64) -> Option { + self.types_by_ord + .get(&ord) + .map(|idx| self.find_typedef(&self.types[*idx])) + } + + fn find_typedef_by_name(&self, name: &str) -> Option { + if name.is_empty() { + // TODO this is my assumption, maybe an empty names Typedef means something else. + return Some(TranslateTypeResult::Translated(Type::void())); + } + + if let Some(other_ty) = self + .types_by_name + .get(name) + .map(|idx| self.find_typedef(&self.types[*idx])) + { + return Some(other_ty); + } + + // check for types that ar usually not defined directly + match name { + "Unkown" | "uint8_t" => Some(TranslateTypeResult::Translated(Type::int(1, false))), + "IUnkown" | "int8_t" => Some(TranslateTypeResult::Translated(Type::int(1, true))), + // TODO SHORT changes with ARCH? + "SHORT" | "int16_t" => Some(TranslateTypeResult::Translated(Type::int(2, true))), + "USHORT" | "uint16_t" => Some(TranslateTypeResult::Translated(Type::int(2, false))), + "int32_t" => Some(TranslateTypeResult::Translated(Type::int(4, true))), + "uint32_t" => Some(TranslateTypeResult::Translated(Type::int(4, false))), + "int64_t" => Some(TranslateTypeResult::Translated(Type::int(8, true))), + "uint64_t" => Some(TranslateTypeResult::Translated(Type::int(8, false))), + "int128_t" => Some(TranslateTypeResult::Translated(Type::int(16, true))), + "uint128_t" => Some(TranslateTypeResult::Translated(Type::int(16, false))), + _ => None, + } + } + + fn find_typedef(&self, ty: &TranslatesIDBType) -> TranslateTypeResult { + // only return a typedef, if it's solved, at lease partially + match &ty.ty { + TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, + TranslateTypeResult::Error(error) => TranslateTypeResult::Error(error.to_owned()), + TranslateTypeResult::PartialyTranslated(og_ty) + | TranslateTypeResult::Translated(og_ty) => TranslateTypeResult::Translated( + 
Type::named_type_from_type(ty.name.as_str(), &og_ty), + ), + } + } + + fn translate_pointer(&self, ty: &idb_rs::til::Type) -> TranslateTypeResult { + match self.translate_type(ty) { + TranslateTypeResult::Translated(trans) => TranslateTypeResult::Translated(trans), + TranslateTypeResult::Error(error) => { + TranslateTypeResult::Error(BnTypeError::Pointer(Box::new(error))) + } + TranslateTypeResult::PartialyTranslated(_) | TranslateTypeResult::NotYet => { + TranslateTypeResult::PartialyTranslated(Type::pointer(&self.arch, &Type::void())) + } + } + } + + fn translate_function(&self, fun: &idb_rs::til::Function) -> TranslateTypeResult { + // funtions are always 0 len, so it's translated or partial(void) + let return_ty = match self.translate_type(&fun.ret) { + TranslateTypeResult::Translated(trans) => trans, + TranslateTypeResult::Error(error) => { + return TranslateTypeResult::Error(BnTypeError::FunctionReturn(Box::new(error))) + } + TranslateTypeResult::PartialyTranslated(_) | TranslateTypeResult::NotYet => { + return TranslateTypeResult::PartialyTranslated(Type::void()) + } + }; + let mut bn_args = Vec::with_capacity(fun.args.len()); + for (i, (arg_name, arg_type, _arg_loc)) in fun.args.iter().enumerate() { + let arg = match self.translate_type(arg_type) { + TranslateTypeResult::Translated(trans) => trans, + TranslateTypeResult::PartialyTranslated(_) | TranslateTypeResult::NotYet => { + return TranslateTypeResult::PartialyTranslated(Type::void()) + } + TranslateTypeResult::Error(error) => { + return TranslateTypeResult::Error(BnTypeError::FunctionArg(Box::new(error), i)) + } + }; + // TODO create location from `arg_loc`? 
+ let loc = None; + let name = arg_name.to_owned().unwrap_or_else(|| format!("arg_{i}")); + bn_args.push(FunctionParameter::new(arg, name, loc)); + } + + TranslateTypeResult::Translated(Type::function(&return_ty, &bn_args, false)) + } + + fn translate_array(&self, array: &idb_rs::til::Array) -> TranslateTypeResult { + match self.translate_type(&*array.elem_type) { + TranslateTypeResult::NotYet => TranslateTypeResult::NotYet, + TranslateTypeResult::Translated(ty) => { + TranslateTypeResult::Translated(Type::array(&ty, array.nelem.into())) + } + TranslateTypeResult::PartialyTranslated(ty) => { + TranslateTypeResult::PartialyTranslated(Type::array(&ty, array.nelem.into())) + } + TranslateTypeResult::Error(error) => { + TranslateTypeResult::Error(BnTypeError::Array(Box::new(error))) + } + } + } + + fn translate_bitfields_into_struct( + &self, + offset: usize, + members_slice: &[idb_rs::til::StructMember], + struct_builder: &StructureBuilder, + ) -> Result<(), BnTypeError> { + if members_slice.is_empty() { + unreachable!() + } + let mut members = members_slice + .iter() + .map(|ty| match &ty.member_type { + idb_rs::til::Type::Bitfield(b) => b, + _ => unreachable!(), + }) + .enumerate(); + let (_, first_field) = members.next().unwrap(); + let mut current_field_bytes = first_field.nbytes; + let mut current_field_bits: u32 = first_field.width.into(); + let mut start_idx = 0; + + for (i, member) in members { + // starting a new field + let max_bits = u32::try_from(current_field_bytes).unwrap() * 8; + // this bitfield start a a new field, or can't contain other bitfields + // finish the previous and start a new + if current_field_bytes != member.nbytes + || max_bits < current_field_bits + u32::from(member.width) + { + let name = if start_idx == i - 1 { + members_slice[i - 1] + .name + .to_owned() + .unwrap_or_else(|| format!("bitfield_{}", offset + start_idx)) + } else { + format!("bitfield_{}_{}", offset + start_idx, offset + (i - 1)) + }; + let field = 
field_from_bytes(current_field_bytes); + struct_builder.append( + &field, + name, + BNMemberAccess::NoAccess, + BNMemberScope::NoScope, + ); + current_field_bytes = member.nbytes; + current_field_bits = 0; + start_idx = i; + } + + // just add the current bitfield into the field + current_field_bits += u32::from(member.width); + } + Ok(()) + } + + fn translate_struct( + &self, + members: &[idb_rs::til::StructMember], + effective_alignment: u16, + ) -> TranslateTypeResult { + let mut is_partial = false; + let structure = StructureBuilder::new(); + structure.set_alignment(effective_alignment.into()); + + let mut first_bitfield_seq = None; + for (i, member) in members.iter().enumerate() { + match (&member.member_type, first_bitfield_seq) { + // accumulate the bitfield to be condensated + (idb_rs::til::Type::Bitfield(_bit), None) => { + first_bitfield_seq = Some(i); + continue; + } + (idb_rs::til::Type::Bitfield(_bit), Some(_)) => continue, + + // condensate the bitfields into byte-wide fields + (_, Some(start_idx)) => { + first_bitfield_seq = None; + let members_bitrange = &members[start_idx..i]; + if let Err(error) = + self.translate_bitfields_into_struct(i, members_bitrange, &structure) + { + return TranslateTypeResult::Error(BnTypeError::StructMember( + Box::new(error), + i, + )); + } + } + + (_, None) => {} + } + + let mem = match self.translate_type(&member.member_type) { + TranslateTypeResult::Translated(ty) => ty, + TranslateTypeResult::PartialyTranslated(partial_ty) => { + is_partial = true; + partial_ty + } + TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, + TranslateTypeResult::Error(error) => { + return TranslateTypeResult::Error(BnTypeError::StructMember( + Box::new(error), + i, + )) + } + }; + let name = member + .name + .to_owned() + .unwrap_or_else(|| format!("member_{i}")); + structure.append(&mem, name, BNMemberAccess::NoAccess, BNMemberScope::NoScope); + } + let bn_ty = Type::structure(&structure.finalize()); + if is_partial { + 
TranslateTypeResult::PartialyTranslated(bn_ty) + } else { + TranslateTypeResult::Translated(bn_ty) + } + } + + fn translate_union( + &self, + members: &[(Option, idb_rs::til::Type)], + _effective_alignment: u16, + ) -> TranslateTypeResult { + let mut is_partial = false; + let structure = StructureBuilder::new(); + structure.set_structure_type(StructureType::UnionStructureType); + for (i, (member_name, member_type)) in members.iter().enumerate() { + // bitfields can be translated into complete fields + let mem = match member_type { + idb_rs::til::Type::Bitfield(field) => field_from_bytes(field.nbytes), + member_type => match self.translate_type(member_type) { + TranslateTypeResult::Translated(ty) => ty, + TranslateTypeResult::Error(error) => { + return TranslateTypeResult::Error(BnTypeError::UnionMember( + Box::new(error), + i, + )) + } + TranslateTypeResult::NotYet => return TranslateTypeResult::NotYet, + TranslateTypeResult::PartialyTranslated(partial) => { + is_partial = true; + partial + } + }, + }; + + let name = member_name + .to_owned() + .unwrap_or_else(|| format!("member_{i}")); + structure.append(&mem, name, BNMemberAccess::NoAccess, BNMemberScope::NoScope); + } + let str_ref = structure.finalize(); + + let bn_ty = Type::structure(&str_ref); + if is_partial { + TranslateTypeResult::PartialyTranslated(bn_ty) + } else { + TranslateTypeResult::Translated(bn_ty) + } + } + + fn translate_type(&self, ty: &idb_rs::til::Type) -> TranslateTypeResult { + match &ty { + // binja don't allow bitfield outside structs + idb_rs::til::Type::Bitfield(_bit) => TranslateTypeResult::Error(BnTypeError::BitField), + + // types that are always translatable + idb_rs::til::Type::Basic(meta) => { + TranslateTypeResult::Translated(translate_basic(meta, self.arch)) + } + idb_rs::til::Type::Enum(idb_rs::til::Enum::NonRef { + members, bytesize, .. 
+ }) => TranslateTypeResult::Translated(translate_enum(members, *bytesize)), + idb_rs::til::Type::Typedef(idb_rs::til::Typedef::Ordinal(ord)) => self + .find_typedef_by_ordinal((*ord).into()) + .unwrap_or_else(|| TranslateTypeResult::Error(BnTypeError::OrdinalNotFound(*ord))), + idb_rs::til::Type::Typedef(idb_rs::til::Typedef::Name(name)) => { + self.find_typedef_by_name(name).unwrap_or_else(|| { + TranslateTypeResult::Error(BnTypeError::NameNotFound(name.to_owned())) + }) + } + + // may not be translatable imediatly, but the size is known and can be + // updated after alBasicers are finished + idb_rs::til::Type::Union(idb_rs::til::Union::Ref { ref_type, .. }) + | idb_rs::til::Type::Struct(idb_rs::til::Struct::Ref { ref_type, .. }) + | idb_rs::til::Type::Enum(idb_rs::til::Enum::Ref { ref_type, .. }) => { + self.translate_pointer(&**ref_type) + } + idb_rs::til::Type::Pointer(ty) => self.translate_pointer(&ty.typ), + idb_rs::til::Type::Function(fun) => self.translate_function(fun), + + // can only be partially solved if all fields are solved or partially solved + idb_rs::til::Type::Array(array) => self.translate_array(array), + idb_rs::til::Type::Struct(idb_rs::til::Struct::NonRef { + members, + effective_alignment, + .. + }) => self.translate_struct(members, *effective_alignment), + idb_rs::til::Type::Union(idb_rs::til::Union::NonRef { + members, + effective_alignment, + .. 
+ }) => self.translate_union(members, *effective_alignment), + } + } +} + +fn find_typedef_named_type_class(ty: &idb_rs::til::Type) -> NamedTypeReferenceClass { + match ty { + idb_rs::til::Type::Typedef(_) => NamedTypeReferenceClass::TypedefNamedTypeClass, + idb_rs::til::Type::Struct(_) => NamedTypeReferenceClass::StructNamedTypeClass, + idb_rs::til::Type::Union(_) => NamedTypeReferenceClass::UnionNamedTypeClass, + idb_rs::til::Type::Enum(_) => NamedTypeReferenceClass::EnumNamedTypeClass, + _ => NamedTypeReferenceClass::UnknownNamedTypeClass, + } +} + +fn field_from_bytes(bytes: i32) -> Ref { + match bytes { + 0 => unreachable!(), + num @ (1 | 2 | 4 | 8 | 16) => Type::int(num.try_into().unwrap(), false), + nelem => Type::array(&Type::char(), nelem.try_into().unwrap()), + } +} + +fn parse_til_section_info( + debug_info: &mut DebugInfo, + debug_file: &BinaryView, + til: &TILSection, + progress: Box Result<(), ()>>, +) -> Result<()> { + let total = til.symbols.len() + til.types.len(); + let mut types = Vec::with_capacity(total); + let mut types_by_ord = HashMap::with_capacity(total); + let mut types_by_name = HashMap::with_capacity(total); + let all_types = til.types.iter().zip(core::iter::repeat(false)); + // TODO: it's unclear how the demangle symbols and types names/ord, for now only parse types + //let all_types = all_types.chain(til.symbols.iter().zip(core::iter::repeat(true))); + for (i, (ty, is_symbol)) in all_types.enumerate() { + // TODO sanitized the input + // TODO find out how the namespaces used by TIL works + let name = ty.name.to_owned(); + types.push(TranslatesIDBType { + name, + is_symbol, + og_ty: ty, + _class: find_typedef_named_type_class(&ty.tinfo), + ty: TranslateTypeResult::NotYet, + }); + if ty.ordinal != 0 && !is_symbol { + let dup1 = types_by_ord.insert(ty.ordinal, i); + if let Some(old) = dup1 { + let old_type = &types[old]; + let new_type = types.last().unwrap(); + // TODO error? 
+ panic!( + "dup ord {}:{} {}:\n{:?}\n{:?}", + old_type.is_symbol, + new_type.is_symbol, + ty.ordinal, + &old_type.og_ty, + &new_type.og_ty, + ) + } + } + if ty.name != "" { + let dup2 = types_by_name.insert(ty.name.to_owned(), i); + if let Some(old) = dup2 { + let old_type = &types[old]; + let new_type = types.last().unwrap(); + // TODO error? + panic!( + "dup name {}:{}: {}:\n{:?}\n{:?}", + old_type.is_symbol, + new_type.is_symbol, + &ty.name, + &old_type.og_ty, + &new_type.og_ty, + ) + } + } + } + + let mut translator = TranslateIDBTypes { + debug_info, + _debug_file: debug_file, + arch: debug_file.default_arch().unwrap(/* TODO */), + progress, + _til: til, + types, + types_by_ord, + types_by_name, + }; + if (translator.progress)(0, total).is_err() { + warn!("IDB import aborted"); + return Ok(()); + } + + loop { + let mut did_something = false; + let mut num_translated = 0usize; + for i in 0..translator.types.len() { + match &translator.types[i].ty { + TranslateTypeResult::NotYet => { + let result = translator.translate_type(&translator.types[i].og_ty.tinfo); + did_something = !matches!(&result, TranslateTypeResult::NotYet); + translator.types[i].ty = result; + // if originaly NotKnow and now translated, update the result on bn + match &translator.types[i].ty { + TranslateTypeResult::PartialyTranslated(bn_ty) + | TranslateTypeResult::Translated(bn_ty) => { + let name = &translator.types[i].name; + let success = + translator.debug_info.add_type(name, &bn_ty, &[/* TODO */]); + if !success { + error!("Unable to add type `{}`", name) + } + } + _ => {} + } + } + TranslateTypeResult::PartialyTranslated(_) => { + let result = translator.translate_type(&translator.types[i].og_ty.tinfo); + did_something = !matches!(&result, TranslateTypeResult::PartialyTranslated(_)); + translator.types[i].ty = result; + } + // NOTE for now we are just accumulating errors, just try to translate the max number + // of types as possible + TranslateTypeResult::Error(_) => {} + // already 
translated, nothing do to here + TranslateTypeResult::Translated(_) => {} + } + + // count the number of finished types + match &translator.types[i].ty { + TranslateTypeResult::Translated(_) => num_translated += 1, + _ => {} + } + } + + if !did_something { + break; + } + if (translator.progress)(num_translated, total).is_err() { + // error means the user aborted the progress + break; + } + } + + // print any errors + for ty in &translator.types { + match &ty.ty { + TranslateTypeResult::Error(error) => { + error!("Unable to parse type `{}`: {error}", &ty.name); + } + TranslateTypeResult::NotYet => { + error!("Unable to parse type `{}`", &ty.name); + } + TranslateTypeResult::PartialyTranslated(_) => { + error!("Unable to parse type `{}` correctly", &ty.name); + } + TranslateTypeResult::Translated(_) => {} + }; + } + + // add an second time to fix the references LOL + for ty in &translator.types { + match &ty.ty { + TranslateTypeResult::Translated(bn_ty) + | TranslateTypeResult::PartialyTranslated(bn_ty) => { + let success = translator + .debug_info + .add_type(&ty.name, &bn_ty, &[/* TODO */]); + if !success { + error!("Unable to fix type `{}`", &ty.name) + } + } + _ => {} + } + } + + Ok(()) +} + +#[no_mangle] +pub extern "C" fn CorePluginInit() -> bool { + let _logger = logger::init(LevelFilter::Error); + DebugInfoParser::register("IDB Parser", IDBDebugInfoParser); + DebugInfoParser::register("TIL Parser", TILDebugInfoParser); + true +} diff --git a/rust/examples/idb/shared/Cargo.toml b/rust/examples/idb/shared/Cargo.toml new file mode 100644 index 000000000..0a1921071 --- /dev/null +++ b/rust/examples/idb/shared/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "idb-rs" +version = "0.1.0" +authors = ["Rubens Brandao "] +edition = "2021" + +[dependencies] +anyhow = "1.0.86" +bincode = "1.3.3" +flate2 = "1.0.31" +serde = { version = "1.0.205", features = ["derive"] } +serde_repr = "0.1.19" diff --git a/rust/examples/idb/shared/resources/Readme.md 
b/rust/examples/idb/shared/resources/Readme.md new file mode 100644 index 000000000..c9411d7d3 --- /dev/null +++ b/rust/examples/idb/shared/resources/Readme.md @@ -0,0 +1,2 @@ +Put here the `*.idb` `*.i64` in `idbs` folder and `*.til` files in `tils` folder. +Those files will be used to test the parser by the `cargo test` command. diff --git a/rust/examples/idb/shared/resources/idbs/idb_and_i64_files_here b/rust/examples/idb/shared/resources/idbs/idb_and_i64_files_here new file mode 100644 index 000000000..e69de29bb diff --git a/rust/examples/idb/shared/resources/tils/til_files_here b/rust/examples/idb/shared/resources/tils/til_files_here new file mode 100644 index 000000000..e69de29bb diff --git a/rust/examples/idb/shared/src/lib.rs b/rust/examples/idb/shared/src/lib.rs new file mode 100644 index 000000000..1fc17c924 --- /dev/null +++ b/rust/examples/idb/shared/src/lib.rs @@ -0,0 +1,434 @@ +#[cfg(test)] +mod test; + +pub mod til; +pub use til::{TILSection, TILTypeInfo}; + +use std::fmt::Debug; +use std::io::{BufRead, Read, Seek, SeekFrom}; +use std::num::NonZeroU64; + +use serde::Deserialize; + +use anyhow::{anyhow, ensure, Result}; + +#[derive(Debug, Clone, Copy)] +pub struct IDBParser { + input: I, + header: IDBHeader, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TILOffset(NonZeroU64); + +impl IDBParser { + pub fn new(mut input: I) -> Result { + let header = IDBHeader::read(&mut input)?; + Ok(Self { input, header }) + } + + pub fn til_section(&self) -> Option { + self.header.til_offset.map(TILOffset) + } + + pub fn read_til_section(&mut self, til: TILOffset) -> Result { + self.input.seek(SeekFrom::Start(til.0.get()))?; + let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; + // makes sure the reader doesn't go out-of-bounds + let mut input = Read::take(&mut self.input, section_header.len); + let result = TILSection::read(&mut input, section_header.compress)?; + + // TODO seems its normal to have a few extra bytes 
at the end of the sector, maybe + // because of the compressions stuff, anyway verify that + ensure!( + input.limit() <= 16, + "Sector have more data then expected, left {} bytes", + input.limit() + ); + Ok(result) + } + + #[cfg(test)] + pub(crate) fn decompress_til_section( + &mut self, + til: TILOffset, + output: &mut impl std::io::Write, + ) -> Result<()> { + self.input.seek(SeekFrom::Start(til.0.get()))?; + let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; + // makes sure the reader doesn't go out-of-bounds + let mut input = Read::take(&mut self.input, section_header.len); + TILSection::decompress(&mut input, output, section_header.compress) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum IDBMagic { + IDA0, + IDA1, + IDA2, +} + +impl TryFrom<[u8; 4]> for IDBMagic { + type Error = anyhow::Error; + + fn try_from(value: [u8; 4]) -> Result { + match &value { + b"IDA0" => Ok(IDBMagic::IDA0), + b"IDA1" => Ok(IDBMagic::IDA1), + b"IDA2" => Ok(IDBMagic::IDA2), + _ => Err(anyhow!("Invalid IDB Magic number")), + } + } +} +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum IDBVersion { + V1, + V4, + V5, + V6, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct IDBHeader { + version: IDBVersion, + id0_offset: Option, + id1_offset: Option, + nam_offset: Option, + til_offset: Option, + checksums: [u32; 3], + unk0_checksum: u32, + data: IDBHeaderVersion, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum IDBHeaderVersion { + V1 { + seg_offset: Option, + }, + V4 { + seg_offset: Option, + }, + V5 { + unk16: u32, + unk1_checksum: u32, + }, + V6 { + unk16: u32, + id2_offset: Option, + unk1_checksum: u32, + }, +} + +#[derive(Debug, Clone, Copy)] +struct IDBSectionHeader { + compress: IDBSectionCompression, + len: u64, +} + +#[derive(Debug, Clone, Copy)] +#[repr(u8)] +enum IDBSectionCompression { + None = 0, + Zlib = 2, +} + +impl TryFrom for IDBSectionCompression { + type Error = (); + + fn try_from(value: 
u8) -> std::result::Result { + match value { + 0 => Ok(Self::None), + 2 => Ok(Self::Zlib), + _ => Err(()), + } + } +} + +#[derive(Debug, Deserialize)] +struct IDBHeaderRaw { + magic: [u8; 4], + _padding_0: u16, + offsets: [u32; 5], + signature: u32, + version: u16, + // more, depending on the version +} + +impl IDBHeader { + pub fn read(input: &mut I) -> Result { + let header_raw: IDBHeaderRaw = bincode::deserialize_from(&mut *input)?; + let _magic = IDBMagic::try_from(header_raw.magic)?; + ensure!( + header_raw.signature == 0xAABB_CCDD, + "Invalid header signature {:#x}", + header_raw.signature + ); + match header_raw.version { + 1 => Self::read_v1(&header_raw, input), + 4 => Self::read_v4(&header_raw, input), + 5 => Self::read_v5(&header_raw, input), + 6 => Self::read_v6(&header_raw, input), + v => return Err(anyhow!("Unable to parse version `{v}`")), + } + } + + fn read_v1(header_raw: &IDBHeaderRaw, input: I) -> Result { + #[derive(Debug, Deserialize)] + struct V1Raw { + id2_offset: u32, + checksums: [u32; 3], + unk30_zeroed: u32, + unk33_checksum: u32, + unk38_zeroed: [u8; 6], + } + + let v1_raw: V1Raw = bincode::deserialize_from(input)?; + ensure!(v1_raw.unk30_zeroed == 0, "unk30 not zeroed"); + ensure!(v1_raw.id2_offset == 0, "id2 in V1 is not zeroed"); + ensure!(v1_raw.unk38_zeroed == [0; 6], "unk38 is not zeroed"); + + Ok(Self { + version: IDBVersion::V1, + id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), + id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), + nam_offset: NonZeroU64::new(header_raw.offsets[2].into()), + til_offset: NonZeroU64::new(header_raw.offsets[4].into()), + checksums: v1_raw.checksums, + unk0_checksum: v1_raw.unk33_checksum, + data: IDBHeaderVersion::V1 { + seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), + }, + }) + } + + fn read_v4(header_raw: &IDBHeaderRaw, input: I) -> Result { + #[derive(Debug, Deserialize)] + struct V4Raw { + id2_offset: u32, + checksums: [u32; 3], + unk30_zeroed: u32, + 
unk33_checksum: u32, + unk38_zeroed: [u8; 8], + unk40_v5c: u32, + unk44_zeroed: [u8; 8], + _unk4c: [u8; 16], + unk5c_zeroed: [[u8; 16]; 8], + } + + let v4_raw: V4Raw = bincode::deserialize_from(input)?; + + ensure!(v4_raw.unk30_zeroed == 0, "unk30 not zeroed"); + ensure!(v4_raw.id2_offset == 0, "id2 in V4 is not zeroed"); + ensure!(v4_raw.unk38_zeroed == [0; 8], "unk38 is not zeroed"); + ensure!(v4_raw.unk40_v5c == 0x5c, "unk40 is not 0x5C"); + ensure!(v4_raw.unk44_zeroed == [0; 8], "unk44 is not zeroed"); + ensure!(v4_raw.unk5c_zeroed == [[0; 16]; 8], "unk5c is not zeroed"); + + Ok(Self { + version: IDBVersion::V4, + id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), + id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), + nam_offset: NonZeroU64::new(header_raw.offsets[2].into()), + til_offset: NonZeroU64::new(header_raw.offsets[4].into()), + checksums: v4_raw.checksums, + unk0_checksum: v4_raw.unk33_checksum, + data: IDBHeaderVersion::V4 { + seg_offset: NonZeroU64::new(header_raw.offsets[3].into()), + }, + }) + } + + fn read_v5(header_raw: &IDBHeaderRaw, input: impl Read) -> Result { + #[derive(Debug, Deserialize)] + struct V5Raw { + nam_offset: u64, + seg_offset_zeroed: u64, + til_offset: u64, + initial_checksums: [u32; 3], + unk4_zeroed: u32, + unk_checksum: u32, + id2_offset_zeroed: u64, + final_checksum: u32, + unk0_v7c: u32, + unk1_zeroed: [u8; 16], + _unk2: [u8; 16], + unk3_zeroed: [[u8; 16]; 8], + } + let v5_raw: V5Raw = bincode::deserialize_from(input)?; + let id0_offset = + u64::from_le(u64::from(header_raw.offsets[1]) << 32 | u64::from(header_raw.offsets[0])); + let id1_offset = + u64::from_le(u64::from(header_raw.offsets[3]) << 32 | u64::from(header_raw.offsets[2])); + + // TODO Final checksum is always zero on v5? 
+ + ensure!(v5_raw.unk4_zeroed == 0, "unk4 not zeroed"); + ensure!(v5_raw.id2_offset_zeroed == 0, "id2 in V5 is not zeroed"); + ensure!(v5_raw.seg_offset_zeroed == 0, "seg in V5 is not zeroed"); + ensure!(v5_raw.unk0_v7c == 0x7C, "unk0 not 0x7C"); + ensure!(v5_raw.unk1_zeroed == [0; 16], "unk1 is not zeroed"); + ensure!(v5_raw.unk3_zeroed == [[0; 16]; 8], "unk3 is not zeroed"); + + Ok(Self { + version: IDBVersion::V5, + id0_offset: NonZeroU64::new(id0_offset), + id1_offset: NonZeroU64::new(id1_offset), + nam_offset: NonZeroU64::new(v5_raw.nam_offset), + til_offset: NonZeroU64::new(v5_raw.til_offset), + checksums: v5_raw.initial_checksums, + unk0_checksum: v5_raw.unk_checksum, + data: IDBHeaderVersion::V5 { + unk16: header_raw.offsets[4], + unk1_checksum: v5_raw.final_checksum, + }, + }) + } + + fn read_v6(header_raw: &IDBHeaderRaw, input: impl Read) -> Result { + #[derive(Debug, Deserialize)] + struct V6Raw { + nam_offset: u64, + seg_offset_zeroed: u64, + til_offset: u64, + initial_checksums: [u32; 3], + unk4_zeroed: [u8; 4], + unk5_checksum: u32, + id2_offset: u64, + final_checksum: u32, + unk0_v7c: u32, + unk1_zeroed: [u8; 16], + _unk2: [u8; 16], + unk3_zeroed: [[u8; 16]; 8], + } + let v6_raw: V6Raw = bincode::deserialize_from(input)?; + let id0_offset = + u64::from_le(u64::from(header_raw.offsets[1]) << 32 | u64::from(header_raw.offsets[0])); + let id1_offset = + u64::from_le(u64::from(header_raw.offsets[3]) << 32 | u64::from(header_raw.offsets[2])); + + ensure!(v6_raw.unk4_zeroed == [0; 4], "unk4 not zeroed"); + ensure!(v6_raw.seg_offset_zeroed == 0, "seg in V6 is not zeroed"); + ensure!(v6_raw.unk0_v7c == 0x7C, "unk0 not 0x7C"); + ensure!(v6_raw.unk1_zeroed == [0; 16], "unk1 is not zeroed"); + ensure!(v6_raw.unk3_zeroed == [[0; 16]; 8], "unk3 is not zeroed"); + + Ok(Self { + version: IDBVersion::V6, + id0_offset: NonZeroU64::new(id0_offset), + id1_offset: NonZeroU64::new(id1_offset), + nam_offset: NonZeroU64::new(v6_raw.nam_offset), + til_offset: 
NonZeroU64::new(v6_raw.til_offset), + checksums: v6_raw.initial_checksums, + unk0_checksum: v6_raw.unk5_checksum, + data: IDBHeaderVersion::V6 { + unk16: header_raw.offsets[4], + id2_offset: NonZeroU64::new(v6_raw.id2_offset), + unk1_checksum: v6_raw.final_checksum, + }, + }) + } +} + +impl IDBSectionHeader { + pub fn read(header: &IDBHeader, input: I) -> Result { + match header.version { + crate::IDBVersion::V1 | crate::IDBVersion::V4 => { + #[derive(Debug, Deserialize)] + struct Section32Raw { + compress: u8, + len: u32, + } + let header: Section32Raw = bincode::deserialize_from(input)?; + Ok(IDBSectionHeader { + compress: header + .compress + .try_into() + .map_err(|_| anyhow!("Invalid compression code"))?, + len: header.len.into(), + }) + } + crate::IDBVersion::V5 | crate::IDBVersion::V6 => { + #[derive(Debug, Deserialize)] + struct Section64Raw { + compress: u8, + len: u64, + } + let header: Section64Raw = bincode::deserialize_from(input)?; + Ok(IDBSectionHeader { + compress: header + .compress + .try_into() + .map_err(|_| anyhow!("Invalid compression code"))?, + len: header.len, + }) + } + } + } +} + +fn read_bytes_len_u8(mut input: I) -> Result> { + let mut len = [0]; + input.read_exact(&mut len)?; + let mut bytes = vec![0u8; len[0].into()]; + input.read_exact(&mut bytes)?; + Ok(bytes) +} + +fn read_string_len_u8(input: I) -> Result { + let bytes = read_bytes_len_u8(input)?; + Ok(String::from_utf8(bytes)?) +} + +#[cfg(test)] +fn write_string_len_u8(mut output: O, value: &str) -> Result<()> { + output.write_all(&[u8::try_from(value.len()).unwrap()])?; + Ok(output.write_all(value.as_bytes())?) 
+} + +fn read_c_string_raw(mut input: I) -> std::io::Result> { + let mut buf = vec![]; + input.read_until(b'\x00', &mut buf)?; + // last char need to be \x00 or we found a EoF + if buf.pop() != Some(b'\x00') { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on CStr", + )); + } + Ok(buf) +} + +fn read_c_string(input: &mut I) -> std::io::Result { + let buf = read_c_string_raw(input)?; + Ok(String::from_utf8_lossy(&buf).to_string()) +} + +fn read_c_string_vec(input: &mut I) -> std::io::Result> { + let buf = read_c_string_raw(input)?; + if buf.is_empty() { + return Ok(vec![]); + } + + let mut result = vec![]; + // NOTE never 0 because this came from a CStr + let mut len = buf[0] - 1; + // NOTE zero len (buf[0] == 1) string is allowed + let mut current = &buf[1..]; + loop { + if usize::from(len) > current.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Invalid len on Vec of CStr", + )); + } + let (value, rest) = current.split_at(len.into()); + result.push(String::from_utf8_lossy(value).to_string()); + if rest.is_empty() { + break; + } + len = rest[0] - 1; + current = &rest[1..]; + } + Ok(result) +} diff --git a/rust/examples/idb/shared/src/main.rs b/rust/examples/idb/shared/src/main.rs new file mode 100644 index 000000000..c05173bf7 --- /dev/null +++ b/rust/examples/idb/shared/src/main.rs @@ -0,0 +1,11 @@ +use std::fs::File; +use std::io::BufReader; + +use idb_rs::TILSection; + +fn main() { + let file = "/tmp/lasterror.til"; + let file = BufReader::new(File::open(file).unwrap()); + let til = TILSection::parse(file).unwrap(); + println!("TIL: {til:#?}"); +} diff --git a/rust/examples/idb/shared/src/test.rs b/rust/examples/idb/shared/src/test.rs new file mode 100644 index 000000000..3f03f9e37 --- /dev/null +++ b/rust/examples/idb/shared/src/test.rs @@ -0,0 +1,92 @@ +use std::ffi::OsStr; +use std::fs::File; +use std::io::{BufReader, BufWriter}; +use std::path::{Path, PathBuf}; + +use 
crate::{IDBParser, IDBSectionCompression, TILSection}; + +#[test] +fn parse_idbs() { + let files = find_all("resources/idbs".as_ref(), &["idb".as_ref(), "i64".as_ref()]).unwrap(); + for filename in files { + println!("{}", filename.to_str().unwrap()); + let file = BufReader::new(File::open(&filename).unwrap()); + let mut parser = IDBParser::new(file).unwrap(); + let til = parser.read_til_section(parser.til_section().unwrap()); + + // if success, parse next file + let error = match til { + Ok(_til) => continue, + Err(e) => e, + }; + + //otherwise create a decompress version of the file for more testing + let mut output = BufWriter::new(std::fs::File::create("/tmp/lasterror.til").unwrap()); + parser + .decompress_til_section(parser.til_section().unwrap(), &mut output) + .unwrap(); + panic!("{error:?}") + } +} + +#[test] +fn parse_tils() { + let files = find_all("resources/tils".as_ref(), &["til".as_ref()]).unwrap(); + let results = files + .into_iter() + .map(|x| parse_til_file(&x).map_err(|e| (x, e))) + .collect::>(); + let Err((file, error)) = results else { + // if success, finish the test + return; + }; + println!("Unable to parse {}", file.to_str().unwrap()); + //otherwise create a decompress version of the file for more testing + let mut input = BufReader::new(std::fs::File::open(&file).unwrap()); + let mut output = BufWriter::new(std::fs::File::create("/tmp/lasterror.til").unwrap()); + TILSection::decompress_inner(&mut input, &mut output).unwrap(); + panic!( + "Unable to parse file `{}`: {error:?}", + file.to_str().unwrap() + ); +} + +fn parse_til_file(file: &Path) -> anyhow::Result<()> { + println!("TIL file: {}", file.to_str().unwrap()); + // makes sure it don't read out-of-bounds + let mut input = BufReader::new(std::fs::File::open(file).unwrap()); + // TODO make a SmartReader + match TILSection::read(&mut input, IDBSectionCompression::None) { + Ok(_til) => Ok(()), + Err(e) => Err(e), + } +} + +fn find_all(path: &Path, exts: &[&OsStr]) -> anyhow::Result> { 
+ fn inner_find_all(path: &Path, exts: &[&OsStr], buf: &mut Vec) -> anyhow::Result<()> { + for entry in std::fs::read_dir(path).unwrap().map(Result::unwrap) { + let entry_type = entry.metadata().unwrap().file_type(); + if entry_type.is_dir() { + inner_find_all(&entry.path(), exts, buf)?; + continue; + } + + if !entry_type.is_file() { + continue; + } + + let filename = entry.file_name(); + let Some(ext) = Path::new(&filename).extension() else { + continue; + }; + + if exts.contains(&ext) { + buf.push(entry.path()) + } + } + Ok(()) + } + let mut result = vec![]; + inner_find_all(path, exts, &mut result)?; + Ok(result) +} diff --git a/rust/examples/idb/shared/src/til/flag.rs b/rust/examples/idb/shared/src/til/flag.rs new file mode 100644 index 000000000..d23a51ef3 --- /dev/null +++ b/rust/examples/idb/shared/src/til/flag.rs @@ -0,0 +1,432 @@ +/// byte sequence used to describe a type in IDA +type TypeT = u8; +/// Enum type flags +type BteT = u8; + +/// multi-use +pub const RESERVED_BYTE: TypeT = 0xFF; + +/// Masks +pub mod tf_mask { + use super::TypeT; + /// the low 4 bits define the basic type + pub const TYPE_BASE_MASK: TypeT = 0x0F; + /// type flags - they have different meaning depending on the basic type + pub const TYPE_FLAGS_MASK: TypeT = 0x30; + /// modifiers. + /// for [super::tf_array::BT_ARRAY] see [super::tf_array] + /// ::BT_VOID can have them ONLY in 'void *' + pub const TYPE_MODIF_MASK: TypeT = 0xC0; + /// basic type with type flags + pub const TYPE_FULL_MASK: TypeT = TYPE_BASE_MASK | TYPE_FLAGS_MASK; +} + +/// Basic type: unknown & void +/// [BT_UNK] and [BT_VOID] with non-zero type flags can be used in function +/// (and struct) declarations to describe the function arguments or structure +/// fields if only their size is known. They may be used in ida to describe +/// the user input. +/// +/// In general BT_... bits should not be used alone to describe types. +/// Use BTF_... constants instead. 
+/// +/// For struct used also as 'single-field-alignment-suffix' +/// [__declspec(align(x))] with [tf_mask::TYPE_MODIF_MASK] == [tf_mask::TYPE_FULL_MASK] +pub mod tf_unk { + use super::TypeT; + /// unknown + pub const BT_UNK: TypeT = 0x00; + /// void + pub const BT_VOID: TypeT = 0x01; + /// [BT_VOID] - normal void; [BT_UNK] - don't use + pub const BTMT_SIZE0: TypeT = 0x00; + /// size = 1 byte if [BT_VOID]; 2 if [BT_UNK] + pub const BTMT_SIZE12: TypeT = 0x10; + /// size = 4 bytes if [BT_VOID]; 8 if [BT_UNK] + pub const BTMT_SIZE48: TypeT = 0x20; + /// size = 16 bytes if [BT_VOID]; unknown if [BT_UNK] (IN struct alignment - see below) + pub const BTMT_SIZE128: TypeT = 0x30; +} + +/// Basic type: integer +pub mod tf_int { + use super::TypeT; + /// __int8 + pub const BT_INT8: TypeT = 0x02; + /// __int16 + pub const BT_INT16: TypeT = 0x03; + /// __int32 + pub const BT_INT32: TypeT = 0x04; + /// __int64 + pub const BT_INT64: TypeT = 0x05; + /// __int128 (for alpha & future use) + pub const BT_INT128: TypeT = 0x06; + /// natural int. (size provided by idp module) + pub const BT_INT: TypeT = 0x07; + /// unknown signedness + pub const BTMT_UNKSIGN: TypeT = 0x00; + /// signed + pub const BTMT_SIGNED: TypeT = 0x10; + /// unsigned + pub const BTMT_UNSIGNED: TypeT = 0x20; + /// specify char or segment register + /// - [BT_INT8] - char + /// - [BT_INT] - segment register + /// - other [BT_INT]... - don't use + pub const BTMT_CHAR: TypeT = 0x30; +} + +/// Basic type: bool +pub mod tf_bool { + use super::TypeT; + /// bool + pub const BT_BOOL: TypeT = 0x08; + /// bool size is model specific or unknown(?) 
+ pub const BTMT_DEFBOOL: TypeT = 0x00; + /// bool sized 1byte + pub const BTMT_BOOL1: TypeT = 0x10; + /// bool sized 2bytes - !inf_is_64bit() + pub const BTMT_BOOL2: TypeT = 0x20; + /// bool sized 8bytes - inf_is_64bit() + pub const BTMT_BOOL8: TypeT = 0x20; + /// bool sized 4bytes + pub const BTMT_BOOL4: TypeT = 0x30; +} + +/// Basic type: float +pub mod tf_float { + use super::TypeT; + /// float + pub const BT_FLOAT: TypeT = 0x09; + /// float (4 bytes) + pub const BTMT_FLOAT: TypeT = 0x00; + /// double (8 bytes) + pub const BTMT_DOUBLE: TypeT = 0x10; + /// long double (compiler specific) + pub const BTMT_LNGDBL: TypeT = 0x20; + /// float (variable size). `if { use_tbyte } then { tbyte_size } else { 2 }`, + pub const BTMT_SPECFLT: TypeT = 0x30; +} + +/// Basic type: last +pub mod tf_last_basic { + /// the last basic type, all basic types may be followed by `tah-typeattrs` + pub const BT_LAST_BASIC: super::TypeT = super::tf_float::BT_FLOAT; +} + +/// Derived type: pointer +/// Pointers to undeclared yet [tf_complex::BT_COMPLEX] types are prohibited +pub mod tf_ptr { + use super::TypeT; + /// pointer + /// has the following format: + /// `[db sizeof(ptr)]; [tah-typeattrs]; type_t...` + pub const BT_PTR: TypeT = 0x0A; + /// default for model + pub const BTMT_DEFPTR: TypeT = 0x00; + /// near + pub const BTMT_NEAR: TypeT = 0x10; + /// far + pub const BTMT_FAR: TypeT = 0x20; + /// closure + /// - if ptr to [super::tf_func::BT_FUNC] - __closure. + /// in this case next byte MUST be + /// [super::RESERVED_BYTE], and after it [super::tf_func::BT_FUNC] + /// - else the next byte contains size_of::() + /// allowed values are 1 - `\varmem{ph,processor_t,max_ptr_size}` + /// - if value is bigger than `\varmem{ph,processor_t,max_ptr_size}`, + /// based_ptr_name_and_size() is called to + /// find out the typeinfo + pub const BTMT_CLOSURE: TypeT = 0x30; +} + +/// Derived type: array +/// For [tf_array::BT_ARRAY], the BTMT_... flags must be equivalent to the BTMT_... 
flags of its elements +pub mod tf_array { + use super::TypeT; + /// array + pub const BT_ARRAY: TypeT = 0x0B; + + /// code + /// ```custom,{class=text} + /// if set + /// array base==0 + /// format: dt num_elem; [tah-typeattrs]; type_t... + /// if num_elem==0 then the array size is unknown + /// else + /// format: da num_elem, base; [tah-typeattrs]; type_t... \endcode + /// ``` + /// used only for serialization + pub const BTMT_NONBASED: TypeT = 0x10; + /// reserved bit + pub const BTMT_ARRESERV: TypeT = 0x20; +} + +/// \defgroup tf_func Derived type: function +/// Ellipsis is not taken into account in the number of parameters// +/// The return type cannot be ::BT_ARRAY or ::BT_FUNC. +/// +pub mod tf_func { + use super::TypeT; + /// function. + /// format:
+    ///  optional:
+    /// ```custom,{class=text}
+    ///   ::CM_CC_SPOILED | num_of_spoiled_reg
+    ///   if num_of_spoiled_reg == BFA_FUNC_MARKER:
+    ///     ::bfa_byte
+    ///     if (bfa_byte & BFA_FUNC_EXT_FORMAT) != 0
+    ///       ::fti_bits (only low bits: FTI_SPOILED,...,FTI_VIRTUAL)
+    ///       num_of_spoiled_reg times: spoiled reg info (see extract_spoiledreg)
+    ///     else
+    ///       bfa_byte is function attribute byte (see \ref BFA_...)
+    ///   else:
+    ///     num_of_spoiled_reg times: spoiled reg info (see extract_spoiledreg)
+    /// ```
+    ///  ::cm_t ... calling convention and memory model
+    ///  [tah-typeattrs];
+    ///  ::type_t ... return type;
+    ///  [serialized argloc_t of returned value (if ::CM_CC_SPECIAL{PE} && !return void)];
+    /// ```custom,{class=text}
+    ///  if !::CM_CC_VOIDARG:
+    ///    dt N (N=number of parameters)
+    ///    if ( N == 0 )
+    ///      if ::CM_CC_ELLIPSIS or ::CM_CC_SPECIALE
+    ///        func(...)
+    ///      else
+    ///        parameters are unknown
+    ///    else
+    ///      N records:
+    ///        ::type_t ... (i.e. type of each parameter)
+    ///        [serialized argloc_t (if ::CM_CC_SPECIAL{PE})] (i.e. place of each parameter)
+    ///        [#FAH_BYTE + de( \ref funcarg_t::flags )] 
+ /// ``` + pub const BT_FUNC: TypeT = 0x0C; + + ///< call method - default for model or unknown + pub const BTMT_DEFCALL: TypeT = 0x00; + ///< function returns by retn + pub const BTMT_NEARCALL: TypeT = 0x10; + ///< function returns by retf + pub const BTMT_FARCALL: TypeT = 0x20; + ///< function returns by iret + ///< in this case cc MUST be 'unknown' + pub const BTMT_INTCALL: TypeT = 0x30; +} + +/// Derived type: complex +pub mod tf_complex { + use super::TypeT; + /// struct/union/enum/typedef. + /// format:
:
+    /// ```custom,{class=text}
+    ///   [dt N (N=field count) if !::BTMT_TYPEDEF]
+    ///   if N == 0:
+    ///     p_string name (unnamed types have names "anon_...")
+    ///     [sdacl-typeattrs];
+    ///   else, for struct & union:
+    ///     if N == 0x7FFE   // Support for a high (i.e., > 4095) member count
+    ///       N = deserialize_de()
+    ///     ALPOW = N & 0x7
+    ///     MCNT = N >> 3
+    ///     if MCNT == 0
+    ///       empty struct
+    ///     if ALPOW == 0
+    ///       ALIGN = get_default_align()
+    ///     else
+    ///       ALIGN = (1 << (ALPOW - 1))
+    ///     [sdacl-typeattrs];
+    ///   else, for enums:
+    ///     if N == 0x7FFE   // Support for high enum entries count.
+    ///       N = deserialize_de()
+    ///     [tah-typeattrs]; 
+ /// ``` + pub const BT_COMPLEX: TypeT = 0x0D; + /// struct + /// `MCNT records: type_t; [sdacl-typeattrs];` + pub const BTMT_STRUCT: TypeT = 0x00; + /// union + /// `MCNT records: type_t...` + pub const BTMT_UNION: TypeT = 0x10; + /// enum + /// ```custom,{class=text} + /// next byte bte_t (see below) + /// N records: de delta(s) + /// OR + /// blocks (see below) + /// ``` + pub const BTMT_ENUM: TypeT = 0x20; + /// named reference + /// `always p_string name` + pub const BTMT_TYPEDEF: TypeT = 0x30; + /// bitfield (only in struct) + /// ```custom,{class=text} + /// ['bitmasked' enum see below] + /// next byte is dt + /// ((size in bits << 1) | (unsigned ? 1 : 0)) + /// ``` + pub const BT_BITFIELD: TypeT = 0x0E; + /// __int8 + pub const BTMT_BFLDI8: TypeT = 0x00; + /// __int16 + pub const BTMT_BFLDI16: TypeT = 0x10; + /// __int32 + pub const BTMT_BFLDI32: TypeT = 0x20; + /// __int64 + pub const BTMT_BFLDI64: TypeT = 0x30; +} + +/// RESERVED +pub const BT_RESERVED: TypeT = 0x0F; + +/// Type modifiers +/// "pub const volatile" types are forbidden +pub mod tf_modifiers { + use super::TypeT; + /// const + pub const BTM_CONST: TypeT = 0x40; + /// volatile + pub const BTM_VOLATILE: TypeT = 0x80; +} + +/// Special enum definitions +pub mod tf_enum { + use super::BteT; + /// storage size. + /// - if == 0 then inf_get_cc_size_e() + /// - else 1 << (n -1) = 1,2,4...64 + pub const BTE_SIZE_MASK: BteT = 0x07; + /// must be 0, in order to distinguish from a tah-byte + pub const BTE_RESERVED: BteT = 0x08; + /// 'subarrays'. 
In this case ANY record + /// has the following format: + /// - 'de' mask (has name) + /// - 'dt' cnt + /// - cnt records of 'de' values + /// (cnt CAN be 0) + /// NOTE: delta for ALL subsegment is ONE + pub const BTE_BITFIELD: BteT = 0x10; + /// output style mask + pub const BTE_OUT_MASK: BteT = 0x60; + /// hex + pub const BTE_HEX: BteT = 0x00; + /// char or hex + pub const BTE_CHAR: BteT = 0x20; + /// signed decimal + pub const BTE_SDEC: BteT = 0x40; + /// unsigned decimal + pub const BTE_UDEC: BteT = 0x60; + /// this bit MUST be present + pub const BTE_ALWAYS: BteT = 0x80; +} + +/// Convenience definitions: segment register +pub mod tf_conv_segreg { + use super::{tf_int, TypeT}; + /// segment register + pub const BT_SEGREG: TypeT = tf_int::BT_INT | tf_int::BTMT_CHAR; +} + +/// Convenience definitions: unknown types +pub mod tf_conv_unk { + use super::{tf_unk, TypeT}; + /// 1 byte + pub const BT_UNK_BYTE: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE12; + /// 2 bytes + pub const BT_UNK_WORD: TypeT = tf_unk::BT_UNK | tf_unk::BTMT_SIZE12; + /// 4 bytes + pub const BT_UNK_DWORD: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE48; + /// 8 bytes + pub const BT_UNK_QWORD: TypeT = tf_unk::BT_UNK | tf_unk::BTMT_SIZE48; + /// 16 bytes + pub const BT_UNK_OWORD: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE128; + /// unknown size - for parameters + pub const BT_UNKNOWN: TypeT = tf_unk::BT_UNK | tf_unk::BTMT_SIZE128; +} + +/// Convenience definitions: shortcuts +pub mod tf_shortcuts { + use super::{tf_bool, tf_complex, tf_conv_unk, tf_float, tf_int, tf_unk, TypeT}; + /// byte + pub const BTF_BYTE: TypeT = tf_conv_unk::BT_UNK_BYTE; + /// unknown + pub const BTF_UNK: TypeT = tf_conv_unk::BT_UNKNOWN; + /// void + pub const BTF_VOID: TypeT = tf_unk::BT_VOID | tf_unk::BTMT_SIZE0; + + /// signed byte + pub const BTF_INT8: TypeT = tf_int::BT_INT8 | tf_int::BTMT_SIGNED; + /// signed char + pub const BTF_CHAR: TypeT = tf_int::BT_INT8 | tf_int::BTMT_CHAR; + /// unsigned char + pub const 
BTF_UCHAR: TypeT = tf_int::BT_INT8 | tf_int::BTMT_UNSIGNED; + /// unsigned byte + pub const BTF_UINT8: TypeT = tf_int::BT_INT8 | tf_int::BTMT_UNSIGNED; + + /// signed short + pub const BTF_INT16: TypeT = tf_int::BT_INT16 | tf_int::BTMT_SIGNED; + /// unsigned short + pub const BTF_UINT16: TypeT = tf_int::BT_INT16 | tf_int::BTMT_UNSIGNED; + + /// signed int + pub const BTF_INT32: TypeT = tf_int::BT_INT32 | tf_int::BTMT_SIGNED; + /// unsigned int + pub const BTF_UINT32: TypeT = tf_int::BT_INT32 | tf_int::BTMT_UNSIGNED; + + /// signed long + pub const BTF_INT64: TypeT = tf_int::BT_INT64 | tf_int::BTMT_SIGNED; + /// unsigned long + pub const BTF_UINT64: TypeT = tf_int::BT_INT64 | tf_int::BTMT_UNSIGNED; + + /// signed 128-bit value + pub const BTF_INT128: TypeT = tf_int::BT_INT128 | tf_int::BTMT_SIGNED; + /// unsigned 128-bit value + pub const BTF_UINT128: TypeT = tf_int::BT_INT128 | tf_int::BTMT_UNSIGNED; + + /// int, unknown signedness + pub const BTF_INT: TypeT = tf_int::BT_INT | tf_int::BTMT_UNKSIGN; + /// unsigned int + pub const BTF_UINT: TypeT = tf_int::BT_INT | tf_int::BTMT_UNSIGNED; + /// singed int + pub const BTF_SINT: TypeT = tf_int::BT_INT | tf_int::BTMT_SIGNED; + + /// boolean + pub const BTF_BOOL: TypeT = tf_bool::BT_BOOL; + + /// float + pub const BTF_FLOAT: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_FLOAT; + /// double + pub const BTF_DOUBLE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_DOUBLE; + /// long double + pub const BTF_LDOUBLE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_LNGDBL; + /// see [tf_float::BTMT_SPECFLT] + pub const BTF_TBYTE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_SPECFLT; + + /// struct + pub const BTF_STRUCT: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_STRUCT; + /// union + pub const BTF_UNION: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_UNION; + /// enum + pub const BTF_ENUM: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_ENUM; + /// typedef + pub const BTF_TYPEDEF: TypeT = tf_complex::BT_COMPLEX | 
tf_complex::BTMT_TYPEDEF; +} + +/// pack buckets using zip +pub const TIL_ZIP: u32 = 0x0001; +/// til has macro table +pub const TIL_MAC: u32 = 0x0002; +/// extended sizeof info (short, long, longlong) +pub const TIL_ESI: u32 = 0x0004; +/// universal til for any compiler +pub const TIL_UNI: u32 = 0x0008; +/// type ordinal numbers are present +pub const TIL_ORD: u32 = 0x0010; +/// type aliases are present (this bit is used only on the disk) +pub const TIL_ALI: u32 = 0x0020; +/// til has been modified, should be saved +pub const TIL_MOD: u32 = 0x0040; +/// til has extra streams +pub const TIL_STM: u32 = 0x0080; +/// sizeof(long double) +pub const TIL_SLD: u32 = 0x0100; diff --git a/rust/examples/idb/shared/src/til/mod.rs b/rust/examples/idb/shared/src/til/mod.rs new file mode 100644 index 000000000..315dc6b7a --- /dev/null +++ b/rust/examples/idb/shared/src/til/mod.rs @@ -0,0 +1,1707 @@ +/// The u8 values used to describes the type information records in IDA. +/// +/// The recommended way of using type info is to use the [tinfo_t] class. +/// The type information is internally kept as an array of bytes terminated by 0. +/// +/// Items in brackets [] are optional and sometimes are omitted. +/// ::type_t... means a sequence of ::type_t bytes which defines a type. +/// +/// NOTE: to work with the types of instructions or data in the database, +/// use `get_tinfo()`/`set_tinfo()` and similar functions. 
+#[allow(unused)] +mod flag; + +use std::io::{BufRead, BufReader, Read}; +use std::num::NonZeroU8; + +use anyhow::{anyhow, ensure, Context, Result}; +use serde::{Deserialize, Serialize}; + +use crate::{read_c_string, read_c_string_vec, read_string_len_u8, IDBSectionCompression}; + +// TODO migrate this to flags +const TIL_SECTION_MAGIC: &[u8; 6] = b"IDATIL"; + +#[derive(Debug, Clone)] +pub struct TILSection { + pub format: u32, + pub flags: TILSectionFlag, + pub title: String, + pub base: String, + pub id: u8, + pub cm: u8, + pub def_align: u8, + pub symbols: Vec, + pub type_ordinal_numbers: Option, + pub types: Vec, +} + +#[derive(Debug, Clone)] +pub(crate) struct TILSectionHeader { + format: u32, + flags: TILSectionFlag, + title: String, + base: String, + id: u8, + cm: u8, + _size_i: u8, + _size_b: u8, + size_e: u8, + def_align: u8, + _size_s_l_ll: Option<(u8, u8, u8)>, + _size_ldbl: Option, +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +struct TILSectionHeader1 { + signature: [u8; 6], + format: u32, + flags: TILSectionFlag, +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +struct TILSectionHeader2 { + id: u8, + cm: u8, + size_i: u8, + size_b: u8, + size_e: u8, + def_align: u8, +} + +impl TILSection { + pub fn parse(mut input: I) -> Result { + Self::read_inner(&mut input) + } + + pub(crate) fn read(input: &mut I, compress: IDBSectionCompression) -> Result { + match compress { + IDBSectionCompression::None => Self::read_inner(input), + IDBSectionCompression::Zlib => { + let mut input = BufReader::new(flate2::read::ZlibDecoder::new(input)); + Self::read_inner(&mut input) + } + } + } + + fn read_inner(input: &mut I) -> Result { + let header = Self::read_header(&mut *input)?; + let symbols = if header.flags.is_zip() { + Self::read_bucket_zip(&mut *input, &header)? + } else { + Self::read_bucket_normal(&mut *input, &header)? 
+ }; + let type_ordinal_numbers = header + .flags + .is_ord() + .then(|| bincode::deserialize_from(&mut *input)) + .transpose()?; + let types = if header.flags.is_zip() { + Self::read_bucket_zip(&mut *input, &header)? + } else { + Self::read_bucket_normal(&mut *input, &header)? + }; + + Ok(TILSection { + format: header.format, + flags: header.flags, + title: header.title, + base: header.base, + id: header.id, + cm: header.cm, + def_align: header.def_align, + symbols, + type_ordinal_numbers, + types, + }) + } + + fn read_header(input: &mut I) -> Result { + let header1: TILSectionHeader1 = bincode::deserialize_from(&mut *input)?; + ensure!( + header1.signature == *TIL_SECTION_MAGIC, + "Invalid TIL Signature" + ); + + let title = read_string_len_u8(&mut *input)?; + let base = read_string_len_u8(&mut *input)?; + + let header2: TILSectionHeader2 = bincode::deserialize_from(&mut *input)?; + let size_s_l_ll: Option<(u8, u8, u8)> = header1 + .flags + .is_esi() + .then(|| bincode::deserialize_from(&mut *input)) + .transpose()?; + let size_ldbl: Option = header1 + .flags + .size_long_double() + .then(|| bincode::deserialize_from(&mut *input)) + .transpose()?; + Ok(TILSectionHeader { + format: header1.format, + flags: header1.flags, + title, + base, + id: header2.id, + _size_i: header2.size_i, + _size_b: header2.size_b, + size_e: header2.size_e, + cm: header2.cm, + def_align: header2.def_align, + _size_s_l_ll: size_s_l_ll, + _size_ldbl: size_ldbl, + }) + } + + #[cfg(test)] + pub(crate) fn decompress( + input: &mut I, + output: &mut O, + compress: IDBSectionCompression, + ) -> Result<()> { + match compress { + IDBSectionCompression::Zlib => { + let mut input = BufReader::new(flate2::read::ZlibDecoder::new(input)); + Self::decompress_inner(&mut input, output) + } + IDBSectionCompression::None => Self::decompress_inner(input, output), + } + } + + #[cfg(test)] + pub(crate) fn decompress_inner( + input: &mut I, + output: &mut O, + ) -> Result<()> { + let mut header = 
Self::read_header(&mut *input)?; + let og_flags = header.flags; + // disable the zip flag + header.flags.set_zip(false); + let header1 = TILSectionHeader1 { + signature: *TIL_SECTION_MAGIC, + format: header.format, + flags: header.flags, + }; + let header2 = TILSectionHeader2 { + id: header.id, + cm: header.cm, + size_i: header._size_i, + size_b: header._size_b, + size_e: header.size_e, + def_align: header.def_align, + }; + bincode::serialize_into(&mut *output, &header1)?; + crate::write_string_len_u8(&mut *output, &header.title)?; + crate::write_string_len_u8(&mut *output, &header.base)?; + bincode::serialize_into(&mut *output, &header2)?; + header + ._size_s_l_ll + .map(|value| bincode::serialize_into(&mut *output, &value)) + .transpose()?; + header + ._size_ldbl + .map(|value| bincode::serialize_into(&mut *output, &value)) + .transpose()?; + + // if not zipped, just copy the rest of the data, there is no posible zip + // block inside a bucket + if !og_flags.is_zip() { + std::io::copy(&mut *input, output)?; + return Ok(()); + } + + // symbols + Self::decompress_bucket(&mut *input, &mut *output)?; + let _type_ordinal_numbers: Option = header + .flags + .is_ord() + .then(|| -> Result { + let result: u32 = bincode::deserialize_from(&mut *input)?; + bincode::serialize_into(&mut *output, &result)?; + Ok(result) + }) + .transpose()?; + // types + Self::decompress_bucket(&mut *input, &mut *output)?; + + Ok(()) + } +} + +#[derive(Clone, Copy, Debug, Deserialize, Serialize)] +pub struct TILSectionFlag(u32); +impl TILSectionFlag { + pub fn is_zip(&self) -> bool { + self.0 & flag::TIL_ZIP != 0 + } + pub fn set_zip(&mut self, value: bool) { + if value { + self.0 |= flag::TIL_ZIP + } else { + self.0 &= !flag::TIL_ZIP + } + } + pub fn has_macro_table(&self) -> bool { + self.0 & flag::TIL_MAC != 0 + } + /// extended sizeof info (short, long, longlong) + pub fn is_esi(&self) -> bool { + self.0 & flag::TIL_ESI != 0 + } + /// universal til for any compiler + pub fn is_uni(&self) 
-> bool { + self.0 & flag::TIL_UNI != 0 + } + /// type ordinal numbers are present + pub fn is_ord(&self) -> bool { + self.0 & flag::TIL_ORD != 0 + } + /// type aliases are present + pub fn is_ali(&self) -> bool { + self.0 & flag::TIL_ALI != 0 + } + /// til has been modified, should be saved + pub fn is_mod(&self) -> bool { + self.0 & flag::TIL_MOD != 0 + } + /// til has extra streams + pub fn is_stm(&self) -> bool { + self.0 & flag::TIL_STM != 0 + } + /// sizeof(long double) + pub fn size_long_double(&self) -> bool { + self.0 & flag::TIL_SLD != 0 + } +} + +#[derive(Debug, Deserialize, Serialize)] +struct TILBucketRaw { + ndefs: u32, + len: u32, +} + +impl TILSection { + fn read_bucket_header(input: &mut I) -> Result<(u32, u32)> { + let ndefs = bincode::deserialize_from(&mut *input)?; + let len = bincode::deserialize_from(&mut *input)?; + Ok((ndefs, len)) + } + + fn read_bucket_zip_header(input: &mut I) -> Result<(u32, u32, u32)> { + let (ndefs, len) = Self::read_bucket_header(&mut *input)?; + let compressed_len = bincode::deserialize_from(&mut *input)?; + Ok((ndefs, len, compressed_len)) + } + + fn read_bucket_normal( + input: &mut I, + header: &TILSectionHeader, + ) -> Result> { + let (ndefs, len) = Self::read_bucket_header(&mut *input)?; + let mut input = input.take(len.into()); + let type_info = (0..ndefs) + .map(|_| TILTypeInfo::read(&mut input, header)) + .collect::>()?; + ensure!( + input.limit() == 0, + "TypeBucket total data is smaller then expected" + ); + Ok(type_info) + } + + fn read_bucket_zip( + input: &mut I, + header: &TILSectionHeader, + ) -> Result> { + let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; + // make sure the decompressor don't read out-of-bounds + let mut compressed_input = input.take(compressed_len.into()); + let inflate = BufReader::new(flate2::read::ZlibDecoder::new(&mut compressed_input)); + // make sure only the defined size is decompressed + let mut decompressed_input = inflate.take(len.into()); + 
let type_info = (0..ndefs.try_into().unwrap()) + .map(|_| TILTypeInfo::read(&mut decompressed_input, header)) + .collect::, _>>()?; + // make sure the input was fully consumed + ensure!( + decompressed_input.limit() == 0, + "TypeBucket data is smaller then expected" + ); + ensure!( + compressed_input.limit() == 0, + "TypeBucket compressed data is smaller then expected" + ); + Ok(type_info) + } + + #[cfg(test)] + fn decompress_bucket( + input: &mut I, + output: &mut O, + ) -> Result<()> { + let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; + bincode::serialize_into(&mut *output, &TILBucketRaw { len, ndefs })?; + // write the decompressed data + let mut compressed_input = input.take(compressed_len.into()); + let inflate = flate2::read::ZlibDecoder::new(&mut compressed_input); + let mut decompressed_input = inflate.take(len.into()); + std::io::copy(&mut decompressed_input, output)?; + ensure!( + decompressed_input.limit() == 0, + "TypeBucket data is smaller then expected" + ); + ensure!( + compressed_input.limit() == 0, + "TypeBucket compressed data is smaller then expected" + ); + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct TILTypeInfo { + _flags: u32, + pub name: String, + pub ordinal: u64, + pub tinfo: Type, + _cmt: String, + _fieldcmts: String, + _sclass: u8, +} + +impl TILTypeInfo { + pub(crate) fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let flags: u32 = bincode::deserialize_from(&mut *input)?; + let name = read_c_string(&mut *input)?; + let is_u64 = (flags >> 31) != 0; + let ordinal = match (header.format, is_u64) { + // formats below 0x12 doesn't have 64 bits ord + (0..0x12, _) | (_, false) => bincode::deserialize_from::<_, u32>(&mut *input)?.into(), + (_, true) => bincode::deserialize_from(&mut *input)?, + }; + let tinfo_raw = + TypeRaw::read(&mut *input, header).context("parsing `TILTypeInfo::tiinfo`")?; + let _info = read_c_string(&mut *input)?; + let cmt = read_c_string(&mut *input)?; + let fields = 
read_c_string_vec(&mut *input)?; + let fieldcmts = read_c_string(&mut *input)?; + let sclass: u8 = bincode::deserialize_from(&mut *input)?; + + let tinfo = Type::new(tinfo_raw, Some(fields))?; + + Ok(Self { + _flags: flags, + name, + ordinal, + tinfo, + _cmt: cmt, + _fieldcmts: fieldcmts, + _sclass: sclass, + }) + } +} + +#[derive(Debug, Clone)] +pub enum Type { + Basic(Basic), + Pointer(Pointer), + Function(Function), + Array(Array), + Typedef(Typedef), + Struct(Struct), + Union(Union), + Enum(Enum), + Bitfield(Bitfield), +} +impl Type { + fn new(tinfo_raw: TypeRaw, fields: Option>) -> Result { + match tinfo_raw { + TypeRaw::Basic(x) => Basic::new(x, fields).map(Type::Basic), + TypeRaw::Bitfield(x) => { + if matches!(fields, Some(f) if !f.is_empty()) { + return Err(anyhow!("fields in a Bitfield")); + } + Ok(Type::Bitfield(x)) + } + TypeRaw::Typedef(x) => { + if matches!(fields, Some(f) if !f.is_empty()) { + return Err(anyhow!("fields in a Typedef")); + } + Ok(Type::Typedef(x)) + } + TypeRaw::Pointer(x) => Pointer::new(x, fields).map(Type::Pointer), + TypeRaw::Function(x) => Function::new(x, fields).map(Type::Function), + TypeRaw::Array(x) => Array::new(x, fields).map(Type::Array), + TypeRaw::Struct(x) => Struct::new(x, fields).map(Type::Struct), + TypeRaw::Union(x) => Union::new(x, fields).map(Type::Union), + TypeRaw::Enum(x) => Enum::new(x, fields).map(Type::Enum), + } + } +} + +#[derive(Debug, Clone)] +enum TypeRaw { + Basic(TypeMetadata), + Pointer(PointerRaw), + Function(FunctionRaw), + Array(ArrayRaw), + Typedef(Typedef), + Struct(StructRaw), + Union(UnionRaw), + Enum(EnumRaw), + Bitfield(Bitfield), +} + +impl TypeRaw { + pub fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let metadata = TypeMetadata::read(&mut *input)?; + if metadata.get_base_type_flag().is_typeid_last() + || metadata.get_base_type_flag().is_reserved() + { + return Ok(TypeRaw::Basic(metadata)); + } else if metadata.get_base_type_flag().is_pointer() { + Ok(TypeRaw::Pointer( + 
PointerRaw::read(input, metadata, header).context("Type::Pointer")?, + )) + } else if metadata.get_base_type_flag().is_function() { + Ok(TypeRaw::Function( + FunctionRaw::read(input, &metadata, header).context("Type::Function")?, + )) + } else if metadata.get_base_type_flag().is_array() { + Ok(TypeRaw::Array( + ArrayRaw::read(input, metadata, header).context("Type::Array")?, + )) + } else if metadata.get_full_type_flag().is_typedef() { + Ok(TypeRaw::Typedef( + Typedef::read(input).context("Type::Typedef")?, + )) + } else if metadata.get_full_type_flag().is_union() { + Ok(TypeRaw::Union( + UnionRaw::read(input, header).context("Type::Union")?, + )) + } else if metadata.get_full_type_flag().is_struct() { + Ok(TypeRaw::Struct( + StructRaw::read(input, header).context("Type::Struct")?, + )) + } else if metadata.get_full_type_flag().is_enum() { + Ok(TypeRaw::Enum( + EnumRaw::read(input, header).context("Type::Enum")?, + )) + } else if metadata.get_base_type_flag().is_bitfield() { + Ok(TypeRaw::Bitfield( + Bitfield::read(input, metadata).context("Type::Bitfield")?, + )) + } else { + todo!(); + //Ok(Type::Unknown(read_c_string_raw(input)?)) + } + } + + pub fn read_ref(input: &mut I, header: &TILSectionHeader) -> Result { + let mut bytes = read_dt_bytes(&mut *input)?; + + if !bytes.starts_with(b"=") { + let dt = serialize_dt(bytes.len().try_into().unwrap())?; + bytes = [b'='].into_iter().chain(dt).chain(bytes).collect(); + } + + let mut bytes = &bytes[..]; + let result = TypeRaw::read(&mut bytes, header)?; + if !bytes.is_empty() { + return Err(anyhow!("Unable to fully parser Type ref")); + } + Ok(result) + } +} + +#[derive(Debug, Clone, Copy)] +pub enum Basic { + Void, + // NOTE Unknown with None bytes is NOT the same as Void + Unknown { + bytes: Option, + }, + + Bool { + bytes: Option, + }, + Char, + SegReg, + Int { + bytes: Option, + is_signed: Option, + }, + Float { + bytes: Option, + }, +} + +impl Basic { + fn new(mdata: TypeMetadata, fields: Option>) -> Result { + 
const fn bytes(bytes: u8) -> NonZeroU8 { + if bytes == 0 { + unreachable!() + } + unsafe { NonZeroU8::new_unchecked(bytes) } + } + if let Some(fields) = fields { + ensure!(fields.is_empty(), "Unset with fields"); + } + let bt = mdata.get_base_type_flag().0; + let btmt = mdata.get_type_flag().0; + use flag::{tf_bool::*, tf_float::*, tf_int::*, tf_unk::*}; + match bt { + BT_VOID => { + let bytes = match btmt { + // special case, void + BTMT_SIZE0 => return Ok(Self::Void), + BTMT_SIZE12 => Some(bytes(1)), + BTMT_SIZE48 => Some(bytes(4)), + BTMT_SIZE128 => Some(bytes(16)), + _ => unreachable!(), + }; + Ok(Self::Unknown { bytes }) + } + BT_UNK => { + let bytes = match btmt { + BTMT_SIZE0 => return Err(anyhow!("forbidden use of BT_UNK")), + BTMT_SIZE12 => Some(bytes(2)), + BTMT_SIZE48 => Some(bytes(8)), + BTMT_SIZE128 => None, + _ => unreachable!(), + }; + Ok(Self::Unknown { bytes }) + } + + bt_int @ BT_INT8..=BT_INT => { + let is_signed = match btmt { + BTMT_UNKSIGN => None, + BTMT_SIGNED => Some(true), + BTMT_UNSIGNED => Some(false), + // special case for char + BTMT_CHAR => match bt_int { + BT_INT8 => return Ok(Self::Char), + BT_INT => return Ok(Self::SegReg), + _ => { + return Err(anyhow!("Reserved use of tf_int::BTMT_CHAR {:x}", mdata.0)) + } + }, + _ => unreachable!(), + }; + let bytes = match bt_int { + BT_INT8 => Some(bytes(1)), + BT_INT16 => Some(bytes(2)), + BT_INT32 => Some(bytes(4)), + BT_INT64 => Some(bytes(8)), + BT_INT128 => Some(bytes(16)), + BT_INT => None, + _ => unreachable!(), + }; + Ok(Self::Int { bytes, is_signed }) + } + + BT_BOOL => { + let bytes = match btmt { + BTMT_DEFBOOL => None, + BTMT_BOOL1 => Some(bytes(1)), + BTMT_BOOL4 => Some(bytes(4)), + // TODO get the inf_is_64bit field + //BTMT_BOOL2 if !inf_is_64bit => Some(bytes(2)), + //BTMT_BOOL8 if inf_is_64bit => Some(bytes(8)), + BTMT_BOOL8 => Some(bytes(2)), // delete this + _ => unreachable!(), + }; + Ok(Self::Bool { bytes }) + } + + BT_FLOAT => { + let bytes = match btmt { + BTMT_FLOAT => 
Some(bytes(4)), + BTMT_DOUBLE => Some(bytes(8)), + BTMT_LNGDBL => None, + // TODO find the tbyte_size field + //(BTMT_SPECFLT, Some(bytes)) => Some(bytes), + //(BTMT_SPECFLT, None) => Some(bytes(2)), + BTMT_SPECFLT => Some(bytes(8)), // delete this + _ => unreachable!(), + }; + Ok(Self::Float { bytes }) + } + _ => Err(anyhow!("Unkown Unset Type {}", mdata.0)), + } + } +} + +#[derive(Debug, Clone)] +pub struct Pointer { + pub closure: Option, + pub tah: TAH, + pub typ: Box, +} + +impl Pointer { + fn new(raw: PointerRaw, fields: Option>) -> Result { + Ok(Self { + closure: raw.closure.map(Closure::new).transpose()?, + tah: raw.tah, + typ: Type::new(*raw.typ, fields).map(Box::new)?, + }) + } +} + +#[derive(Debug, Clone)] +pub enum Closure { + Closure(Box), + PointerBased(u8), +} + +impl Closure { + fn new(raw: ClosureRaw) -> Result { + match raw { + ClosureRaw::Closure(c) => Type::new(*c, None).map(Box::new).map(Self::Closure), + ClosureRaw::PointerBased(p) => Ok(Self::PointerBased(p)), + } + } +} + +#[derive(Debug, Clone)] +struct PointerRaw { + pub closure: Option, + pub tah: TAH, + pub typ: Box, +} + +#[derive(Debug, Clone)] +enum ClosureRaw { + Closure(Box), + PointerBased(u8), +} + +impl PointerRaw { + fn read( + input: &mut I, + metadata: TypeMetadata, + header: &TILSectionHeader, + ) -> Result { + let closure = metadata + .get_type_flag() + .is_type_closure() + .then(|| ClosureRaw::read(&mut *input, header)) + .transpose()?; + let tah = TAH::read(&mut *input)?; + let typ = TypeRaw::read(&mut *input, header)?; + Ok(Self { + closure, + tah, + typ: Box::new(typ), + }) + } +} + +impl ClosureRaw { + fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let closure_type: u8 = bincode::deserialize_from(&mut *input)?; + if closure_type == 0xFF { + let closure = TypeRaw::read(&mut *input, header)?; + Ok(Self::Closure(Box::new(closure))) + } else { + let closure_ptr = bincode::deserialize_from(&mut *input)?; + Ok(Self::PointerBased(closure_ptr)) + } + } +} + 
+#[derive(Debug, Clone)] +pub struct Function { + pub ret: Box, + pub args: Vec<(Option, Type, Option)>, + pub retloc: Option, +} +impl Function { + fn new(value: FunctionRaw, fields: Option>) -> Result { + let args = associate_field_name_and_member(fields, value.args) + .context("Function")? + .map(|(n, (t, a))| Type::new(t, None).map(|t| (n, t, a))) + .collect::>()?; + Ok(Self { + ret: Type::new(*value.ret, None).map(Box::new)?, + args, + retloc: value.retloc, + }) + } +} + +#[derive(Debug, Clone)] +struct FunctionRaw { + pub ret: Box, + pub args: Vec<(TypeRaw, Option)>, + pub retloc: Option, +} + +#[derive(Debug, Clone)] +pub enum ArgLoc { + // TODO add those to flags + // ::ALOC_STACK + // ::ALOC_STATIC + // ::ALOC_REG1 + // ::ALOC_REG2 + // ::ALOC_RREL + // ::ALOC_DIST + // ::ALOC_CUSTOM + /// 0 - None + None, + /// 1 - stack offset + Stack(u32), + /// 2 - distributed (scattered) + Dist(Vec), + /// 3 - one register (and offset within it) + Reg1(u32), + /// 4 - register pair + Reg2(u32), + /// 5 - register relative + RRel { reg: u16, off: u32 }, + /// 6 - global address + Static(u32), + // 7..=0xf custom + // TODO is possible to know the custom impl len? +} + +#[derive(Debug, Clone)] +pub struct ArgLocDist { + pub info: u16, + pub off: u16, + pub size: u16, +} + +impl FunctionRaw { + fn read( + input: &mut I, + metadata: &TypeMetadata, + header: &TILSectionHeader, + ) -> Result { + // TODO what is that? 
+ let mut flags = metadata.get_type_flag().0 << 2; + + let cc = Self::read_cc(&mut *input, &mut flags)?; + + let _tah = TAH::read(&mut *input)?; + let ret = TypeRaw::read(&mut *input, header)?; + let have_retloc = cc.get_calling_convention().is_special_pe() + && !matches!(&ret, TypeRaw::Basic(mdata) if mdata.get_full_type_flag().is_void()); + let retloc = have_retloc.then(|| ArgLoc::read(&mut *input)).transpose()?; + if cc.get_calling_convention().is_void_arg() { + return Ok(Self { + ret: Box::new(ret), + args: vec![], + retloc, + }); + } + + let n = read_dt(&mut *input)?; + let is_special_pe = cc.get_calling_convention().is_special_pe(); + let args = (0..n) + .map(|_| -> Result<_> { + let tmp = input.fill_buf()?.get(0).copied(); + if tmp == Some(0xFF) { + // TODO what is this? + let _tmp: u8 = bincode::deserialize_from(&mut *input)?; + let _flags = read_de(&mut *input)?; + } + let tinfo = TypeRaw::read(&mut *input, header)?; + let argloc = is_special_pe + .then(|| ArgLoc::read(&mut *input)) + .transpose()?; + + Ok((tinfo, argloc)) + }) + .collect::>()?; + + Ok(Self { + ret: Box::new(ret), + args, + retloc, + }) + } + + fn read_cc(input: &mut I, flags: &mut u8) -> Result { + let mut cm = TypeMetadata::read(&mut *input)?; + if !cm.get_calling_convention().is_spoiled() { + return Ok(cm); + } + // TODO find what to do with this spoiled and flags stuff + let mut _spoiled = vec![]; + loop { + // TODO create flags::CM_CC_MASK + let nspoiled = cm.0 & !0xf0; + if nspoiled == 0xF { + let b: u8 = bincode::deserialize_from(&mut *input)?; + *flags |= (b & 0x1F) << 1; + } else { + for _ in 0..nspoiled { + let b: u8 = bincode::deserialize_from(&mut *input)?; + let (size, reg) = if b & 0x80 != 0 { + let size: u8 = bincode::deserialize_from(&mut *input)?; + let reg = b & 0x7F; + (size, reg) + } else { + ensure!(b > 1, "Unable to solve register from a spoiled function"); + let size = (b >> 4) + 1; + let reg = (b & 0xF) - 1; + (size, reg) + }; + _spoiled.push((size, reg)); + } + 
*flags |= 1; + } + + cm = TypeMetadata::read(&mut *input)?; + if !cm.get_calling_convention().is_spoiled() { + return Ok(cm); + } + } + } +} + +impl ArgLoc { + fn read(input: &mut I) -> Result { + let t: u8 = bincode::deserialize_from(&mut *input)?; + if t != 0xFF { + let b = t & 0x7F; + match (t, b) { + (0..=0x80, 1..) => Ok(Self::Reg1((b - 1).into())), + (0..=0x80, 0) => Ok(Self::Stack(0)), + _ => { + let c: u8 = bincode::deserialize_from(&mut *input)?; + if c == 0 { + Ok(Self::None) + } else { + Ok(Self::Reg2(u32::from(b) | u32::from(c - 1) << 16)) + } + } + } + } else { + let typ = read_dt(&mut *input)?; + match typ & 0xF { + 0 => Ok(Self::None), + 1 => { + let sval = read_de(&mut *input)?; + Ok(Self::Stack(sval)) + } + 2 => { + let n = (typ >> 5) & 0x7; + let dist: Vec<_> = (0..n) + .map(|_| { + let info = read_dt(&mut *input)?; + let off = read_dt(&mut *input)?; + let size = read_dt(&mut *input)?; + Ok(ArgLocDist { info, off, size }) + }) + .collect::>()?; + Ok(Self::Dist(dist)) + } + 3 => { + let reg_info = read_dt(&mut *input)?; + // TODO read other dt? + Ok(Self::Reg1(reg_info.into())) + } + 4 => { + let reg_info = read_dt(&mut *input)?; + // TODO read other dt? 
+ Ok(Self::Reg2(reg_info.into())) + } + 5 => { + let reg = read_dt(&mut *input)?; + let off = read_de(&mut *input)?; + Ok(Self::RRel { reg, off }) + } + 6 => { + let sval = read_de(&mut *input)?; + Ok(Self::Static(sval)) + } + 0x7..=0xF => todo!("Custom implementation for ArgLoc"), + _ => unreachable!(), + } + } + } +} + +#[derive(Clone, Debug)] +pub struct Array { + pub base: u8, + pub nelem: u16, + pub tah: TAH, + pub elem_type: Box, +} +impl Array { + fn new(value: ArrayRaw, fields: Option>) -> Result { + if matches!(&fields, Some(f) if !f.is_empty()) { + return Err(anyhow!("fields in a Array")); + } + Ok(Self { + base: value.base, + nelem: value.nelem, + tah: value.tah, + elem_type: Type::new(*value.elem_type, None).map(Box::new)?, + }) + } +} + +#[derive(Clone, Debug)] +struct ArrayRaw { + pub base: u8, + pub nelem: u16, + pub tah: TAH, + pub elem_type: Box, +} + +impl ArrayRaw { + fn read( + input: &mut I, + metadata: TypeMetadata, + header: &TILSectionHeader, + ) -> Result { + let (base, nelem) = if metadata.get_type_flag().is_non_based() { + let nelem = read_dt(&mut *input)?; + (0, nelem) + } else { + let (base, nelem) = read_da(&mut *input)?; + (base, nelem.into()) + }; + let tah = TAH::read(&mut *input)?; + let elem_type = TypeRaw::read(&mut *input, header)?; + Ok(ArrayRaw { + base, + nelem, + tah, + elem_type: Box::new(elem_type), + }) + } +} + +#[derive(Clone, Debug)] +pub enum Typedef { + Ordinal(u32), + Name(String), +} + +impl Typedef { + fn read(input: &mut I) -> Result { + let buf = read_dt_bytes(&mut *input)?; + match &buf[..] { + [b'#', data @ ..] 
=> { + let mut tmp = &data[..]; + let de = read_de(&mut tmp)?; + if !tmp.is_empty() { + return Err(anyhow!("Typedef Ordinal with more data then expected")); + } + Ok(Typedef::Ordinal(de)) + } + _ => Ok(Typedef::Name(String::from_utf8(buf)?)), + } + } +} + +#[derive(Clone, Debug)] +pub enum Struct { + Ref { + ref_type: Box, + taudt_bits: SDACL, + }, + NonRef { + effective_alignment: u16, + taudt_bits: SDACL, + members: Vec, + }, +} +impl Struct { + fn new(value: StructRaw, fields: Option>) -> Result { + match value { + StructRaw::Ref { + ref_type, + taudt_bits, + } => { + if matches!(&fields, Some(f) if !f.is_empty()) { + return Err(anyhow!("fields in a Ref Struct")); + } + Ok(Struct::Ref { + ref_type: Type::new(*ref_type, None).map(Box::new)?, + taudt_bits, + }) + } + StructRaw::NonRef { + effective_alignment, + taudt_bits, + members, + } => { + let members = associate_field_name_and_member(fields, members) + .context("Struct")? + .map(|(n, m)| StructMember::new(n, m)) + .collect::>()?; + Ok(Struct::NonRef { + effective_alignment, + taudt_bits, + members, + }) + } + } + } +} + +#[derive(Clone, Debug)] +enum StructRaw { + Ref { + ref_type: Box, + taudt_bits: SDACL, + }, + NonRef { + effective_alignment: u16, + taudt_bits: SDACL, + members: Vec, + }, +} + +impl StructRaw { + fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let Some(n) = read_dt_de(&mut *input)? 
else { + // simple reference + let ref_type = TypeRaw::read_ref(&mut *input, header)?; + let taudt_bits = SDACL::read(&mut *input)?; + return Ok(Self::Ref { + ref_type: Box::new(ref_type), + taudt_bits, + }); + }; + + let alpow = n & 7; + let mem_cnt = n >> 3; + let effective_alignment = if alpow == 0 { 0 } else { 1 << (alpow - 1) }; + let taudt_bits = SDACL::read(&mut *input)?; + let members = (0..mem_cnt) + .map(|_| StructMemberRaw::read(&mut *input, header)) + .collect::>()?; + Ok(Self::NonRef { + effective_alignment, + taudt_bits, + members, + }) + } +} + +#[derive(Clone, Debug)] +pub enum Union { + Ref { + ref_type: Box, + taudt_bits: SDACL, + }, + NonRef { + taudt_bits: SDACL, + effective_alignment: u16, + members: Vec<(Option, Type)>, + }, +} +impl Union { + fn new(value: UnionRaw, fields: Option>) -> Result { + match value { + UnionRaw::Ref { + ref_type, + taudt_bits, + } => { + if matches!(fields, Some(f) if !f.is_empty()) { + return Err(anyhow!("fields in a Ref Union")); + } + Ok(Union::Ref { + ref_type: Type::new(*ref_type, None).map(Box::new)?, + taudt_bits, + }) + } + UnionRaw::NonRef { + taudt_bits, + effective_alignment, + members, + } => { + let members = associate_field_name_and_member(fields, members) + .context("Union")? + .map(|(n, m)| Type::new(m, None).map(|m| (n, m))) + .collect::>()?; + Ok(Union::NonRef { + taudt_bits, + effective_alignment, + members, + }) + } + } + } +} + +// TODO struct and union are basically identical, the diff is that member in union don't have SDACL, +// merge both +#[derive(Clone, Debug)] +enum UnionRaw { + Ref { + ref_type: Box, + taudt_bits: SDACL, + }, + NonRef { + taudt_bits: SDACL, + effective_alignment: u16, + members: Vec, + }, +} + +impl UnionRaw { + fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let Some(n) = read_dt_de(&mut *input)? 
else { + // is ref + let ref_type = TypeRaw::read_ref(&mut *input, header)?; + let taudt_bits = SDACL::read(&mut *input)?; + return Ok(Self::Ref { + ref_type: Box::new(ref_type), + taudt_bits, + }); + }; + let alpow = n & 7; + let mem_cnt = n >> 3; + let effective_alignment = if alpow == 0 { 0 } else { 1 << (alpow - 1) }; + let taudt_bits = SDACL::read(&mut *input)?; + let members = (0..mem_cnt) + .map(|_| TypeRaw::read(&mut *input, header)) + .collect::>()?; + Ok(Self::NonRef { + effective_alignment, + taudt_bits, + members, + }) + } +} + +#[derive(Clone, Debug)] +pub enum Enum { + Ref { + ref_type: Box, + taenum_bits: TypeAttribute, + }, + NonRef { + group_sizes: Vec, + taenum_bits: TypeAttribute, + bte: u8, + members: Vec<(Option, u64)>, + bytesize: u64, + }, +} +impl Enum { + fn new(value: EnumRaw, fields: Option>) -> Result { + match value { + EnumRaw::Ref { + ref_type, + taenum_bits, + } => { + if matches!(&fields, Some(f) if !f.is_empty()) { + return Err(anyhow!("fields in a Ref Enum")); + } + Ok(Enum::Ref { + ref_type: Type::new(*ref_type, None).map(Box::new)?, + taenum_bits, + }) + } + EnumRaw::NonRef { + group_sizes, + taenum_bits, + bte, + members, + bytesize, + } => { + let members = associate_field_name_and_member(fields, members) + .context("Enum")? + .map(|(n, f)| (n, f)) + .collect(); + Ok(Enum::NonRef { + group_sizes, + taenum_bits, + bte, + members, + bytesize, + }) + } + } + } +} + +#[derive(Clone, Debug)] +enum EnumRaw { + Ref { + ref_type: Box, + taenum_bits: TypeAttribute, + }, + NonRef { + group_sizes: Vec, + taenum_bits: TypeAttribute, + bte: u8, + members: Vec, + bytesize: u64, + }, +} + +impl EnumRaw { + fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let Some(n) = read_dt_de(&mut *input)? 
else { + // is ref + let ref_type = TypeRaw::read_ref(&mut *input, header)?; + let taenum_bits = SDACL::read(&mut *input)?.0; + return Ok(EnumRaw::Ref { + ref_type: Box::new(ref_type), + taenum_bits, + }); + }; + + let taenum_bits = TAH::read(&mut *input)?.0; + let bte = bincode::deserialize_from(&mut *input)?; + let mut cur: u64 = 0; + let emsize = bte & flag::tf_enum::BTE_SIZE_MASK; + let bytesize: u32 = match emsize { + 0 if header.size_e != 0 => header.size_e.into(), + 0 => return Err(anyhow!("BTE emsize is 0 without header")), + 5 | 6 | 7 => return Err(anyhow!("BTE emsize with reserved values")), + _ => 1u32 << (emsize - 1), + }; + + let mask: u64 = if bytesize >= 16 { + // is saturating valid? + //u64::MAX + return Err(anyhow!("Bytes size is too big")); + } else { + u64::MAX >> (u64::BITS - (bytesize * 8)) + }; + + let mut group_sizes = vec![]; + let mut members = vec![]; + for _ in 0..n { + let lo: u64 = read_de(&mut *input)?.into(); + let is_64 = (taenum_bits.0 & 0x0020) != 0; + let step = if is_64 { + let hi: u64 = read_de(&mut *input)?.into(); + (lo | (hi << 32)) & mask + } else { + lo & mask + }; + // TODO: subarrays + // https://www.hex-rays.com/products/ida/support/sdkdoc/group__tf__enum.html#ga9ae7aa54dbc597ec17cbb17555306a02 + if (bte & flag::tf_enum::BTE_BITFIELD) != 0 { + let group_size = read_dt(&mut *input)?; + group_sizes.push(group_size); + } + // TODO check is this is wrapping by default + let next_step = cur.wrapping_add(step); + cur = next_step; + members.push(cur); + } + return Ok(EnumRaw::NonRef { + group_sizes, + taenum_bits, + bte, + members, + bytesize: bytesize.into(), + }); + } +} + +#[derive(Debug, Clone)] +pub struct Bitfield { + pub unsigned: bool, + pub width: u16, + pub nbytes: i32, +} + +impl Bitfield { + fn read(input: &mut I, metadata: TypeMetadata) -> Result { + let nbytes = 1 << (metadata.get_type_flag().0 >> 4); + let dt = read_dt(&mut *input)?; + let width = dt >> 1; + let unsigned = (dt & 1) > 0; + let _tag = 
TAH::read(&mut *input)?; + Ok(Self { + unsigned, + width, + nbytes, + }) + } +} + +#[derive(Clone, Debug)] +pub struct StructMember { + pub name: Option, + pub member_type: Type, + pub sdacl: SDACL, +} + +impl StructMember { + fn new(name: Option, m: StructMemberRaw) -> Result { + Ok(Self { + name, + member_type: Type::new(m.0, None)?, + sdacl: m.1, + }) + } +} +#[derive(Clone, Debug)] +struct StructMemberRaw(pub TypeRaw, pub SDACL); +impl StructMemberRaw { + fn read(input: &mut I, header: &TILSectionHeader) -> Result { + let member_type = TypeRaw::read(&mut *input, header)?; + let sdacl = SDACL::read(&mut *input)?; + Ok(Self(member_type, sdacl)) + } +} + +#[derive(Clone, Default, Debug)] +pub struct TypeMetadata(pub u8); +impl TypeMetadata { + fn new(value: u8) -> Self { + // TODO check for invalid values + Self(value) + } + fn read(input: I) -> Result { + Ok(Self::new(bincode::deserialize_from(input)?)) + } +} + +// TODO make those inner fields into enums or private +#[derive(Clone, Copy, Debug)] +pub struct BaseTypeFlag(pub u8); +#[derive(Clone, Copy, Debug)] +pub struct FullTypeFlag(pub u8); +#[derive(Clone, Copy, Debug)] +pub struct TypeFlag(pub u8); +#[derive(Clone, Copy, Debug)] +pub struct CallingConventionFlag(pub u8); + +#[derive(Clone, Copy, Debug)] +pub struct TypeAttribute(pub u16); +impl TypeAttribute { + fn read(input: &mut I) -> Result { + let mut val: u16 = 0; + let tah: u8 = bincode::deserialize_from(&mut *input)?; + let tmp = ((tah & 1) | ((tah >> 3) & 6)) + 1; + if tah == 0xFE || tmp == 8 { + if tmp == 8 { + val = tmp as u16; + } + let mut shift = 0; + loop { + let next_byte: u8 = bincode::deserialize_from(&mut *input)?; + if next_byte == 0 { + return Err(anyhow!("Failed to parse TypeAttribute")); + } + val |= ((next_byte & 0x7F) as u16) << shift; + if next_byte & 0x80 == 0 { + break; + } + shift += 7; + } + } + if (val & 0x0010) > 0 { + val = read_dt(&mut *input)?; + for _ in 0..val { + let _string = read_dt_string(&mut *input)?; + let 
another_de = read_dt(&mut *input)?; + let mut other_string = vec![0; another_de.into()]; + input.read_exact(&mut other_string)?; + } + } + Ok(TypeAttribute(val)) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct TAH(pub TypeAttribute); +impl TAH { + fn read(input: &mut I) -> Result { + let Some(tah) = input.fill_buf()?.get(0).copied() else { + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on DA" + ))); + }; + if tah == 0xFE { + Ok(Self(TypeAttribute::read(input)?)) + } else { + Ok(Self(TypeAttribute(0))) + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct SDACL(pub TypeAttribute); +impl SDACL { + fn read(input: &mut I) -> Result { + let Some(sdacl) = input.fill_buf()?.get(0).copied() else { + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on SDACL" + ))); + }; + if ((sdacl & !0x30) ^ 0xC0) <= 0x01 { + Ok(Self(TypeAttribute::read(input)?)) + } else { + Ok(Self(TypeAttribute(0))) + } + } +} + +impl CallingConventionFlag { + fn is_spoiled(&self) -> bool { + self.0 == 0xA0 + } + + fn is_void_arg(&self) -> bool { + self.0 == 0x20 + } + + fn is_special_pe(&self) -> bool { + self.0 == 0xD0 || self.0 == 0xE0 || self.0 == 0xF0 + } +} + +impl TypeMetadata { + pub fn get_base_type_flag(&self) -> BaseTypeFlag { + BaseTypeFlag(self.0 & flag::tf_mask::TYPE_BASE_MASK) + } + + pub fn get_full_type_flag(&self) -> FullTypeFlag { + FullTypeFlag(self.0 & flag::tf_mask::TYPE_FULL_MASK) + } + + pub fn get_type_flag(&self) -> TypeFlag { + TypeFlag(self.0 & flag::tf_mask::TYPE_FLAGS_MASK) + } + + pub fn get_calling_convention(&self) -> CallingConventionFlag { + CallingConventionFlag(self.0 & 0xF0) + } +} + +impl TypeFlag { + fn is_non_based(&self) -> bool { + self.0 == 0x10 + } + + pub fn is_unsigned(&self) -> bool { + self.0 == 0x20 + } + + pub fn is_signed(&self) -> bool { + !self.is_unsigned() + } + + fn is_type_closure(&self) -> bool { + self.0 == flag::tf_ptr::BTMT_CLOSURE + } +} + 
+impl FullTypeFlag { + fn is_enum(&self) -> bool { + self.0 == flag::tf_shortcuts::BTF_ENUM + } + + fn is_void(&self) -> bool { + self.0 == flag::tf_shortcuts::BTF_VOID + } + + fn is_struct(&self) -> bool { + self.0 == flag::tf_shortcuts::BTF_STRUCT + } + + fn is_union(&self) -> bool { + self.0 == flag::tf_shortcuts::BTF_UNION + } + + fn is_typedef(&self) -> bool { + self.0 == flag::tf_shortcuts::BTF_TYPEDEF + } +} + +impl BaseTypeFlag { + fn is_pointer(&self) -> bool { + self.0 == flag::tf_ptr::BT_PTR + } + + fn is_function(&self) -> bool { + self.0 == flag::tf_func::BT_FUNC + } + + fn is_array(&self) -> bool { + self.0 == flag::tf_array::BT_ARRAY + } + + fn is_bitfield(&self) -> bool { + self.0 == flag::tf_complex::BT_BITFIELD + } + + fn is_typeid_last(&self) -> bool { + self.0 <= flag::tf_last_basic::BT_LAST_BASIC + } + + fn is_reserved(&self) -> bool { + self.0 == flag::BT_RESERVED + } +} + +fn read_dt_bytes(input: &mut I) -> Result> { + let buf_len = read_dt(&mut *input)?; + let mut buf = vec![0; buf_len.into()]; + input.read_exact(&mut buf)?; + Ok(buf) +} + +fn read_dt_string(input: &mut I) -> Result { + let buf = read_dt_bytes(input)?; + Ok(String::from_utf8(buf)?) +} + +/// Reads 1 to 5 bytes +/// Value Range: 0-0xFFFFFFFF +/// Usage: Enum Deltas +fn read_de(input: &mut I) -> std::io::Result { + let mut val: u32 = 0; + for _ in 0..5 { + let mut hi = val << 6; + let mut b = [0; 1]; + input.read_exact(&mut b)?; + let b: u32 = b[0].into(); + let sign = b & 0x80; + if sign == 0 { + let lo = b & 0x3F; + val = lo | hi; + return Ok(val); + } else { + let lo = 2 * hi; + hi = b & 0x7F; + val = lo | hi; + } + } + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Can't find the end of DE", + )) +} + +/// Reads 1 or 2 bytes. 
+/// Value Range: 0-0x7FFE
+/// Usage: 16bit numbers
+///
+/// The stream stores `value + 1`. When the first byte has its high bit set,
+/// its low 7 bits are the low part and a second byte supplies the bits above
+/// them.
+///
+/// # Errors
+/// Propagates the I/O error from `read_exact`, and returns `InvalidData` when
+/// the stored number is 0 (in either the 1-byte or the 2-byte form).
+fn read_dt(input: &mut I) -> std::io::Result {
+    // NOTE(review): `I` is not declared in the signatures of this section and
+    // some return types carry no type arguments; the generic parameter lists
+    // look like they were lost from this copy of the patch - confirm against
+    // the original file.
+    let mut first = [0u8; 1];
+    input.read_exact(&mut first)?;
+    let first: u16 = first[0].into();
+
+    let raw = if first & 0x80 != 0 {
+        //SEG = 2
+        let mut second = [0u8; 1];
+        input.read_exact(&mut second)?;
+        let second: u16 = second[0].into();
+        (first & 0x7F) | (second << 7)
+    } else {
+        //SEG = 1
+        first
+    };
+    // A stored 0 has no decoded value. Checking after both forms are decoded
+    // also rejects the 2-byte sequence `0x80 0x00`, which would otherwise
+    // underflow on the `- 1`.
+    raw.checked_sub(1).ok_or_else(|| {
+        std::io::Error::new(std::io::ErrorKind::InvalidData, "DT can't have 0 value")
+    })
+}
+
+/// Encodes `value` in the format `read_dt` decodes: `value + 1` is written as
+/// one byte when it is at most 127, otherwise as the low 7 bits with the high
+/// bit set followed by the remaining upper bits.
+///
+/// # Errors
+/// Fails with "Invalid value for DT" when `value` is greater than 0x7FFE.
+fn serialize_dt(value: u16) -> Result> {
+    if value > 0x7FFE {
+        return Err(anyhow!("Invalid value for DT"));
+    }
+    // Cannot overflow: value <= 0x7FFE was checked above.
+    let raw = value + 1;
+    let mut result = Vec::with_capacity(2);
+    if raw > 127 {
+        result.push((raw & 0x7F | 0x80) as u8);
+        // raw <= 0x7FFF, so the remaining bits fit in one byte.
+        result.push((raw >> 7) as u8);
+    } else {
+        result.push(raw as u8);
+    }
+    Ok(result)
+}
+
+/// Reads 1 to 9 bytes.
+/// ValueRange: 0-0x7FFFFFFF, 0-0xFFFFFFFF
+/// Usage: Arrays
+///
+/// Returns `(nelem, base)`. Bytes are peeked with `fill_buf` and only
+/// consumed while their high bit is set, so the first byte with a clear high
+/// bit is left in the input.
+///
+/// NOTE(review): `da`, `base` and `nelem` are all inferred as `u8` here (they
+/// are combined with `u8` bytes and returned as `(u8, u8)`), so the `<< 7`
+/// shifts discard high bits and `0x10 * da` can overflow, which panics in
+/// builds with overflow checks. That does not match the value range stated
+/// above - confirm the intended integer widths.
+///
+/// # Errors
+/// Fails with `UnexpectedEof` when the input ends while a byte is expected,
+/// and propagates errors from the underlying reader and from `bincode`.
+fn read_da(input: &mut I) -> Result<(u8, u8)> {
+    let mut a = 0;
+    let mut b = 0;
+    let mut da = 0;
+    let mut base = 0;
+    let mut nelem = 0;
+    // TODO check no more then 9 bytes are read
+    loop {
+        let Some(typ) = input.fill_buf()?.get(0).copied() else {
+            return Err(anyhow!(std::io::Error::new(
+                std::io::ErrorKind::UnexpectedEof,
+                "Unexpected EoF on DA"
+            )));
+        };
+        if typ & 0x80 == 0 {
+            break;
+        }
+        input.consume(1);
+
+        da = (da << 7) | typ & 0x7F;
+        b += 1;
+        if b >= 4 {
+            let z: u8 = bincode::deserialize_from(&mut *input)?;
+            if z != 0 {
+                base = 0x10 * da | z & 0xF
+            }
+            nelem = (z >> 4) & 7;
+            loop {
+                let Some(y) = input.fill_buf()?.get(0).copied() else {
+                    return Err(anyhow!(std::io::Error::new(
+                        std::io::ErrorKind::UnexpectedEof,
+                        "Unexpected EoF on DA"
+                    )));
+                };
+                if (y & 0x80) == 0 {
+                    break;
+                }
+                input.consume(1);
+                nelem = (nelem << 7) | y & 0x7F;
+                a += 1;
+                if a >= 4 {
+                    return Ok((nelem, base));
+                }
+            }
+        }
+    }
+    Ok((nelem, base))
+}
+
+/// Reads 2 to 7 bytes.
+/// Value Range: Nothing or 0-0xFFFF_FFFF +/// Usage: some kind of size +fn read_dt_de(input: &mut I) -> std::io::Result> { + match read_dt(&mut *input)? { + 0 => Ok(None), + 0x7FFE => read_de(&mut *input).map(Some), + n => Ok(Some(n.into())), + } +} + +fn associate_field_name_and_member( + fields: Option>, + members: Vec, +) -> Result, T)>> { + let fields_len: usize = fields.iter().filter(|t| !t.is_empty()).count(); + ensure!(fields_len <= members.len(), "More fields then members"); + // allow to have less fields then members, first fields will have names, others not + Ok(fields + .into_iter() + .flat_map(Vec::into_iter) + .map(Option::Some) + .chain(std::iter::repeat(None)) + .into_iter() + .zip(members)) +}