diff --git a/.gitmodules b/.gitmodules index 31272c5a04..31de52809e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "arch/x86/mbuild"] path = arch/x86/mbuild url = https://github.com/intelxed/mbuild.git +[submodule "rust/examples/pdb-ng/pdb-0.8.0-patched"] + path = rust/examples/pdb-ng/pdb-0.8.0-patched + url = https://github.com/Vector35/pdb-rs.git diff --git a/arch/mips/arch_mips.cpp b/arch/mips/arch_mips.cpp index 77de378f83..6a3d80e4d7 100644 --- a/arch/mips/arch_mips.cpp +++ b/arch/mips/arch_mips.cpp @@ -1829,6 +1829,121 @@ class MipsImportedFunctionRecognizer: public FunctionRecognizer return false; } + + bool RecognizeELFPLTEntries2(BinaryView* data, Function* func, LowLevelILFunction* il) + { + // Look for the following code pattern: + // $t7 = addr_past_got_end + // $t9 = [$t7 - backward_offset_into_got].d + // $t8 = $t7 + (-backward_offset_into_got) + // OPTIONAL: $t7 = addr_past_got_end + // tailcall($t9) + if (il->GetInstructionCount() < 4) + return false; + if (il->GetInstructionCount() > 5) + return false; + + LowLevelILInstruction lui = il->GetInstruction(0); + if (lui.operation != LLIL_SET_REG) + return false; + LowLevelILInstruction luiOperand = lui.GetSourceExpr(); + if (!LowLevelILFunction::IsConstantType(luiOperand.operation)) + return false; + if (luiOperand.size != func->GetArchitecture()->GetAddressSize()) + return false; + uint64_t addrPastGot = luiOperand.GetConstant(); + uint32_t pltReg = lui.GetDestRegister(); + + LowLevelILInstruction ld = il->GetInstruction(1); + if (ld.operation != LLIL_SET_REG) + return false; + uint32_t targetReg = ld.GetDestRegister(); + LowLevelILInstruction ldOperand = ld.GetSourceExpr(); + if (ldOperand.operation != LLIL_LOAD) + return false; + if (ldOperand.size != func->GetArchitecture()->GetAddressSize()) + return false; + LowLevelILInstruction ldAddrOperand = ldOperand.GetSourceExpr(); + uint64_t entry = addrPastGot; + int64_t ldAddrRightOperandValue = 0; + + if 
((ldAddrOperand.operation == LLIL_ADD) || (ldAddrOperand.operation == LLIL_SUB)) + { + LowLevelILInstruction ldAddrLeftOperand = ldAddrOperand.GetRawOperandAsExpr(0); + LowLevelILInstruction ldAddrRightOperand = ldAddrOperand.GetRawOperandAsExpr(1); + if (ldAddrLeftOperand.operation != LLIL_REG) + return false; + if (ldAddrLeftOperand.GetSourceRegister() != pltReg) + return false; + if (!LowLevelILFunction::IsConstantType(ldAddrRightOperand.operation)) + return false; + ldAddrRightOperandValue = ldAddrRightOperand.GetConstant(); + if (ldAddrOperand.operation == LLIL_SUB) + ldAddrRightOperandValue = -ldAddrRightOperandValue; + entry = addrPastGot + ldAddrRightOperandValue; + } + else if (ldAddrOperand.operation != LLIL_REG) // No constant offset: the load address must be a bare register. NOTE(review): unlike the ADD/SUB case, the register is not compared against pltReg here - confirm + return false; + + Ref sym = data->GetSymbolByAddress(entry); + if (!sym) + return false; + if (sym->GetType() != ImportAddressSymbol) + return false; + + LowLevelILInstruction add = il->GetInstruction(2); + if (add.operation != LLIL_SET_REG) + return false; + LowLevelILInstruction addOperand = add.GetSourceExpr(); + + if (addOperand.operation == LLIL_ADD) + { + LowLevelILInstruction addLeftOperand = addOperand.GetLeftExpr(); + LowLevelILInstruction addRightOperand = addOperand.GetRightExpr(); + if (addLeftOperand.operation != LLIL_REG) + return false; + if (addLeftOperand.GetSourceRegister() != pltReg) + return false; + if (!LowLevelILFunction::IsConstantType(addRightOperand.operation)) + return false; + if (addRightOperand.GetConstant() != ldAddrRightOperandValue) + return false; + } + else if ((addOperand.operation != LLIL_REG) || (addOperand.GetSourceRegister() != pltReg)) //Simple assignment + return false; + + LowLevelILInstruction jump = il->GetInstruction(3); + if (jump.operation == LLIL_SET_REG) + { + if (il->GetInstructionCount() != 5) + return false; + if (jump.GetDestRegister() != pltReg) + return false; + LowLevelILInstruction luiOperand = jump.GetSourceExpr(); + if 
(!LowLevelILFunction::IsConstantType(luiOperand.operation)) + return false; + if (luiOperand.size != func->GetArchitecture()->GetAddressSize()) + return false; + if (((uint64_t) luiOperand.GetConstant()) != addrPastGot) + return false; + jump = il->GetInstruction(4); + } + + if ((jump.operation != LLIL_JUMP) && (jump.operation != LLIL_TAILCALL)) + return false; + LowLevelILInstruction jumpOperand = (jump.operation == LLIL_JUMP) ? jump.GetDestExpr() : jump.GetDestExpr(); + if (jumpOperand.operation != LLIL_REG) + return false; + if (jumpOperand.GetSourceRegister() != targetReg) + return false; + + Ref funcSym = Symbol::ImportedFunctionFromImportAddressSymbol(sym, func->GetStart()); + data->DefineAutoSymbol(funcSym); + func->ApplyImportedTypes(funcSym); + return true; + } + + public: virtual bool RecognizeLowLevelIL(BinaryView* data, Function* func, LowLevelILFunction* il) override { @@ -1838,6 +1953,9 @@ class MipsImportedFunctionRecognizer: public FunctionRecognizer if (RecognizeELFPLTEntries1(data, func, il)) return true; + if (RecognizeELFPLTEntries2(data, func, il)) + return true; + return false; } }; diff --git a/arch/riscv/src/lib.rs b/arch/riscv/src/lib.rs index 645bb45a2a..91a418f7eb 100644 --- a/arch/riscv/src/lib.rs +++ b/arch/riscv/src/lib.rs @@ -508,23 +508,23 @@ impl architecture::Intrinsic for RiscVIntrinsic { } } - fn inputs(&self) -> Vec> { + fn inputs(&self) -> Vec> { match self.id { Intrinsic::Uret | Intrinsic::Sret | Intrinsic::Mret | Intrinsic::Wfi => { vec![] } Intrinsic::Csrrd => { vec![NameAndType::new( - "csr".into(), + "csr", &Type::int(4, false), max_confidence(), )] } Intrinsic::Csrrw | Intrinsic::Csrwr | Intrinsic::Csrrs | Intrinsic::Csrrc => { vec![ - NameAndType::new("csr".into(), &Type::int(4, false), max_confidence()), + NameAndType::new("csr", &Type::int(4, false), max_confidence()), NameAndType::new( - "value".into(), + "value", &Type::int(::Int::width(), false), min_confidence(), ), @@ -540,8 +540,8 @@ impl architecture::Intrinsic 
for RiscVIntrinsic { | Intrinsic::Fmin(size) | Intrinsic::Fmax(size) => { vec![ - NameAndType::new("".into(), &Type::float(size as usize), max_confidence()), - NameAndType::new("".into(), &Type::float(size as usize), max_confidence()), + NameAndType::new("", &Type::float(size as usize), max_confidence()), + NameAndType::new("", &Type::float(size as usize), max_confidence()), ] } Intrinsic::Fsqrt(size, _) @@ -550,28 +550,28 @@ impl architecture::Intrinsic for RiscVIntrinsic { | Intrinsic::FcvtFToI(size, _, _) | Intrinsic::FcvtFToU(size, _, _) => { vec![NameAndType::new( - "".into(), + "", &Type::float(size as usize), max_confidence(), )] } Intrinsic::FcvtIToF(size, _, _) => { vec![NameAndType::new( - "".into(), + "", &Type::int(size as usize, true), max_confidence(), )] } Intrinsic::FcvtUToF(size, _, _) => { vec![NameAndType::new( - "".into(), + "", &Type::int(size as usize, false), max_confidence(), )] } Intrinsic::Fence => { vec![NameAndType::new( - "".into(), + "", &Type::int(4, false), min_confidence(), )] @@ -2431,10 +2431,9 @@ impl RelocationHandler .iter() .find(|r| r.info().native_type == Self::R_RISCV_PCREL_HI20) { - Some(target) => target, + Some(target) => target.target().wrapping_add(target.info().addend as u64), None => return false, }; - let target = target.target().wrapping_add(target.info().addend as u64); let offset = target.wrapping_sub(reloc.target()) as u32; let low_offset = offset & 0xfff; diff --git a/basedetection.cpp b/basedetection.cpp new file mode 100644 index 0000000000..5cc7c39fa8 --- /dev/null +++ b/basedetection.cpp @@ -0,0 +1,92 @@ +// Copyright (c) 2015-2024 Vector 35 Inc +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit 
persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +#include "binaryninjaapi.h" + +using namespace BinaryNinja; + + +BaseAddressDetection::BaseAddressDetection(Ref bv) +{ + m_object = BNCreateBaseAddressDetection(bv->GetObject()); +} + + +BaseAddressDetection::~BaseAddressDetection() +{ + BNFreeBaseAddressDetection(m_object); +} + + +bool BaseAddressDetection::DetectBaseAddress(BaseAddressDetectionSettings& settings) +{ + BNBaseAddressDetectionSettings bnSettings = { + settings.Architecture.c_str(), + settings.Analysis.c_str(), + settings.MinStrlen, + settings.Alignment, + settings.LowerBoundary, + settings.UpperBoundary, + settings.POIAnalysis, + settings.MaxPointersPerCluster, + }; + + return BNDetectBaseAddress(m_object, bnSettings); +} + + +void BaseAddressDetection::Abort() +{ + return BNAbortBaseAddressDetection(m_object); +} + + +bool BaseAddressDetection::IsAborted() +{ + return BNIsBaseAddressDetectionAborted(m_object); +} + + +std::set> BaseAddressDetection::GetScores(BNBaseAddressDetectionConfidence* confidence, + uint64_t *lastTestedBaseAddress) +{ + std::set> result; + BNBaseAddressDetectionScore scores[10]; + size_t numCandidates = BNGetBaseAddressDetectionScores(m_object, scores, 10, confidence, lastTestedBaseAddress); + for (size_t i = 0; i < numCandidates; i++) + 
result.insert(std::make_pair(scores[i].Score, scores[i].BaseAddress)); + return result; +} + + +std::vector BaseAddressDetection::GetReasonsForBaseAddress(uint64_t baseAddress) +{ + std::vector result; + size_t count; + BNBaseAddressDetectionReason *reasons = BNGetBaseAddressDetectionReasons(m_object, baseAddress, &count); + if (!reasons) + return result; + + for (size_t i = 0; i < count; i++) + result.push_back(reasons[i]); + + BNFreeBaseAddressDetectionReasons(reasons); + return result; +} diff --git a/binaryninjaapi.h b/binaryninjaapi.h index 6bd63a4b0f..803ebf7699 100644 --- a/binaryninjaapi.h +++ b/binaryninjaapi.h @@ -17378,6 +17378,62 @@ namespace BinaryNinja { const std::function& add); void Process(); }; + + struct BaseAddressDetectionSettings + { + std::string Architecture; + std::string Analysis; + uint32_t MinStrlen; + uint32_t Alignment; + uint64_t LowerBoundary; + uint64_t UpperBoundary; + BNBaseAddressDetectionPOISetting POIAnalysis; + uint32_t MaxPointersPerCluster; + }; + + /*! + \ingroup baseaddressdetection + */ + class BaseAddressDetection + { + BNBaseAddressDetection* m_object; /* NOTE(review): freed in ~BaseAddressDetection(), but this class declares no copy constructor/assignment (neither defined nor deleted) - a copy would free the same handle twice; confirm whether copying should be disabled */ + + public: + BaseAddressDetection(Ref view); + ~BaseAddressDetection(); + + /*! Analyze program, identify pointers and points-of-interest, and detect candidate base addresses + + \param settings Base address detection settings + \return true on success, false otherwise + */ + bool DetectBaseAddress(BaseAddressDetectionSettings& settings); + + /*! Get the top 10 candidate base addresses and their scores + + \param confidence Confidence level that indicates the likelihood the top base address candidate is correct + \param lastTestedBaseAddress Last base address tested before analysis was aborted or completed + \return Set of (score, base address) pairs, one per candidate base address + */ + std::set> GetScores(BNBaseAddressDetectionConfidence* confidence, uint64_t *lastTestedBaseAddress); + + /*! 
Get a vector of BNBaseAddressDetectionReasons containing information that indicates why a base address was reported as a candidate + + \param baseAddress Base address to query reasons for + \return Vector of reason structures containing information about why a base address was reported as a candidate + */ + std::vector GetReasonsForBaseAddress(uint64_t baseAddress); + + /*! Abort base address detection + */ + void Abort(); + + /*! Determine if base address detection is aborted + + \return true if aborted by user, false otherwise + */ + bool IsAborted(); + }; } // namespace BinaryNinja diff --git a/binaryninjacore.h b/binaryninjacore.h index 68828ce8ca..d0f7a5e2c0 100644 --- a/binaryninjacore.h +++ b/binaryninjacore.h @@ -37,14 +37,14 @@ // Current ABI version for linking to the core. This is incremented any time // there are changes to the API that affect linking, including new functions, // new types, or modifications to existing functions or types. -#define BN_CURRENT_CORE_ABI_VERSION 59 +#define BN_CURRENT_CORE_ABI_VERSION 60 // Minimum ABI version that is supported for loading of plugins. Plugins that // are linked to an ABI version less than this will not be able to load and // will require rebuilding. The minimum version is increased when there are // incompatible changes that break binary compatibility, such as changes to // existing types or functions. -#define BN_MINIMUM_CORE_ABI_VERSION 59 +#define BN_MINIMUM_CORE_ABI_VERSION 60 #ifdef __GNUC__ #ifdef BINARYNINJACORE_LIBRARY @@ -279,6 +279,7 @@ extern "C" typedef struct BNExternalLibrary BNExternalLibrary; typedef struct BNExternalLocation BNExternalLocation; typedef struct BNProjectFolder BNProjectFolder; + typedef struct BNBaseAddressDetection BNBaseAddressDetection; //! 
Console log levels typedef enum BNLogLevel @@ -3157,6 +3158,54 @@ extern "C" ConflictSyncStatus } BNSyncStatus; + typedef enum BNBaseAddressDetectionPOISetting + { + POIAnalysisStringsOnly, + POIAnalysisFunctionsOnly, + POIAnalysisAll, + } BNBaseAddressDetectionPOISetting; + + typedef enum BNBaseAddressDetectionPOIType + { + POIString, + POIFunction, + POIDataVariable, + POIFileStart, + POIFileEnd, + } BNBaseAddressDetectionPOIType; + + typedef enum BNBaseAddressDetectionConfidence + { + NoConfidence, + LowConfidence, + HighConfidence, + } BNBaseAddressDetectionConfidence; + + typedef struct BNBaseAddressDetectionSettings + { + const char* Architecture; + const char* Analysis; + uint32_t MinStrlen; + uint32_t Alignment; + uint64_t LowerBoundary; + uint64_t UpperBoundary; + BNBaseAddressDetectionPOISetting POIAnalysis; + uint32_t MaxPointersPerCluster; + } BNBaseAddressDetectionSettings; + + typedef struct BNBaseAddressDetectionReason + { + uint64_t Pointer; + uint64_t POIOffset; + BNBaseAddressDetectionPOIType POIType; + } BNBaseAddressDetectionReason; + + typedef struct BNBaseAddressDetectionScore + { + size_t Score; + uint64_t BaseAddress; + } BNBaseAddressDetectionScore; + BINARYNINJACOREAPI char* BNAllocString(const char* contents); BINARYNINJACOREAPI void BNFreeString(char* str); BINARYNINJACOREAPI char** BNAllocStringList(const char** contents, size_t size); @@ -3869,6 +3918,7 @@ extern "C" BINARYNINJACOREAPI bool BNReadBE16(BNBinaryReader* stream, uint16_t* result); BINARYNINJACOREAPI bool BNReadBE32(BNBinaryReader* stream, uint32_t* result); BINARYNINJACOREAPI bool BNReadBE64(BNBinaryReader* stream, uint64_t* result); + BINARYNINJACOREAPI bool BNReadPointer(BNBinaryView* view, BNBinaryReader* stream, uint64_t* result); BINARYNINJACOREAPI uint64_t BNGetReaderPosition(BNBinaryReader* stream); BINARYNINJACOREAPI void BNSeekBinaryReader(BNBinaryReader* stream, uint64_t offset); @@ -6988,6 +7038,17 @@ extern "C" BINARYNINJACOREAPI bool 
BNBinaryViewPullTypeArchiveTypes(BNBinaryView* view, const char* archiveId, const char* const* archiveTypeIds, size_t archiveTypeIdCount, char*** updatedArchiveTypeIds, char*** updatedAnalysisTypeIds, size_t* updatedTypeCount); BINARYNINJACOREAPI bool BNBinaryViewPushTypeArchiveTypes(BNBinaryView* view, const char* archiveId, const char* const* typeIds, size_t typeIdCount, char*** updatedAnalysisTypeIds, char*** updatedArchiveTypeIds, size_t* updatedTypeCount); + // Base Address Detection + BINARYNINJACOREAPI BNBaseAddressDetection* BNCreateBaseAddressDetection(BNBinaryView *view); + BINARYNINJACOREAPI bool BNDetectBaseAddress(BNBaseAddressDetection* bad, BNBaseAddressDetectionSettings& settings); + BINARYNINJACOREAPI size_t BNGetBaseAddressDetectionScores(BNBaseAddressDetection* bad, BNBaseAddressDetectionScore* scores, size_t count, + BNBaseAddressDetectionConfidence* confidence, uint64_t* lastTestedBaseAddress); + BINARYNINJACOREAPI BNBaseAddressDetectionReason* BNGetBaseAddressDetectionReasons(BNBaseAddressDetection* bad, + uint64_t baseAddress, size_t* count); + BINARYNINJACOREAPI void BNFreeBaseAddressDetectionReasons(BNBaseAddressDetectionReason* reasons); + BINARYNINJACOREAPI void BNAbortBaseAddressDetection(BNBaseAddressDetection* bad); + BINARYNINJACOREAPI bool BNIsBaseAddressDetectionAborted(BNBaseAddressDetection* bad); + BINARYNINJACOREAPI void BNFreeBaseAddressDetection(BNBaseAddressDetection* bad); #ifdef __cplusplus } #endif diff --git a/binaryreader.cpp b/binaryreader.cpp index c49637f982..4b69831076 100644 --- a/binaryreader.cpp +++ b/binaryreader.cpp @@ -314,14 +314,7 @@ bool BinaryReader::TryRead64(uint64_t& result) bool BinaryReader::TryReadPointer(uint64_t& result) { - size_t addressSize = m_view->GetAddressSize(); - if (addressSize > 8 || addressSize == 0) - return false; - - if (GetEndianness() == BigEndian) - return TryReadBEPointer(result); - - return TryReadLEPointer(result); + return BNReadPointer(m_view->GetObject(), m_stream, &result); 
} diff --git a/docs/dev/concepts.md b/docs/dev/concepts.md index 25dd5af0a5..454f0fbfd3 100644 --- a/docs/dev/concepts.md +++ b/docs/dev/concepts.md @@ -1,5 +1,26 @@ # Important Concepts +## Binary Views + +The highest-level analysis object in Binary Ninja is a [BinaryView](https://api.binary.ninja/binaryninja.binaryview-module.html#binaryninja.binaryview.BinaryView) (or `bv` for short). You can think of a `bv` as the Binary Ninja equivalent of what an operating system does when loading an executable binary. A `bv` is the top-level analysis object: it represents how a file is loaded into memory, along with debug information, tables of function pointers, and many other structures. + +When you are interacting in the UI with an executable file, you can access `bv` in the Python scripting console to see the representation of the current file's BinaryView: + +```python +>>> bv + +>>> len(bv.functions) +140 +``` + +???+ Info "Tip" + Note the use of `bv` here as a shortcut to the currently open BinaryView. For other "magic" variables, see the [user guide](../guide/index.md#magic-console-variables). + +If you want to start writing a plugin, most top-level methods will exist off of the BinaryView. Conceptually, you can think about the organization as a hierarchy starting with a BinaryView, then functions, then basic blocks, then instructions. There are, of course, many other ways to access parts of the binary, but this is the most common organization. For example, check out the tab completion in the scripting console for `bv.get` (a common prefix for many APIs): + +![Tab Completion ><](../img/getcompletion.png "Tab Completion") + +Some BinaryViews have parent views. For example, the view used for decompilation includes memory mappings through segments and sections, while its "parent_view" property is a view of the original file on disk. 
## REPL versus Scripts @@ -46,7 +67,13 @@ t = [ bv.get_symbol_by_raw_name('__builtin_strncpy').address ] -list(current_hlil.traverse(find_strcpy, t)) +# Find the first call to a builtin: +for result in current_hlil.traverse(find_strcpy, t): + # Any logic should live here, not inside the callable which is just for + # matching. Because this is a generator, it can fail fast when used for + # search! + print(result) + break def get_memcpy_data(i, t) -> bytes: @@ -56,7 +83,8 @@ def get_memcpy_data(i, t) -> bytes: # Iterate through all instructions in the HLIL t = bv.get_symbol_by_raw_name('__builtin_memcpy').address -list(current_hlil.traverse(get_memcpy_data, t)) +for i in current_hlil.traverse(get_memcpy_data, t): + print(f"Found some memcpy data: {repr(i)}") # find all the calls to __builtin_strcpy and get their values @@ -69,13 +97,20 @@ t = [ bv.get_symbol_by_raw_name('__builtin_strcpy').address, bv.get_symbol_by_raw_name('__builtin_strncpy').address ] -list(current_hlil.traverse(find_strcpy, t)) + +for i in current_hlil.traverse(find_strcpy, t): + print(i) # collect the number of parameters for each function call def param_counter(i) -> int: match i: case HighLevelILCall(): return len(i.params) + +# Note that the results are a generator and usually anything that is found +# should have processing done outside the callback, but you can always +# convert it to a list like this: + list(current_hlil.traverse(param_counter)) @@ -84,6 +119,7 @@ def collect_call_target(i) -> None: match i: case HighLevelILCall(dest=HighLevelILConstPtr(constant=c)): return c + set([hex(a) for a in current_hlil.traverse(collect_call_target)]) @@ -92,6 +128,7 @@ def collect_this_vars(i) -> Variable: match i: case HighLevelILVar(var=v) if v.name == 'this': return v + list(v for v in current_hlil.traverse(collect_this_vars)) ``` diff --git a/docs/guide/index.md b/docs/guide/index.md index db20be62b6..e4cff8e9ff 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -132,7 +132,7 @@ 
When you create a new file, you're given the [hex view](index.md#hex-view) of an To paste, right click anywhere in the view, select "Paste From," and choose whichever option matches the data you copied. For example, the string `\x01\x02\x03\x04` can be pasted as an Escape String, while `01020304` is Raw Hex. -From here, you can save the contents of your new binary to disk and reopen it for auto-analysis. Of course, you could also switch out of hex view and start creating functions yourself. +From here, you can save the contents of your new binary to disk and reopen it for auto-analysis. Of course, you could also switch out of hex view into linear view and start creating functions directly. ## New Tab @@ -210,6 +210,9 @@ There's also [many](#using-the-keyboard) keyboard-based navigation options. Switching views happens multiple ways. In some instances, it is automatic, such as clicking a data reference from graph view. This will navigate to linear view as data is not shown in the graph view. While navigating, you can use the [view hotkeys](#default-hotkeys) to switch to a specific view at the same location as the current selection. Next you can use the [command palette](#command-palette). Additionally, the view menu in the header at the top of each pane can be used to change views without navigating to any given location. Finally, you can also use the `View` application menu. +???+ Tip "Tip" + Any loaded BinaryView will show up in the upper-left of the main pane. For example, you can switch between `ELF` and `Raw` to move between multiple loaded [BinaryViews](../dev/concepts.md#binary-views). 
+ ## The Sidebar ![the sidebar ><](../img/sidebars.png "The Sidebar"){ width = "800" } @@ -410,6 +413,7 @@ The normal find dialog also exists as a sidebar panel that allows persistent, ta The search types are available from a drop-down next to the text input field and include: + - Advanced Binary Search: A new search type using the [bv.search](https://dev-api.binary.ninja/binaryninja.binaryview-module.html#binaryninja.binaryview.BinaryView.search) syntax (supporting regular expressions and wildcard hex strings) - Escaped: Escaped strings such as `OneString\x09\Tabsx09Another` - Hex: All values much be valid hex characters such as `ebfffc390` and the bytes will only be searched for in this particular order - Raw: A simple string search that matches the exact string as specified @@ -573,9 +577,10 @@ The hexadecimal view is useful for viewing raw binary files that may or may not The hex view is particularly good for transforming data in various ways via the `Copy as`, `Transform`, and `Paste from` menus. Note that like any other edits, `Transform` menu options will transform the data in-place, but unlike other means of editing the binary, the transformation dialog will work even when the lock button is toggled on (🔒). -???+ Tip "Tip" - Any changes made in the Hex view will take effect immediately in any other views open into the same file (new views can be created via the `Split to new tab`, or `Split to new window` options under `View`, or via [splitting panes](#tiling-panes)). This can, however, cause large amounts of re-analysis so be warned before making large edits or transformations in a large binary file. +If you're using the hex view for a Binary View like ELF, Mach-O or PE, you probably want to make sure you're also in the `Raw` view if you want to see the file as it exists on disk in hex view. 
+### Live Preview + Any changes made in the Hex view will take effect immediately in any other views open into the same file (new views can be created via the `Split to new tab`, or `Split to new window` options under `View`, or via [splitting panes](#tiling-panes)). This can, however, cause large amounts of re-analysis so be warned before making large edits or transformations in a large binary file. ## Linear View diff --git a/docs/img/find.png b/docs/img/find.png index ab081ee7a9..6a10a0140d 100644 Binary files a/docs/img/find.png and b/docs/img/find.png differ diff --git a/docs/img/getcompletion.png b/docs/img/getcompletion.png new file mode 100644 index 0000000000..15aaf6a8a7 Binary files /dev/null and b/docs/img/getcompletion.png differ diff --git a/docs/img/hlil-braces.png b/docs/img/hlil-braces.png index 551c4e9750..26d9404ef5 100644 Binary files a/docs/img/hlil-braces.png and b/docs/img/hlil-braces.png differ diff --git a/docs/img/logs.png b/docs/img/logs.png index be89c38489..bfcf938944 100644 Binary files a/docs/img/logs.png and b/docs/img/logs.png differ diff --git a/docs/img/sidebaricons.png b/docs/img/sidebaricons.png index d95dc20836..05de75dd3a 100644 Binary files a/docs/img/sidebaricons.png and b/docs/img/sidebaricons.png differ diff --git a/docs/img/stack.png b/docs/img/stack.png index 0efad194cf..06db97c803 100644 Binary files a/docs/img/stack.png and b/docs/img/stack.png differ diff --git a/docs/img/strings.png b/docs/img/strings.png index 5f94c6d1b0..2ad7a459df 100644 Binary files a/docs/img/strings.png and b/docs/img/strings.png differ diff --git a/docs/img/themes-console.png b/docs/img/themes-console.png index 7a41003410..a8de28f2bc 100644 Binary files a/docs/img/themes-console.png and b/docs/img/themes-console.png differ diff --git a/docs/img/themes-graph.png b/docs/img/themes-graph.png index df74c012c0..b118f516fe 100644 Binary files a/docs/img/themes-graph.png and b/docs/img/themes-graph.png differ diff --git a/docs/img/themes-hex.png 
b/docs/img/themes-hex.png index cad30b42b1..01551b9a72 100644 Binary files a/docs/img/themes-hex.png and b/docs/img/themes-hex.png differ diff --git a/docs/img/themes-hexview.png b/docs/img/themes-hexview.png index 495eb93572..8c33a1c31a 100644 Binary files a/docs/img/themes-hexview.png and b/docs/img/themes-hexview.png differ diff --git a/docs/img/themes-highlighting.png b/docs/img/themes-highlighting.png index 097bfb28e0..5e66536689 100644 Binary files a/docs/img/themes-highlighting.png and b/docs/img/themes-highlighting.png differ diff --git a/docs/img/themes-linear.png b/docs/img/themes-linear.png index 392bab47b2..1ecfd3fd15 100644 Binary files a/docs/img/themes-linear.png and b/docs/img/themes-linear.png differ diff --git a/docs/img/themes-minigraph.png b/docs/img/themes-minigraph.png index b3dc131813..f58c911b14 100644 Binary files a/docs/img/themes-minigraph.png and b/docs/img/themes-minigraph.png differ diff --git a/docs/img/themes-panes.png b/docs/img/themes-panes.png index 0f88c8bec7..3c6170564e 100644 Binary files a/docs/img/themes-panes.png and b/docs/img/themes-panes.png differ diff --git a/docs/img/themes-statusbar.png b/docs/img/themes-statusbar.png index f35970f81a..71ec911f78 100644 Binary files a/docs/img/themes-statusbar.png and b/docs/img/themes-statusbar.png differ diff --git a/docs/img/themes-tokens.png b/docs/img/themes-tokens.png index ed6cc1e2dc..96b8926bfe 100644 Binary files a/docs/img/themes-tokens.png and b/docs/img/themes-tokens.png differ diff --git a/docs/img/variables.png b/docs/img/variables.png index 441de572a9..99fc2d50f5 100644 Binary files a/docs/img/variables.png and b/docs/img/variables.png differ diff --git a/examples/triage/baseaddress.cpp b/examples/triage/baseaddress.cpp new file mode 100644 index 0000000000..67d8301cc3 --- /dev/null +++ b/examples/triage/baseaddress.cpp @@ -0,0 +1,429 @@ +#include "baseaddress.h" + +using namespace std; + + +BNBaseAddressDetectionPOISetting BaseAddressDetectionPOISettingFromString(const 
string& setting) +{ + if (setting == "Strings only") + return POIAnalysisStringsOnly; + if (setting == "Functions only") + return POIAnalysisFunctionsOnly; + return POIAnalysisAll; // Default to All +} + + +string BaseAddressDetectionConfidenceToString(BNBaseAddressDetectionConfidence level) +{ + switch (level) + { + case NoConfidence: + return "Unassigned"; + case HighConfidence: + return "High"; + case LowConfidence: + return "Low"; + default: + return "Unknown"; + } +} + + +void BaseAddressDetectionThread::run() +{ + BaseAddressDetectionQtResults results; + uint64_t value; + string errorStr; + + if (!BinaryNinja::BinaryView::ParseExpression( + m_view, m_inputs->AlignmentLineEdit->text().toStdString(), value, 0, errorStr)) + { + results.Status = "Invalid alignment value (" + errorStr + ")"; + emit ResultReady(results); + return; + } + uint32_t alignment = value; + + if (!BinaryNinja::BinaryView::ParseExpression( + m_view, m_inputs->StrlenLineEdit->text().toStdString(), value, 0, errorStr)) + { + results.Status = "Invalid minimum string length (" + errorStr + ")"; + emit ResultReady(results); + return; + } + uint32_t minStrlen = value; + + uint64_t upperBoundary; + if (!BinaryNinja::BinaryView::ParseExpression( + m_view, m_inputs->UpperBoundary->text().toStdString(), upperBoundary, 0, errorStr)) + { + results.Status = "Invalid upper boundary address (" + errorStr + ")"; + emit ResultReady(results); + return; + } + + uint64_t lowerBoundary; + if (!BinaryNinja::BinaryView::ParseExpression( + m_view, m_inputs->LowerBoundary->text().toStdString(), lowerBoundary, 0, errorStr)) + { + results.Status = "Invalid lower boundary address (" + errorStr + ")"; + emit ResultReady(results); + return; + } + + if (lowerBoundary >= upperBoundary) + { + results.Status = "Upper boundary address is less than lower"; + emit ResultReady(results); + return; + } + + if (!BinaryNinja::BinaryView::ParseExpression( + m_view, m_inputs->MaxPointersPerCluster->text().toStdString(), value, 0, 
errorStr)) + { + results.Status = "Invalid max pointers (" + errorStr + ")"; + emit ResultReady(results); + return; + } + + uint32_t maxPointersPerCluster = value; + if (maxPointersPerCluster < 2) + { + results.Status = "Invalid max pointers (must be >= 2)"; + emit ResultReady(results); + return; + } + + BNBaseAddressDetectionPOISetting poiSetting = BaseAddressDetectionPOISettingFromString( + m_inputs->POIBox->currentText().toStdString()); + BinaryNinja::BaseAddressDetectionSettings settings = { + m_inputs->ArchitectureBox->currentText().toStdString(), + m_inputs->AnalysisBox->currentText().toStdString(), + minStrlen, + alignment, + lowerBoundary, + upperBoundary, + poiSetting, + maxPointersPerCluster, + }; + + if (!m_baseDetection->DetectBaseAddress(settings)) + emit ResultReady(results); /* NOTE(review): unlike the validation failures above there is no return after this emit, so execution falls through and ResultReady is emitted a second time at the end of run() - confirm this is intended */ + + auto scores = m_baseDetection->GetScores(&results.Confidence, &results.LastTestedBaseAddress); + results.Scores = scores; + for (const auto& score : scores) + { + auto reasons = m_baseDetection->GetReasonsForBaseAddress(score.second); + results.Reasons[score.second] = reasons; + } + + emit ResultReady(results); +} + + +void BaseAddressDetectionWidget::HideResultsWidgets(bool hide) +{ + if (hide) + { + m_preferredBaseLabel->setHidden(true); + m_preferredBase->setHidden(true); + m_confidenceLabel->setHidden(true); + m_confidence->setHidden(true); + m_resultsTableWidget->setHidden(true); + m_reloadBase->setHidden(true); + m_rebaseButton->setHidden(true); + } + else + { + m_preferredBaseLabel->setHidden(false); + m_preferredBase->setHidden(false); + m_confidenceLabel->setHidden(false); + m_confidence->setHidden(false); + m_resultsTableWidget->setHidden(false); + m_reloadBase->setHidden(false); + m_rebaseButton->setHidden(false); + } +} + + +void BaseAddressDetectionWidget::GetClickedBaseAddress(const QModelIndex& index) +{ + if (index.isValid()) + { + auto baseAddress = m_resultsTableWidget->item(index.row(), 0)->text(); + m_reloadBase->setText(baseAddress); + } +} + + +void 
BaseAddressDetectionWidget::HandleResults(const BaseAddressDetectionQtResults& results) +{ + if (!results.Status.empty()) + m_status->setText(QString::fromStdString(results.Status)); + + if (results.Status.empty() && m_worker->IsAborted()) + m_status->setText(QString("Aborted by user (Last Base: 0x%1)").arg(results.LastTestedBaseAddress, 0, 16)); + + if (results.Scores.empty()) + { + if (!m_worker->IsAborted() && results.Status.empty()) + m_status->setText("Completed with no results"); + m_preferredBase->setText("Not available"); + m_confidence->setText("Not available"); + } + else + { + HideResultsWidgets(false); + if (results.Status.empty() && !m_worker->IsAborted()) + m_status->setText("Completed with results"); + m_preferredBase->setText(QString("0x%1").arg(results.Scores.rbegin()->second, 0, 16)); + m_confidence->setText(QString("%1 (Score: %2)").arg( + QString::fromStdString(BaseAddressDetectionConfidenceToString(results.Confidence)), + QString::number(results.Scores.rbegin()->first))); + m_reloadBase->setText(QString("0x%1").arg(results.Scores.rbegin()->second, 0, 16)); + } + + m_resultsTableWidget->clearContents(); + m_resultsTableWidget->setRowCount(results.Scores.size()); + size_t row = 0; + for (auto rit = results.Scores.rbegin(); rit != results.Scores.rend(); rit++) + { + auto [score, baseaddr] = *rit; + size_t strHits = 0; + size_t funcHits = 0; + size_t dataHits = 0; + for (const auto& reason : results.Reasons.at(baseaddr)) + { + switch (reason.POIType) + { + case POIString: + strHits++; + break; + case POIFunction: + funcHits++; + break; + case POIDataVariable: + dataHits++; + break; + default: + break; + } + } + + m_resultsTableWidget->setItem(row, 0, new QTableWidgetItem(QString("0x%1").arg(baseaddr, 0, 16))); + m_resultsTableWidget->setItem(row, 1, new QTableWidgetItem(QString::number(score))); + m_resultsTableWidget->setItem(row, 2, new QTableWidgetItem(QString::number(strHits))); + m_resultsTableWidget->setItem(row, 3, new 
QTableWidgetItem(QString::number(funcHits))); + m_resultsTableWidget->setItem(row, 4, new QTableWidgetItem(QString::number(dataHits))); + row++; + } + + m_abortButton->setHidden(true); + m_startButton->setHidden(false); + m_startButton->setEnabled(true); +} + + +void BaseAddressDetectionWidget::DetectBaseAddress() +{ + HideResultsWidgets(true); + m_status->setText("Running..."); + m_resultsTableWidget->clearContents(); + m_preferredBase->setText("Not available"); + m_confidence->setText("Not available"); + m_startButton->setHidden(true); + m_worker = new BaseAddressDetectionThread(&m_inputs, m_view); + connect(m_worker, &BaseAddressDetectionThread::ResultReady, this, &BaseAddressDetectionWidget::HandleResults); + connect(m_worker, &BaseAddressDetectionThread::finished, m_worker, &QObject::deleteLater); + m_worker->start(); + m_abortButton->setHidden(false); +} + + +void BaseAddressDetectionWidget::Abort() +{ + m_worker->Abort(); + m_abortButton->setHidden(true); + m_startButton->setHidden(false); + m_startButton->setEnabled(false); +} + + +void BaseAddressDetectionWidget::RebaseWithFullAnalysis() +{ + auto mappedView = m_view->GetFile()->GetViewOfType("Mapped"); + if (!mappedView) + return; + + auto fileMetadata = m_view->GetFile(); + if (!fileMetadata) + return; + + uint64_t address; + string errorStr; + if (!BinaryNinja::BinaryView::ParseExpression(m_view, m_reloadBase->text().toStdString(), address, 0, errorStr)) + { + m_status->setText(QString("Invalid rebase address (%1)").arg(QString::fromStdString(errorStr))); + return; + } + + if (!fileMetadata->Rebase(mappedView, address)) + return; + + BinaryNinja::Settings::Instance()->Set("analysis.mode", "full", mappedView); + mappedView->Reanalyze(); + + auto frame = ViewFrame::viewFrameForWidget(this); + if (!frame) + return; + + auto fileContext = frame->getFileContext(); + if (!fileContext) + return; + + auto uiContext = UIContext::contextForWidget(this); + if (!uiContext) + return; + + 
uiContext->recreateViewFrames(fileContext); + fileContext->refreshDataViewCache(); + + auto newFrame = ViewFrame::viewFrameForWidget(this); + if (!newFrame) + return; + + auto view = newFrame->getCurrentViewInterface(); + if (!view) + return; + + auto data = view->getData(); + if (!data) + return; + + if (!view->navigate(address)) + data->Navigate("Linear:Mapped", address); +} + + +void BaseAddressDetectionWidget::CreateAdvancedSettingsGroup() +{ + int32_t row = 0; + int32_t column = 0; + auto grid = new QGridLayout(); + + grid->addWidget(new QLabel("Min. String Length:"), row, column, Qt::AlignLeft); + m_inputs.StrlenLineEdit = new QLineEdit("0n10"); + grid->addWidget(m_inputs.StrlenLineEdit, row, column + 1, Qt::AlignLeft); + + grid->addWidget(new QLabel("Alignment:"), row, column + 2, Qt::AlignLeft); + m_inputs.AlignmentLineEdit = new QLineEdit("0n1024"); + grid->addWidget(m_inputs.AlignmentLineEdit, row++, column + 3, Qt::AlignLeft); + + grid->addWidget(new QLabel("Lower Boundary:"), row, column, Qt::AlignLeft); + m_inputs.LowerBoundary = new QLineEdit("0x0"); + grid->addWidget(m_inputs.LowerBoundary, row, column + 1, Qt::AlignLeft); + + grid->addWidget(new QLabel("Upper Boundary:"), row, column + 2, Qt::AlignLeft); + m_inputs.UpperBoundary = new QLineEdit("0xffffffffffffffff"); + grid->addWidget(m_inputs.UpperBoundary, row++, column + 3, Qt::AlignLeft); + + grid->addWidget(new QLabel("Points Of Interest:"), row, column, Qt::AlignLeft); + auto poiList = QStringList() << "All" << "Strings only" << "Functions only"; + m_inputs.POIBox = new QComboBox(this); + m_inputs.POIBox->addItems(poiList); + grid->addWidget(m_inputs.POIBox, row, column + 1, Qt::AlignLeft); + + grid->addWidget(new QLabel("Max Pointers:"), row, column + 2, Qt::AlignLeft); + m_inputs.MaxPointersPerCluster = new QLineEdit("0n128"); + grid->addWidget(m_inputs.MaxPointersPerCluster, row++, column + 3, Qt::AlignLeft); + + m_advancedSettingsGroup = new ExpandableGroup(grid); + 
m_advancedSettingsGroup->setTitle("Advanced Settings"); +} + + +BaseAddressDetectionWidget::BaseAddressDetectionWidget(QWidget* parent, + BinaryNinja::Ref bv) : QWidget(parent) +{ + m_view = bv->GetParentView() ? bv->GetParentView() : bv; + m_layout = new QGridLayout(); + int32_t row = 0; + int32_t column = 0; + + m_layout->addWidget(new QLabel("Architecture:"), row, column, Qt::AlignLeft); + m_inputs.ArchitectureBox = new QComboBox(this); + auto architectures = BinaryNinja::Architecture::GetList(); + auto archItemList = QStringList(); + archItemList << "auto detect"; + for (const auto& arch : architectures) + archItemList << QString::fromStdString(arch->GetName()); + m_inputs.ArchitectureBox->addItems(archItemList); + m_layout->addWidget(m_inputs.ArchitectureBox, row++, column + 1, Qt::AlignLeft); + + m_layout->addWidget(new QLabel("Analysis Level:"), row, column, Qt::AlignLeft); + m_inputs.AnalysisBox = new QComboBox(this); + auto analysisItemList = QStringList() << "basic" << "controlFlow" << "full"; + m_inputs.AnalysisBox->addItems(analysisItemList); + m_layout->addWidget(m_inputs.AnalysisBox, row++, column + 1, Qt::AlignLeft); + + CreateAdvancedSettingsGroup(); + m_layout->addWidget(m_advancedSettingsGroup, row++, column, 1, 4); + + m_startButton = new QPushButton("Start"); + connect(m_startButton, &QPushButton::clicked, this, &BaseAddressDetectionWidget::DetectBaseAddress); + m_layout->addWidget(m_startButton, row, column, Qt::AlignLeft); + + m_abortButton = new QPushButton("Abort"); + connect(m_abortButton, &QPushButton::clicked, this, &BaseAddressDetectionWidget::Abort); + m_abortButton->setHidden(true); + m_layout->addWidget(m_abortButton, row, column, Qt::AlignLeft); + + m_status = new QLabel("Not running"); + auto palette = m_status->palette(); + palette.setColor(QPalette::WindowText, getThemeColor(AlphanumericHighlightColor)); + m_status->setPalette(palette); + m_status->setFont(getMonospaceFont(this)); + m_layout->addWidget(m_status, row++, column + 1, 
1, 2, Qt::AlignLeft); + + m_preferredBaseLabel = new QLabel("Preferred Base:"); + m_layout->addWidget(m_preferredBaseLabel, row, column, Qt::AlignLeft); + m_preferredBase = new QLabel("Not available"); + m_preferredBase->setTextInteractionFlags(Qt::TextSelectableByMouse); + m_preferredBase->setFont(getMonospaceFont(this)); + m_preferredBase->setPalette(palette); + m_layout->addWidget(m_preferredBase, row, column + 1, Qt::AlignLeft); + + m_confidenceLabel = new QLabel("Confidence:"); + m_layout->addWidget(m_confidenceLabel, row, column + 2, Qt::AlignLeft); + m_confidence = new QLabel("Not available"); + m_confidence->setFont(getMonospaceFont(this)); + m_confidence->setPalette(palette); + m_layout->addWidget(m_confidence, row++, column + 3, Qt::AlignLeft); + + m_resultsTableWidget = new QTableWidget(this); + m_resultsTableWidget->setColumnCount(5); + QStringList header; + header << "Base Address" << "Score" << "String Hits" << "Function Hits" << "Data Hits"; + m_resultsTableWidget->setHorizontalHeaderLabels(header); + m_resultsTableWidget->horizontalHeader()->setDefaultAlignment(Qt::AlignLeft); + m_resultsTableWidget->horizontalHeader()->setStretchLastSection(true); + m_resultsTableWidget->verticalHeader()->setVisible(false); + m_resultsTableWidget->setEditTriggers(QAbstractItemView::NoEditTriggers); + m_resultsTableWidget->setSelectionBehavior(QAbstractItemView::SelectRows); + m_resultsTableWidget->setSelectionMode(QAbstractItemView::SingleSelection); + m_resultsTableWidget->setMinimumHeight(150); + m_layout->addWidget(m_resultsTableWidget, row++, column, 1, 5); + connect(m_resultsTableWidget, &QTableWidget::clicked, this, &BaseAddressDetectionWidget::GetClickedBaseAddress); + + m_reloadBase = new QLineEdit("0x0"); + m_layout->addWidget(m_reloadBase, row, column, Qt::AlignLeft); + + m_rebaseButton = new QPushButton("Start Full Analysis"); + connect(m_rebaseButton, &QPushButton::clicked, this, &BaseAddressDetectionWidget::RebaseWithFullAnalysis); + 
m_layout->addWidget(m_rebaseButton, row, column + 1, Qt::AlignLeft); + + HideResultsWidgets(true); + m_layout->setColumnStretch(3, 1); + setLayout(m_layout); +} \ No newline at end of file diff --git a/examples/triage/baseaddress.h b/examples/triage/baseaddress.h new file mode 100644 index 0000000000..5f3dbe9825 --- /dev/null +++ b/examples/triage/baseaddress.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "theme.h" +#include "fontsettings.h" +#include "expandablegroup.h" +#include "viewframe.h" +#include "binaryninjaapi.h" +#include "binaryninjacore.h" + +struct BaseAddressDetectionQtInputs +{ + QComboBox* ArchitectureBox; + QComboBox* AnalysisBox; + QLineEdit* StrlenLineEdit; + QLineEdit* AlignmentLineEdit; + QLineEdit* LowerBoundary; + QLineEdit* UpperBoundary; + QComboBox* POIBox; + QLineEdit* MaxPointersPerCluster; +}; + +struct BaseAddressDetectionQtResults +{ + std::string Status; + std::set> Scores; + BNBaseAddressDetectionConfidence Confidence; + std::map> Reasons; + uint64_t LastTestedBaseAddress; +}; + +class BaseAddressDetectionThread : public QThread +{ + Q_OBJECT + BinaryNinja::Ref m_view; + BinaryNinja::BaseAddressDetection* m_baseDetection; + BaseAddressDetectionQtInputs* m_inputs {}; + void run() override; + +public: + BaseAddressDetectionThread(BaseAddressDetectionQtInputs* widgetInputs, BinaryNinja::Ref bv) + { + m_inputs = widgetInputs; + m_view = bv; + m_baseDetection = new BinaryNinja::BaseAddressDetection(m_view); + } + + void Abort() { m_baseDetection->Abort(); } + bool IsAborted() { return m_baseDetection->IsAborted(); } + +signals: + void ResultReady(const BaseAddressDetectionQtResults& result); +}; + +class BaseAddressDetectionWidget : public QWidget +{ + BaseAddressDetectionThread* m_worker; + BinaryNinja::Ref m_view; + BaseAddressDetectionQtInputs m_inputs; + + QGridLayout* m_layout {}; + QPushButton* m_startButton = nullptr; + QPushButton* m_abortButton = nullptr; + QLabel* 
m_preferredBaseLabel; + QLabel* m_preferredBase; + QLabel* m_confidenceLabel; + QLabel* m_confidence; + QLabel* m_status; + QLineEdit* m_reloadBase; + QPushButton* m_rebaseButton; + QTableWidget* m_resultsTableWidget; + ExpandableGroup* m_advancedSettingsGroup; + + void DetectBaseAddress(); + void RebaseWithFullAnalysis(); + void Abort(); + void HandleResults(const BaseAddressDetectionQtResults& results); + void HideResultsWidgets(bool hide); + void CreateAdvancedSettingsGroup(); + void GetClickedBaseAddress(const QModelIndex& index); + +public: + BaseAddressDetectionWidget(QWidget* parent, BinaryNinja::Ref bv); +}; \ No newline at end of file diff --git a/examples/triage/view.cpp b/examples/triage/view.cpp index 87cdb61aeb..579f8917d5 100644 --- a/examples/triage/view.cpp +++ b/examples/triage/view.cpp @@ -10,6 +10,7 @@ #include "librariesinfo.h" #include "headers.h" #include "strings.h" +#include "baseaddress.h" #include "fontsettings.h" #include @@ -52,6 +53,18 @@ TriageView::TriageView(QWidget* parent, BinaryViewRef data) : QScrollArea(parent delete hdr; } + auto fileMetadata = m_data->GetFile(); + auto existingViews = fileMetadata->GetExistingViews(); + if ((existingViews.size() == 2 && fileMetadata->GetViewOfType("Mapped")) || existingViews.size() == 1) + { + // Binary either only has raw view (Open for triage mode) or raw and mapped view + QGroupBox* baseDetectionGroup = new QGroupBox("Base Address Detection", container); + QVBoxLayout* baseDetectionLayout = new QVBoxLayout(); + baseDetectionLayout->addWidget(new BaseAddressDetectionWidget(this, data)); + baseDetectionGroup->setLayout(baseDetectionLayout); + layout->addWidget(baseDetectionGroup); + } + QGroupBox* librariesGroup = new QGroupBox("Libraries", container); QVBoxLayout* librariesLayout = new QVBoxLayout(); librariesLayout->addWidget(new LibrariesWidget(this, data)); diff --git a/python/__init__.py b/python/__init__.py index 498475f4d3..b770256967 100644 --- a/python/__init__.py +++ 
b/python/__init__.py @@ -74,6 +74,7 @@ from .typecontainer import * from .exceptions import * from .project import * +from .basedetection import * # We import each of these by name to prevent conflicts between # log.py and the function 'log' which we don't import below from .log import ( diff --git a/python/basedetection.py b/python/basedetection.py new file mode 100644 index 0000000000..655f017b16 --- /dev/null +++ b/python/basedetection.py @@ -0,0 +1,321 @@ +# coding=utf-8 +# Copyright (c) 2015-2024 Vector 35 Inc +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +import os +import ctypes +from typing import Optional, Union, Literal +from dataclasses import dataclass +from .enums import BaseAddressDetectionPOIType, BaseAddressDetectionConfidence, BaseAddressDetectionPOISetting +from .binaryview import BinaryView +from . 
import _binaryninjacore as core + + +@dataclass +class BaseAddressDetectionReason: + """``class BaseAddressDetectionReason`` is a class that stores information used to understand why a base address + is a candidate. It consists of a pointer, the offset of the point-of-interest that the pointer aligns with, and the + type of point-of-interest (string, function, or data variable)""" + + pointer: int + offset: int + type: BaseAddressDetectionPOIType + + +class BaseAddressDetection: + """ + ``class BaseAddressDetection`` is a class that is used to detect candidate base addresses for position-dependent + raw binaries + + :Example: + + >>> from binaryninja import * + >>> bad = BaseAddressDetection("firmware.bin") + >>> bad.detect_base_address() + True + >>> hex(bad.preferred_base_address) + '0x4000000' + """ + + def __init__(self, view: Union[str, os.PathLike, BinaryView]) -> None: + if isinstance(view, str) or isinstance(view, os.PathLike): + view = BinaryView.load(str(view), update_analysis=False) + + _handle = core.BNCreateBaseAddressDetection(view.handle) + assert _handle is not None, "core.BNCreateBaseAddressDetection returned None" + self._handle = _handle + self._view_arch = view.arch + + self._scores = list() + self._confidence = 0 + self._last_tested_base_address = None + + def __del__(self): + if core is not None: + core.BNFreeBaseAddressDetection(self._handle) + + @property + def scores(self) -> list[tuple[int, int]]: + """ + ``scores`` returns a list of candidate base addresses and their scores + + .. note:: The score is set to the number of times a pointer pointed to a point-of-interest at that base address + + :Example: + + >>> from binaryninja import * + >>> bad = BaseAddressDetection("firmware.bin") + >>> bad.detect_base_address() + True + >>> for addr, score in bad.scores: + ... print(f"0x{addr:x}: {score}") + ... 
+ 0x4000000: 7 + 0x400dc00: 1 + 0x400d800: 1 + 0x400cc00: 1 + 0x400c400: 1 + 0x400bc00: 1 + 0x400b800: 1 + 0x3fffc00: 1 + + :return: list of tuples containing each base address and score + :rtype: list[tuple[int, int]] + """ + + return self._scores + + @property + def confidence(self) -> BaseAddressDetectionConfidence: + """ + ``confidence`` returns an enum that indicates confidence the preferred candidate base address is correct + + :return: confidence of the base address detection results + :rtype: BaseAddressDetectionConfidence + """ + + return self._confidence + + @property + def last_tested_base_address(self) -> int: + """ + ``last_tested_base_address`` returns the last candidate base address that was tested + + .. note:: This is useful for situations where the user aborts the analysis and wants to restart from the last \ + tested base address by setting the ``low_boundary`` parameter in :py:func:`BaseAddressDetection.detect_base_address` + + :return: last candidate base address tested + :rtype: int + """ + + return self._last_tested_base_address + + @property + def preferred_base_address(self) -> Optional[int]: + """ + ``preferred_base_address`` returns the candidate base address which contains the most amount of pointers that + align with discovered points-of-interest in the binary + + .. note:: :py:attr:`BaseAddressDetection.confidence` reports a confidence level that the preferred base is correct + + .. 
note:: :py:attr:`BaseAddressDetection.scores` returns a list of the top 10 candidate base addresses and their \ + scores and can be used to discover other potential candidates + + :return: preferred candidate base address + :rtype: int + """ + + if not self._scores: + return None + + return self._scores[0][0] + + @property + def aborted(self) -> bool: + """ + ``aborted`` indicates whether or not base address detection analysis was aborted early + + :return: True if the analysis was aborted, False otherwise + :rtype: bool + """ + + return core.BNIsBaseAddressDetectionAborted(self._handle) + + def detect_base_address( + self, + arch: Optional[str] = "", + analysis: Optional[Literal["basic", "controlFlow", "full"]] = "full", + min_strlen: Optional[int] = 10, + alignment: Optional[int] = 1024, + low_boundary: Optional[int] = 0, + high_boundary: Optional[int] = 0xFFFFFFFFFFFFFFFF, + poi_analysis: Optional[BaseAddressDetectionPOISetting] = BaseAddressDetectionPOISetting.POIAnalysisAll, + max_pointers: Optional[int] = 128, + ) -> bool: + """ + ``detect_base_address`` runs initial analysis and attempts to identify candidate base addresses + + .. 
note:: This operation can take a long time to complete depending on the size and complexity of the binary \ + and the settings used + + :param str arch: CPU architecture of the binary (defaults to using auto-detection) + :param str analysis: analysis mode (``basic``, ``controlFlow``, or ``full``) + :param int min_strlen: minimum length of a string to be considered a point-of-interest + :param int alignment: byte boundary to align the base address to while brute-forcing + :param int low_boundary: lower boundary of the base address range to test + :param int high_boundary: upper boundary of the base address range to test + :param BaseAddressDetectionPOISetting poi_analysis: specifies types of points-of-interest to use for analysis + :param int max_pointers: maximum number of candidate pointers to collect per pointer cluster + :return: True if initial analysis completed with results, False otherwise + :rtype: bool + """ + + if not arch and self._view_arch: + arch = str(self._view_arch) + + if analysis not in ["basic", "controlFlow", "full"]: + raise ValueError("invalid analysis setting") + + if alignment <= 0: + raise ValueError("alignment must be greater than 0") + + if max_pointers < 2: + raise ValueError("max pointers must be at least 2") + + if high_boundary < low_boundary: + raise ValueError("upper boundary must be greater than lower boundary") + + settings = core.BNBaseAddressDetectionSettings( + arch.encode(), + analysis.encode(), + min_strlen, + alignment, + low_boundary, + high_boundary, + poi_analysis, + max_pointers, + ) + + if not core.BNDetectBaseAddress(self._handle, settings): + return False + + max_candidates = 10 + scores = (core.BNBaseAddressDetectionScore * max_candidates)() + confidence = core.BaseAddressDetectionConfidenceEnum() + last_base = ctypes.c_ulonglong() + num_candidates = core.BNGetBaseAddressDetectionScores( + self._handle, scores, max_candidates, ctypes.byref(confidence), ctypes.byref(last_base) + ) + + if num_candidates == 0: + return 
False + + self._scores.clear() + for i in range(num_candidates): + self._scores.append((scores[i].BaseAddress, scores[i].Score)) + + self._confidence = confidence.value + self._last_tested_base_address = last_base.value + return True + + def abort(self) -> None: + """ + ``abort`` aborts base address detection analysis + + .. note:: ``abort`` does not stop base address detection until after initial analysis has completed and it is \ + in the base address enumeration phase + + :rtype: None + """ + + core.BNAbortBaseAddressDetection(self._handle) + + def get_reasons(self, base_address: int) -> list[BaseAddressDetectionReason]: + """ + ``get_reasons`` returns a list of reasons that can be used to determine why a base address is a candidate + + :param int base_address: base address to get reasons for + :return: list of reasons for the specified base address + :rtype: list[BaseAddressDetectionReason] + """ + + count = ctypes.c_size_t() + reasons = core.BNGetBaseAddressDetectionReasons(self._handle, base_address, ctypes.byref(count)) + if count.value == 0: + return [] + + try: + result = list() + for i in range(count.value): + result.append(BaseAddressDetectionReason(reasons[i].Pointer, reasons[i].POIOffset, reasons[i].POIType)) + return result + finally: + core.BNFreeBaseAddressDetectionReasons(reasons) + + def _get_data_hits_by_type(self, base_address: int, poi_type: int) -> int: + reasons = self.get_reasons(base_address) + if not reasons: + return 0 + + hits = 0 + for reason in reasons: + if reason.type == poi_type: + hits += 1 + + return hits + + def get_string_hits(self, base_address: int) -> int: + """ + ``get_string_hits`` returns the number of times a pointer pointed to a string at the specified + base address + + .. 
note:: Data variables are only used as points-of-interest if analysis doesn't discover enough strings and \ + functions + + :param int base_address: base address to get string hits for + :return: number of string hits for the specified base address + :rtype: int + """ + + return self._get_data_hits_by_type(base_address, BaseAddressDetectionPOIType.POIString) + + def get_function_hits(self, base_address: int) -> int: + """ + ``get_function_hits`` returns the number of times a pointer pointed to a function at the + specified base address + + :param int base_address: base address to get function hits for + :return: number of function hits for the specified base address + :rtype: int + """ + + return self._get_data_hits_by_type(base_address, BaseAddressDetectionPOIType.POIFunction) + + def get_data_hits(self, base_address: int) -> int: + """ + ``get_data_hits`` returns the number of times a pointer pointed to a data variable at the + specified base address + + :param int base_address: base address to get data hits for + :return: number of data hits for the specified base address + :rtype: int + """ + + return self._get_data_hits_by_type(base_address, BaseAddressDetectionPOIType.POIDataVariable) diff --git a/python/binaryview.py b/python/binaryview.py index a281a46030..43eda87f92 100644 --- a/python/binaryview.py +++ b/python/binaryview.py @@ -30,7 +30,7 @@ import os import uuid from typing import Callable, Generator, Optional, Union, Tuple, List, Mapping, Any, \ - Iterator, Iterable, KeysView, ItemsView, ValuesView, Dict + Iterator, Iterable, KeysView, ItemsView, ValuesView, Dict, overload from dataclasses import dataclass from enum import IntFlag @@ -1391,6 +1391,42 @@ def __repr__(self): def __len__(self): return self.length + @classmethod + def serialize(cls, image_base: int, start: int, length: int, data_offset: int=0, data_length: int=0, flags: 'SegmentFlag'=SegmentFlag.SegmentReadable, auto_defined=True, segments: str="[]"): + """ + Serialize segment parameters 
into a JSON string. This is useful for generating a properly formatted segment description as options when using `load`. + :param int image_base: The base address of the image. + :param int start: The start address of the segment. + :param int length: The length of the segment. + :param int data_offset: The offset of the data within the segment. + :param int data_length: The length of the data within the segment. + :param SegmentFlag flags: The flags of the segment. + :param bool auto_defined: Whether the segment is auto-defined. + :param str segments: An optional, existing array of segments to append to. + :return: A JSON string representing the segment. + :rtype: str + + Example usage: + ``` + >>> base = 0x400000 + >>> rom_base = 0xffff0000 + >>> segments = Segment.serialize(image_base=base, start=base, length=0x1000, data_offset=0, data_length=0x1000, flags=SegmentFlag.SegmentReadable|SegmentFlag.SegmentExecutable) + >>> segments = Segment.serialize(image_base=base, start=rom_base, length=0x1000, flags=SegmentFlag.SegmentReadable, segments=segments) + >>> view = load(bytes.fromhex('5054ebfe'), options={'loader.imageBase': base, 'loader.architecture': 'x86', 'loader.segments': segments}) + ``` + """ + segments_list = json.loads(segments) + segment_info = { + "auto_defined": auto_defined, + "data_length": data_length, + "data_offset": data_offset, + "flags": flags, + "length": length, + "start": start - image_base + } + segments_list.append(segment_info) + return json.dumps(segments_list) + @property def length(self): return int(core.BNSegmentGetLength(self.handle)) @@ -1516,6 +1552,43 @@ def __hash__(self): def __contains__(self, i: int): return i >= self.start and i < self.end + @classmethod + def serialize(cls, image_base: int, name: str, start: int, length: int, semantics: SectionSemantics=SectionSemantics.DefaultSectionSemantics, type: str="", align: int=1, entry_size: int=0, link: str="", info_section: str="", info_data: int=0, auto_defined: bool=True, 
sections: str="[]"): + """ + Serialize section parameters into a JSON string. This is useful for generating a properly formatted section description as options when using `load`. + :param int image_base: The base address of the image. + :param str name: The name of the section. + :param int start: The start address of the section. + :param int length: The length of the section. + :param SectionSemantics semantics: The semantics of the section. + :param str type: The type of the section. + :param int align: The alignment of the section. + :param int entry_size: The entry size of the section. + :param str link: The linked section of the section. + :param str info_section: The info section of the section. + :param int info_data: The info data of the section. + :param bool auto_defined: Whether the section is auto-defined. + :param str sections: An optional, existing array of sections to append to. + :return: A JSON string representing the section. + :rtype: str + """ + sections_list = json.loads(sections) + section_info = { + "align": align, + "auto_defined": auto_defined, + "entry_size": entry_size, + "info_data": info_data, + "info_section": info_section, + "length": length, + "link": link, + "name": name, + "semantics": semantics, + "start": start - image_base, + "type": type + } + sections_list.append(section_info) + return json.dumps(sections_list) + @property def name(self) -> str: return core.BNSectionGetName(self.handle) @@ -1912,6 +1985,12 @@ def __next__(self): self._n += 1 return _function.Function(self._view, func) + @overload + def __getitem__(self, i: int) -> '_function.Function': ... + + @overload + def __getitem__(self, i: slice) -> List['_function.Function']: ... 
+ def __getitem__(self, i: Union[int, slice]) -> Union['_function.Function', List['_function.Function']]: if isinstance(i, int): if i < 0: @@ -7810,6 +7889,7 @@ def attach_type_archive(self, archive: 'typearchive.TypeArchive'): Attach a given type archive to the analysis and try to connect to it. If attaching was successful, names from that archive will become available to pull, but no types will actually be associated by calling this. + :param archive: New archive """ attached = self.attach_type_archive_by_id(archive.id, archive.path) @@ -7854,6 +7934,7 @@ def attach_type_archive_by_id(self, id: str, path: str) -> Optional['typearchive def detach_type_archive(self, archive: 'typearchive.TypeArchive'): """ Detach from a type archive, breaking all associations to types within the archive + :param archive: Type archive to detach """ self.detach_type_archive_by_id(archive.id) @@ -7861,6 +7942,7 @@ def detach_type_archive(self, archive: 'typearchive.TypeArchive'): def detach_type_archive_by_id(self, id: str): """ Detach from a type archive, breaking all associations to types within the archive + :param id: Id of archive to detach """ if not core.BNBinaryViewDetachTypeArchive(self.handle, id): @@ -7869,6 +7951,7 @@ def detach_type_archive_by_id(self, id: str): def get_type_archive(self, id: str) -> Optional['typearchive.TypeArchive']: """ Look up a connected archive by its id + :param id: Id of archive :return: Archive, if one exists with that id. Otherwise None """ @@ -7880,6 +7963,7 @@ def get_type_archive(self, id: str) -> Optional['typearchive.TypeArchive']: def get_type_archive_path(self, id: str) -> Optional[str]: """ Look up the path for an attached (but not necessarily connected) type archive by its id + :param id: Id of archive :return: Archive path, if it is attached. Otherwise None. 
""" @@ -7892,6 +7976,7 @@ def get_type_archive_path(self, id: str) -> Optional[str]: def type_archive_type_names(self) -> Mapping['_types.QualifiedName', List[Tuple['typearchive.TypeArchive', str]]]: """ Get a list of all available type names in all connected archives, and their archive/type id pair + :return: name <-> [(archive, archive type id)] for all type names """ result = {} @@ -7908,6 +7993,7 @@ def type_archive_type_names(self) -> Mapping['_types.QualifiedName', List[Tuple[ def get_type_archives_for_type_name(self, name: '_types.QualifiedNameType') -> List[Tuple['typearchive.TypeArchive', str]]: """ Get a list of all connected type archives that have a given type name + :return: (archive, archive type id) for all archives """ name = _types.QualifiedName(name) @@ -7932,6 +8018,7 @@ def get_type_archives_for_type_name(self, name: '_types.QualifiedNameType') -> L def associated_type_archive_types(self) -> Mapping['_types.QualifiedName', Tuple[Optional['typearchive.TypeArchive'], str]]: """ Get a list of all types in the analysis that are associated with attached type archives + :return: Map of all analysis types to their corresponding archive / id. If a type is associated with a disconnected type archive, the archive will be None. 
""" result = {} @@ -7952,6 +8039,7 @@ def associated_type_archive_types(self) -> Mapping['_types.QualifiedName', Tuple def associated_type_archive_type_ids(self) -> Mapping[str, Tuple[str, str]]: """ Get a list of all types in the analysis that are associated with type archives + :return: Map of all analysis types to their corresponding archive / id """ @@ -7976,6 +8064,7 @@ def associated_type_archive_type_ids(self) -> Mapping[str, Tuple[str, str]]: def get_associated_types_from_archive(self, archive: 'typearchive.TypeArchive') -> Mapping['_types.QualifiedName', str]: """ Get a list of all types in the analysis that are associated with a specific type archive + :return: Map of all analysis types to their corresponding archive id """ result = {} @@ -8011,6 +8100,7 @@ def get_associated_types_from_archive_by_id(self, archive_id: str) -> Mapping[st def get_associated_type_archive_type_target(self, name: '_types.QualifiedNameType') -> Optional[Tuple[Optional['typearchive.TypeArchive'], str]]: """ Determine the target archive / type id of a given analysis type + :param name: Analysis type :return: (archive, archive type id) if the type is associated. None otherwise. """ @@ -8027,6 +8117,7 @@ def get_associated_type_archive_type_target(self, name: '_types.QualifiedNameTyp def get_associated_type_archive_type_target_by_id(self, type_id: str) -> Optional[Tuple[str, str]]: """ Determine the target archive / type id of a given analysis type + :param type_id: Analysis type id :return: (archive id, archive type id) if the type is associated. None otherwise. 
""" @@ -8042,6 +8133,7 @@ def get_associated_type_archive_type_target_by_id(self, type_id: str) -> Optiona def get_associated_type_archive_type_source(self, archive: 'typearchive.TypeArchive', archive_type: '_types.QualifiedNameType') -> Optional['_types.QualifiedName']: """ Determine the local source type name for a given archive type + :param archive: Target type archive :param archive_type: Name of target archive type :return: Name of source analysis type, if this type is associated. None otherwise. @@ -8057,6 +8149,7 @@ def get_associated_type_archive_type_source(self, archive: 'typearchive.TypeArch def get_associated_type_archive_type_source_by_id(self, archive_id: str, archive_type_id: str) -> Optional[str]: """ Determine the local source type id for a given archive type + :param archive_id: Id of target type archive :param archive_type_id: Id of target archive type :return: Id of source analysis type, if this type is associated. None otherwise. @@ -8071,6 +8164,7 @@ def get_associated_type_archive_type_source_by_id(self, archive_id: str, archive def disassociate_type_archive_type(self, type: '_types.QualifiedNameType') -> bool: """ Disassociate an associated type, so that it will no longer receive updates from its connected type archive + :param type: Name of type in analysis :return: True if successful """ @@ -8082,6 +8176,7 @@ def disassociate_type_archive_type(self, type: '_types.QualifiedNameType') -> bo def disassociate_type_archive_type_by_id(self, type_id: str) -> bool: """ Disassociate an associated type id, so that it will no longer receive updates from its connected type archive + :param type_id: Id of type in analysis :return: True if successful """ @@ -8091,6 +8186,7 @@ def pull_types_from_archive(self, archive: 'typearchive.TypeArchive', names: Lis -> Optional[Mapping['_types.QualifiedName', Tuple['_types.QualifiedName', '_types.Type']]]: """ Pull types from a type archive, updating them and any dependencies + :param archive: Target type archive 
:param names: Names of desired types in type archive :return: { name: (name, type) } Mapping from archive name to (analysis name, definition), None on error @@ -8115,6 +8211,7 @@ def pull_types_from_archive_by_id(self, archive_id: str, archive_type_ids: List[ -> Optional[Mapping[str, str]]: """ Pull types from a type archive by id, updating them and any dependencies + :param archive_id: Target type archive id :param archive_type_ids: Ids of desired types in type archive :return: { id: id } Mapping from archive type id to analysis type id, None on error @@ -8142,6 +8239,7 @@ def push_types_to_archive(self, archive: 'typearchive.TypeArchive', names: List[ -> Optional[Mapping['_types.QualifiedName', Tuple['_types.QualifiedName', '_types.Type']]]: """ Push a collection of types, and all their dependencies, into a type archive + :param archive: Target type archive :param names: Names of types in analysis :return: { name: (name, type) } Mapping from analysis name to (archive name, definition), None on error @@ -8166,6 +8264,7 @@ def push_types_to_archive_by_id(self, archive_id: str, type_ids: List[str]) \ -> Optional[Mapping[str, str]]: """ Push a collection of types, and all their dependencies, into a type archive + :param archive_id: Id of target type archive :param type_ids: Ids of types in analysis :return: True if successful @@ -8601,6 +8700,15 @@ def search(self, pattern: str, start: int = None, end: int = None, raw: bool = F :return: A generator object that yields the offset and matched DataBuffer for each match found. 
:rtype: QueueGenerator + :Example: + >>> from binaryninja import load + >>> bv = load('/bin/ls') + >>> print(bv) + + >>> bytes(list(bv.search("50 ?4"))[0][1]).hex() + '5004' + >>> bytes(list(bv.search("[\\x20-\\x25][\\x60-\\x67]"))[0][1]).hex() + '2062' """ if start is None: start = self.start @@ -8725,7 +8833,7 @@ def show_graph_report(self, title: str, graph: flowgraph.FlowGraph) -> None: ``show_graph_report`` displays a :py:class:`FlowGraph` object `graph` in a new tab with ``title``. :param title: Title of the graph - :type title: Plain text string title + :type title: Text string title of the tab :param graph: The graph you wish to display :type graph: :py:class:`FlowGraph` object """ @@ -8736,7 +8844,7 @@ def get_address_input(self, prompt: str, title: str, current_address: Optional[i ``get_address_input`` Gets a virtual address via a prompt displayed to the user :param prompt: Prompt for the dialog - :param title: Display title, if displayed via the UI + :param title: Window title, if used in the UI :param current_address: Optional current address, for relative inputs :return: The value entered by the user, if one was entered """ diff --git a/python/debuginfo.py b/python/debuginfo.py index b1d25503b4..806e61a884 100644 --- a/python/debuginfo.py +++ b/python/debuginfo.py @@ -314,6 +314,7 @@ def get_type_container(self, parser_name: str) -> 'typecontainer.TypeContainer': """ Type Container for all types in the DebugInfo that resulted from the parse of the given parser. + :param parser_name: Name of parser :return: Type Container for types from that parser """ diff --git a/python/enterprise.py b/python/enterprise.py index 636b9023bf..e645ed5f26 100644 --- a/python/enterprise.py +++ b/python/enterprise.py @@ -320,6 +320,11 @@ class LicenseCheckout: """ Helper class for scripts to make use of a license checkout in a scope. 
+ :param duration: Duration between refreshes + :param _cache: Deprecated but left in for compatibility + :param release: If the license should be released at the end of scope. If `False`, you + can either manually release it later or it will expire after `duration`. + :Example: >>> enterprise.connect() >>> enterprise.authenticate_with_credentials("username", "password") @@ -335,7 +340,7 @@ def __init__(self, duration=900, _cache=True, release=True): :param duration: Duration between refreshes :param _cache: Deprecated but left in for compatibility - :param release: If the license should be released at the end of scope. If False, you + :param release: If the license should be released at the end of scope. If `False`, you can either manually release it later or it will expire after `duration`. """ self.desired_duration = duration diff --git a/python/examples/bin_info.py b/python/examples/bin_info.py index bcf5adcd72..b2fd194e9d 100644 --- a/python/examples/bin_info.py +++ b/python/examples/bin_info.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright (c) 2015-2024 Vector 35 Inc # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -19,17 +19,19 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
-import sys import os +import sys +from glob import glob -from binaryninja.log import log_warn, log_to_stdout -import binaryninja.interaction as interaction -from binaryninja.plugin import PluginCommand -from binaryninja import load +from binaryninja import LogLevel, PluginCommand, interaction, load, log, log_to_stdout, log_warn -def get_bininfo(bv): +def get_bininfo(bv, filename=None): if bv is None: + if not (os.path.isfile(filename) and os.access(filename, os.R_OK)): + return("Cannot read {}\n".format(filename)) + bv = load(filename, options={'analysis.mode': 'basic', 'analysis.linearSweep.autorun' : False}) + else: filename = "" if len(sys.argv) > 1: filename = sys.argv[1] @@ -40,7 +42,7 @@ def get_bininfo(bv): sys.exit(1) bv = load(filename) - log_to_stdout(True) + log_to_stdout(LogLevel.InfoLog) contents = "## %s ##\n" % os.path.basename(bv.file.filename) contents += "- START: 0x%x\n\n" % bv.start @@ -62,6 +64,13 @@ def get_bininfo(bv): length = bv.strings[i].length string = bv.strings[i].value contents += "| 0x%x |%d | %s |\n" % (start, length, string) + + # Note that we need to close BV file handles that we opened to prevent a + # memory leak due to a circular reference between BinaryViews and the + # FileMetadata that backs them + + if filename != "": + bv.file.close() return contents @@ -70,6 +79,15 @@ def display_bininfo(bv): if __name__ == "__main__": - print(get_bininfo(None)) + if len(sys.argv) == 1: + filename = interaction.get_open_filename_input("Filename:") + if filename is None: + log.log_warn("No file specified") + else: + print(get_bininfo(None, filename=filename)) + else: + for pattern in sys.argv[1:]: + for filename in glob(pattern): + print(get_bininfo(None, filename=filename)) else: - PluginCommand.register("Binary Info", "Display basic info about the binary", display_bininfo) + PluginCommand.register("Binary Info", "Display basic info about the binary using minimal analysis modes", display_bininfo) diff --git 
a/python/examples/raw_binary_base_detection.py b/python/examples/raw_binary_base_detection.py new file mode 100644 index 0000000000..76268fbece --- /dev/null +++ b/python/examples/raw_binary_base_detection.py @@ -0,0 +1,101 @@ +# Copyright (c) 2015-2024 Vector 35 Inc +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +"""Headless script for demonstrating Binary Ninja automated base address detection for +raw position-dependent firmware binaries +""" + +import argparse +import json +from os import walk, path +from binaryninja import BaseAddressDetection, log_to_stderr, LogLevel, log_info, log_error + + +def _get_directory_listing(_path: str) -> list[str]: + if path.isfile(_path): + return [_path] + + if not path.isdir(_path): + raise FileNotFoundError(f"Path '{_path}' is not a file or directory") + + files = [] + for dirpath, _, filenames in walk(_path): + for filename in filenames: + files.append(path.join(dirpath, filename)) + return files + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="detect base address of position-dependent raw firmware binaries") + parser.add_argument("path", help="path to the position-dependent raw firmware binary or directory") + parser.add_argument("--debug", action="store_true", help="enable debug logging") + parser.add_argument("--reasons", action="store_true", help="show reasons for base address selection") + parser.add_argument("--analysis", type=str, help="analysis level", default="basic") + parser.add_argument("--arch", type=str, default="", help="architecture of the binary") + return parser.parse_args() + + +def _setup_logger(debug: bool) -> None: + if debug: + log_to_stderr(LogLevel.DebugLog) + else: + log_to_stderr(LogLevel.InfoLog) + + +def main() -> None: + """Run the program""" + args = _parse_args() + _setup_logger(args.debug) + + files = _get_directory_listing(args.path) + for _file in files: + log_info(f"Running base address detection analysis on '{_file}'...") + bad = BaseAddressDetection(_file) + if not bad.detect_base_address(analysis=args.analysis, arch=args.arch): + log_error("Base address detection analysis failed") + continue + + json_dict = dict() + json_dict["filename"] = path.basename(_file) + json_dict["preferred_candidate"] = dict() + json_dict["preferred_candidate"]["address"] 
= f"0x{bad.preferred_base_address:x}" + json_dict["preferred_candidate"]["confidence"] = bad.confidence + json_dict["aborted"] = bad.aborted + json_dict["last_tested"] = f"0x{bad.last_tested_base_address:x}" + json_dict["candidates"] = dict() + for baseaddr, score in bad.scores: + json_dict["candidates"][f"0x{baseaddr:x}"] = dict() + json_dict["candidates"][f"0x{baseaddr:x}"]["score"] = score + json_dict["candidates"][f"0x{baseaddr:x}"]["function hits"] = bad.get_function_hits(baseaddr) + json_dict["candidates"][f"0x{baseaddr:x}"]["string hits"] = bad.get_string_hits(baseaddr) + json_dict["candidates"][f"0x{baseaddr:x}"]["data hits"] = bad.get_data_hits(baseaddr) + if args.reasons: + json_dict["candidates"][f"0x{baseaddr:x}"]["reasons"] = dict() + for reason in bad.get_reasons(baseaddr): + json_dict["candidates"][f"0x{baseaddr:x}"]["reasons"][f"0x{reason.pointer:x}"] = { + "poi_offset": f"0x{reason.offset:x}", + "poi_type": reason.type, + } + + print(json.dumps(json_dict, indent=4)) + + +if __name__ == "__main__": + main() diff --git a/python/filemetadata.py b/python/filemetadata.py index a5dda46753..e0222fd251 100644 --- a/python/filemetadata.py +++ b/python/filemetadata.py @@ -120,17 +120,14 @@ class FileMetadata: """ ``class FileMetadata`` represents the file being analyzed by Binary Ninja. It is responsible for opening, closing, creating the database (.bndb) files, and is used to keep track of undoable actions. + + :param str filename: The string path to the file to be opened. Defaults to None. + :param handle: A handle to the underlying C FileMetadata object. Defaults to None. (Internal use only.) """ _associated_data = {} def __init__(self, filename: Optional[str] = None, handle: Optional[core.BNFileMetadataHandle] = None): - """ - Instantiates a new FileMetadata class. - - :param str filename: The string path to the file to be opened. Defaults to None. - :param handle: A handle to the underlying C FileMetadata object. Defaults to None. 
- """ if handle is not None: _type = core.BNFileMetadataHandle _handle = ctypes.cast(handle, _type) diff --git a/python/function.py b/python/function.py index a3d53febb1..e08bb6c3df 100644 --- a/python/function.py +++ b/python/function.py @@ -21,7 +21,7 @@ import ctypes import inspect -from typing import Generator, Optional, List, Tuple, Union, Mapping, Any, Dict +from typing import Generator, Optional, List, Tuple, Union, Mapping, Any, Dict, overload from dataclasses import dataclass # Binary Ninja components @@ -211,6 +211,12 @@ def __next__(self) -> 'basicblock.BasicBlock': self._n += 1 return self._function._instantiate_block(block) + @overload + def __getitem__(self, i: int) -> 'basicblock.BasicBlock': ... + + @overload + def __getitem__(self, i: slice) -> List['basicblock.BasicBlock']: ... + def __getitem__(self, i: Union[int, slice]) -> Union['basicblock.BasicBlock', List['basicblock.BasicBlock']]: if isinstance(i, int): if i < 0: @@ -237,6 +243,12 @@ class LowLevelILBasicBlockList(BasicBlockList): def __repr__(self): return f"" + @overload + def __getitem__(self, i: int) -> 'lowlevelil.LowLevelILBasicBlock': ... + + @overload + def __getitem__(self, i: slice) -> List['lowlevelil.LowLevelILBasicBlock']: ... + def __getitem__( self, i: Union[int, slice] ) -> Union['lowlevelil.LowLevelILBasicBlock', List['lowlevelil.LowLevelILBasicBlock']]: @@ -250,6 +262,12 @@ class MediumLevelILBasicBlockList(BasicBlockList): def __repr__(self): return f"" + @overload + def __getitem__(self, i: int) -> 'mediumlevelil.MediumLevelILBasicBlock': ... + + @overload + def __getitem__(self, i: slice) -> List['mediumlevelil.MediumLevelILBasicBlock']: ... + def __getitem__( self, i: Union[int, slice] ) -> Union['mediumlevelil.MediumLevelILBasicBlock', List['mediumlevelil.MediumLevelILBasicBlock']]: @@ -263,6 +281,12 @@ class HighLevelILBasicBlockList(BasicBlockList): def __repr__(self): return f"" + @overload + def __getitem__(self, i: int) -> 'highlevelil.HighLevelILBasicBlock': ... 
+ + @overload + def __getitem__(self, i: slice) -> List['highlevelil.HighLevelILBasicBlock']: ... + def __getitem__( self, i: Union[int, slice] ) -> Union['highlevelil.HighLevelILBasicBlock', List['highlevelil.HighLevelILBasicBlock']]: @@ -304,10 +328,15 @@ def __next__(self) -> Tuple['architecture.Architecture', int, 'binaryview.Tag']: self._n += 1 return arch, address, binaryview.Tag(core_tag) + @overload + def __getitem__(self, i: int) -> Tuple['architecture.Architecture', int, 'binaryview.Tag']: ... + + @overload + def __getitem__(self, i: slice) -> List[Tuple['architecture.Architecture', int, 'binaryview.Tag']]: ... + def __getitem__( self, i: Union[int, slice] - ) -> Union[Tuple['architecture.Architecture', int, 'binaryview.Tag'], List[Tuple['architecture.Architecture', int, - 'binaryview.Tag']]]: + ) -> Union[Tuple['architecture.Architecture', int, 'binaryview.Tag'], List[Tuple['architecture.Architecture', int, 'binaryview.Tag']]]: if isinstance(i, int): if i < 0: i = len(self) + i @@ -400,7 +429,13 @@ def __ge__(self, other: 'Function') -> bool: def __hash__(self): return hash((self.start, self.arch, self.platform)) - def __getitem__(self, i) -> Union['basicblock.BasicBlock', List['basicblock.BasicBlock']]: + @overload + def __getitem__(self, i: int) -> 'basicblock.BasicBlock': ... + + @overload + def __getitem__(self, i: slice) -> List['basicblock.BasicBlock']: ... 
+ + def __getitem__(self, i: Union[int, slice]) -> Union['basicblock.BasicBlock', List['basicblock.BasicBlock']]: return self.basic_blocks[i] def __iter__(self) -> Generator['basicblock.BasicBlock', None, None]: diff --git a/python/highlevelil.py b/python/highlevelil.py index 2b4d9153bf..afdc2b2ec2 100644 --- a/python/highlevelil.py +++ b/python/highlevelil.py @@ -20,7 +20,7 @@ import ctypes import struct -from typing import Optional, Generator, List, Union, NewType, Tuple, ClassVar, Mapping, Set, Callable, Any, Iterator +from typing import Optional, Generator, List, Union, NewType, Tuple, ClassVar, Mapping, Set, Callable, Any, Iterator, overload from dataclasses import dataclass from enum import Enum @@ -798,10 +798,11 @@ def traverse(self, cb: Callable[['HighLevelILInstruction', Any], Any], *args: An :Example: >>> def get_constant_less_than_value(inst: HighLevelILInstruction, value: int) -> int: - >>> if isinstance(inst, Constant) and inst.constant < value: - >>> return inst.constant + ... if isinstance(inst, Constant) and inst.constant < value: + ... return inst.constant >>> - >>> list(inst.traverse(get_constant_less_than_value, 10)) + >>> for result in inst.traverse(get_constant_less_than_value, 10): + ... print(f"Found a constant {result} < 10 in {repr(inst)}") """ if (result := cb(self, *args, **kwargs)) is not None: yield result @@ -2556,7 +2557,7 @@ def get_basic_block_at(self, index: int) -> Optional['basicblock.BasicBlock']: def traverse(self, cb: Callable[['HighLevelILInstruction', Any], Any], *args: Any, **kwargs: Any) -> Iterator[Any]: """ - ``traverse`` iterates through all the instructions in the HighLevelILInstruction and calls the callback function for + ``traverse`` iterates through all the instructions in the HighLevelILFunction and calls the callback function for each instruction and sub-instruction. See the `Developer Docs `_ for more examples. 
:param Callable[[HighLevelILInstruction, Any], Any] cb: The callback function to call for each node in the HighLevelILInstruction @@ -2572,7 +2573,8 @@ def traverse(self, cb: Callable[['HighLevelILInstruction', Any], Any], *args: An ... case Localcall(dest=Constant(constant=c), params=[_, _, p]) if c == target and not isinstance(p, Constant): ... return i >>> target_address = bv.get_symbol_by_raw_name('_memcpy').address - >>> list(current_il_function.traverse(find_non_constant_memcpy, target_address)) + >>> for result in current_il_function.traverse(find_non_constant_memcpy, target_address): + ... print(f"Found suspicious memcpy: {repr(result)}") """ root = self.root if root is None: @@ -3089,7 +3091,13 @@ def __iter__(self) -> Generator[HighLevelILInstruction, None, None]: for idx in range(self.start, self.end): yield self.il_function[idx] - def __getitem__(self, idx) -> Union[List[HighLevelILInstruction], HighLevelILInstruction]: + @overload + def __getitem__(self, idx: int) -> 'HighLevelILInstruction': ... + + @overload + def __getitem__(self, idx: slice) -> List['HighLevelILInstruction']: ... + + def __getitem__(self, idx: Union[int, slice]) -> Union[List[HighLevelILInstruction], HighLevelILInstruction]: size = self.end - self.start if isinstance(idx, slice): return [self[index] for index in range(*idx.indices(size))] # type: ignore diff --git a/python/interaction.py b/python/interaction.py index c767af7ce5..c3bfb3a281 100644 --- a/python/interaction.py +++ b/python/interaction.py @@ -1062,8 +1062,8 @@ def show_plain_text_report(title, contents): .. note:: This API functions differently on the command-line vs the UI. In the UI, a pop-up is used. On the command-line, \ a simple text prompt is used. 
- :param str title: title to display in the UI pop-up - :param str contents: plaintext contents to display + :param str title: Title to display in the tab + :param str contents: Plaintext contents to display :rtype: None :Example: >>> show_plain_text_report("title", "contents") @@ -1081,6 +1081,7 @@ def show_markdown_report(title, contents, plaintext=""): .. note:: This API function differently on the command-line vs the UI. In the UI a pop-up is used. On the command-line \ a simple text prompt is used. + :param str title: title to display in the tab :param str contents: markdown contents to display :param str plaintext: Plain text version to display (used on the command-line) :rtype: None @@ -1097,6 +1098,7 @@ def show_html_report(title, contents, plaintext=""): applications. This API doesn't support hyperlinking into the BinaryView, use the :py:meth:`BinaryView.show_html_report` \ API if hyperlinking is needed. + :param str title: Title to display in the tab :param str contents: HTML contents to display :param str plaintext: Plain text version to display (used on the command-line) :rtype: None @@ -1115,6 +1117,7 @@ def show_graph_report(title, graph): .. note:: This API function will have no effect outside the UI. + :param str title: Title to display in the tab :param FlowGraph graph: Flow graph to display :rtype: None """ @@ -1144,9 +1147,9 @@ def get_text_line_input(prompt, title): .. note:: This API function differently on the command-line vs the UI. In the UI a pop-up is used. On the command-line \ a simple text prompt is used. - :param str prompt: String to prompt with. - :param str title: Title of the window when executed in the UI. - :rtype: str containing the input without trailing newline character. + :param str prompt: String to prompt with + :param str title: Title of the window when executed in the UI + :rtype: str containing the input without trailing newline character :Example: >>> get_text_line_input("PROMPT>", "getinfo") PROMPT> Input! 
@@ -1167,9 +1170,9 @@ def get_int_input(prompt, title): .. note:: This API function differently on the command-line vs the UI. In the UI a pop-up is used. On the command-line \ a simple text prompt is used. - :param str prompt: String to prompt with. - :param str title: Title of the window when executed in the UI. - :rtype: integer value input by the user. + :param str prompt: String to prompt with + :param str title: Title of the window when executed in the UI + :rtype: integer value input by the user :Example: >>> get_int_input("PROMPT>", "getinfo") PROMPT> 10 diff --git a/python/lowlevelil.py b/python/lowlevelil.py index 0258611390..e3c3ccab88 100644 --- a/python/lowlevelil.py +++ b/python/lowlevelil.py @@ -20,7 +20,7 @@ import ctypes import struct -from typing import Generator, List, Optional, Dict, Union, Tuple, NewType, ClassVar, Set, Callable, Any, Iterator +from typing import Generator, List, Optional, Dict, Union, Tuple, NewType, ClassVar, Set, Callable, Any, Iterator, overload from dataclasses import dataclass # Binary Ninja components @@ -5503,7 +5503,13 @@ def __iter__(self) -> Generator['LowLevelILInstruction', None, None]: for idx in range(self.start, self.end): yield self._il_function[idx] - def __getitem__(self, idx): + @overload + def __getitem__(self, idx: int) -> 'LowLevelILInstruction': ... + + @overload + def __getitem__(self, idx: slice) -> List['LowLevelILInstruction']: ... + + def __getitem__(self, idx: Union[int, slice]) -> Union['LowLevelILInstruction', List['LowLevelILInstruction']]: size = self.end - self.start if isinstance(idx, slice): return [self[index] for index in range(*idx.indices(size))] diff --git a/python/mainthread.py b/python/mainthread.py index 14698b6a7a..9064a6663c 100644 --- a/python/mainthread.py +++ b/python/mainthread.py @@ -18,6 +18,41 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. +""" +.. 
py:module:: mainthread + +This module provides two ways to execute "jobs": + +1. On the Binary Ninja main thread (the UI event thread when running in the GUI application): + * :py:func:`.execute_on_main_thread` + * :py:func:`.execute_on_main_thread_and_wait` +2. On a worker thread + +Any manipulation of the GUI should be performed on the main thread, but any +non-GUI work is generally better to be performed using a worker. This is +especially true for any longer-running work, as the user interface will +be unable to update itself while a job is executing on the main thread. + +There are three worker queues, in order of decreasing priority: + + 1. The Interactive Queue (:py:func:`.worker_interactive_enqueue`) + 2. The Priority Queue (:py:func:`.worker_priority_enqueue`) + 3. The Worker Queue (:py:func:`.worker_enqueue`) + +All of these queues are serviced by the same pool of worker threads. The +difference between the queues is basically one of priority: one queue must +be empty of jobs before a worker thread will execute a job from a lower +priority queue. + +The default maximum number of concurrent worker threads is controlled by the +`analysis.limits.workerThreadCount` setting but can be adjusted at runtime via +:py:func:`.set_worker_thread_count`. + +The worker threads are native threads, managed by the Binary Ninja core. If +more control over the thread is required, consider using the +:py:class:`~binaryninja.plugin.BackgroundTaskThread` class. +""" + # Binary Ninja components from . import _binaryninjacore as core from . 
import scriptingprovider diff --git a/python/mediumlevelil.py b/python/mediumlevelil.py index e857920f93..291da4b9d3 100644 --- a/python/mediumlevelil.py +++ b/python/mediumlevelil.py @@ -21,7 +21,7 @@ import ctypes import struct from typing import (Optional, List, Union, Mapping, - Generator, NewType, Tuple, ClassVar, Dict, Set, Callable, Any, Iterator) + Generator, NewType, Tuple, ClassVar, Dict, Set, Callable, Any, Iterator, overload) from dataclasses import dataclass from . import deprecation @@ -3957,7 +3957,13 @@ def __iter__(self): for idx in range(self.start, self.end): yield self._il_function[idx] - def __getitem__(self, idx) -> Union[List['MediumLevelILInstruction'], 'MediumLevelILInstruction']: + @overload + def __getitem__(self, idx: int) -> 'MediumLevelILInstruction': ... + + @overload + def __getitem__(self, idx: slice) -> List['MediumLevelILInstruction']: ... + + def __getitem__(self, idx: Union[int, slice]) -> Union[List['MediumLevelILInstruction'], 'MediumLevelILInstruction']: size = self.end - self.start if isinstance(idx, slice): return [self[index] for index in range(*idx.indices(size))] # type: ignore diff --git a/python/plugin.py b/python/plugin.py index e68e30d5df..5a92215b09 100644 --- a/python/plugin.py +++ b/python/plugin.py @@ -970,6 +970,18 @@ def __iter__(self): class BackgroundTask(metaclass=_BackgroundTaskMetaclass): + """ + The ``BackgroundTask`` class provides a mechanism for reporting progress of + an optionally cancelable task to the user via the status bar in the UI. + If ``can_cancel`` is `True`, then the task can be cancelled either + programmatically (via :py:meth:`.cancel`) or by the user via the UI. + + Note this class does not provide a means to execute a task, which is + available via the :py:class:`.BackgroundTaskThread` class. 
+ + :param initial_progress_text: text description of the task to display in the status bar in the UI, defaults to `""` + :param can_cancel: whether to enable cancelation of the task, defaults to `False` + """ def __init__(self, initial_progress_text="", can_cancel=False, handle=None): if handle is None: self.handle = core.BNBeginBackgroundTask(initial_progress_text, can_cancel) @@ -1022,6 +1034,15 @@ def cancel(self): class BackgroundTaskThread(BackgroundTask): + """ + The ``BackgroundTaskThread`` class provides an all-in-one solution for executing a :py:class:`.BackgroundTask` + in a thread. + + See the :py:class:`.BackgroundTask` for additional information. + + :param initial_progress_text: text description of the task to display in the status bar in the UI, defaults to `""` + :param can_cancel: whether to enable cancelation of the task, defaults to `False` + """ def __init__(self, initial_progress_text: str = "", can_cancel: bool = False): class _Thread(threading.Thread): def __init__(self, task: 'BackgroundTaskThread'): diff --git a/python/typearchive.py b/python/typearchive.py index 26b15b5f14..7b264056ee 100644 --- a/python/typearchive.py +++ b/python/typearchive.py @@ -39,13 +39,20 @@ class TypeArchive: Type Archives are a collection of types which can be shared between different analysis sessions and are backed by a database file on disk. Their types can be modified, and a history of previous versions of types is stored in snapshots in the archive. + + + Internal-use constructor. API users will want to use :py:meth:`.TypeArchive.open` + or :py:meth:`.TypeArchive.create` instead to get an instance of a TypeArchive. + + :param handle: Handle pointer (Internal use only.) """ def __init__(self, handle: core.BNTypeArchiveHandle): """ Internal-use constructor. API users will want to use `:py:func:TypeArchive.open` or `:py:func:TypeArchive.create` instead to get an instance of a TypeArchive. - :param handle: + + :param handle: Handle pointer (Internal use only.) 
""" binaryninja._init_plugins() self.handle: core.BNTypeArchiveHandle = core.handle_of_type(handle, core.BNTypeArchive) @@ -70,6 +77,7 @@ def __eq__(self, other): def open(path: str) -> Optional['TypeArchive']: """ Open the Type Archive at the given path, if it exists. + :param path: Path to Type Archive file :return: Type Archive, or None if it could not be loaded. """ @@ -82,6 +90,7 @@ def open(path: str) -> Optional['TypeArchive']: def create(path: str, platform: 'platform.Platform') -> Optional['TypeArchive']: """ Create a Type Archive at the given path. + :param path: Path to Type Archive file :param platform: Relevant platform for types in the archive :return: Type Archive, or None if it could not be created. @@ -95,6 +104,7 @@ def create(path: str, platform: 'platform.Platform') -> Optional['TypeArchive']: def lookup_by_id(id: str) -> Optional['TypeArchive']: """ Get a reference to the Type Archive with the known id, if one exists. + :param id: Type Archive id :return: Type archive, or None if it could not be found. 
""" @@ -107,6 +117,7 @@ def lookup_by_id(id: str) -> Optional['TypeArchive']: def path(self) -> Optional[str]: """ Get the path to the Type Archive's file + :return: File path """ return core.BNGetTypeArchivePath(self.handle) @@ -115,6 +126,7 @@ def path(self) -> Optional[str]: def id(self) -> Optional[str]: """ Get the guid for a Type Archive + :return: Guid string """ return core.BNGetTypeArchiveId(self.handle) @@ -123,6 +135,7 @@ def id(self) -> Optional[str]: def platform(self) -> 'platform.Platform': """ Get the associated Platform for a Type Archive + :return: Platform object """ handle = core.BNGetTypeArchivePlatform(self.handle) @@ -133,6 +146,7 @@ def platform(self) -> 'platform.Platform': def current_snapshot_id(self) -> str: """ Get the id of the current snapshot in the type archive + :return: Snapshot id """ result = core.BNGetTypeArchiveCurrentSnapshotId(self.handle) @@ -144,6 +158,7 @@ def current_snapshot_id(self) -> str: def current_snapshot_id(self, value: str): """ Revert the type archive's current snapshot to the given snapshot + :param value: Snapshot id """ core.BNSetTypeArchiveCurrentSnapshot(self.handle, value) @@ -152,6 +167,7 @@ def current_snapshot_id(self, value: str): def all_snapshot_ids(self) -> List[str]: """ Get a list of every snapshot's id + :return: All ids (including the empty first snapshot) """ count = ctypes.c_ulonglong(0) @@ -169,6 +185,7 @@ def all_snapshot_ids(self) -> List[str]: def get_snapshot_parent_ids(self, snapshot: str) -> Optional[List[str]]: """ Get the ids of the parents to the given snapshot + :param snapshot: Child snapshot id :return: Parent snapshot ids, or empty list if the snapshot is a root """ @@ -187,6 +204,7 @@ def get_snapshot_parent_ids(self, snapshot: str) -> Optional[List[str]]: def get_snapshot_child_ids(self, snapshot: str) -> Optional[List[str]]: """ Get the ids of the children to the given snapshot + :param snapshot: Parent snapshot id :return: Child snapshot ids, or empty list if the snapshot 
is a leaf """ @@ -207,6 +225,7 @@ def add_type(self, name: '_types.QualifiedNameType', type: '_types.Type') -> Non Add named types to the type archive. Type must have all dependant named types added prior to being added, or this function will fail. If the type already exists, it will be overwritten. + :param name: Name of new type :param type: Definition of new type """ @@ -217,6 +236,7 @@ def add_types(self, new_types: List[Tuple['_types.QualifiedNameType', '_types.Ty Add named types to the type archive. Types must have all dependant named types prior to being added, or included in the list, or this function will fail. Types already existing with any added names will be overwritten. + :param new_types: Names and definitions of new types """ api_types = (core.BNQualifiedNameAndType * len(new_types))() @@ -237,6 +257,7 @@ def add_types(self, new_types: List[Tuple['_types.QualifiedNameType', '_types.Ty def rename_type(self, old_name: '_types.QualifiedNameType', new_name: '_types.QualifiedNameType') -> None: """ Change the name of an existing type in the type archive. + :param old_name: Old type name in archive :param new_name: New type name """ @@ -246,6 +267,7 @@ def rename_type(self, old_name: '_types.QualifiedNameType', new_name: '_types.Qu def rename_type_by_id(self, id: str, new_name: '_types.QualifiedNameType') -> None: """ Change the name of an existing type in the type archive. + :param id: Old id of type in archive :param new_name: New type name """ @@ -257,6 +279,7 @@ def rename_type_by_id(self, id: str, new_name: '_types.QualifiedNameType') -> No def delete_type(self, name: '_types.QualifiedNameType') -> None: """ Delete an existing type in the type archive. + :param name: Type name """ id = self.get_type_id(name) @@ -267,6 +290,7 @@ def delete_type(self, name: '_types.QualifiedNameType') -> None: def delete_type_by_id(self, id: str) -> None: """ Delete an existing type in the type archive. 
+ :param id: Type id """ if not core.BNDeleteTypeArchiveType(self.handle, id): @@ -275,6 +299,7 @@ def delete_type_by_id(self, id: str) -> None: def get_type_by_name(self, name: '_types.QualifiedNameType', snapshot: Optional[str] = None) -> Optional[_types.Type]: """ Retrieve a stored type in the archive + :param name: Type name :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Type, if it exists. Otherwise None @@ -291,6 +316,7 @@ def get_type_by_name(self, name: '_types.QualifiedNameType', snapshot: Optional[ def get_type_by_id(self, id: str, snapshot: Optional[str] = None) -> Optional[_types.Type]: """ Retrieve a stored type in the archive by id + :param id: Type id :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Type, if it exists. Otherwise None @@ -306,6 +332,7 @@ def get_type_by_id(self, id: str, snapshot: Optional[str] = None) -> Optional[_t def get_type_name_by_id(self, id: str, snapshot: Optional[str] = None) -> Optional['_types.QualifiedName']: """ Retrieve a type's name by its id + :param id: Type id :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Type name, if it exists. Otherwise None @@ -324,6 +351,7 @@ def get_type_name_by_id(self, id: str, snapshot: Optional[str] = None) -> Option def get_type_id(self, name: '_types.QualifiedNameType', snapshot: Optional[str] = None) -> Optional[str]: """ Retrieve a type's id by its name + :param name: Type name :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Type id, if it exists. 
Otherwise None @@ -343,6 +371,7 @@ def get_type_id(self, name: '_types.QualifiedNameType', snapshot: Optional[str] def types(self) -> Dict[_types.QualifiedName, _types.Type]: """ Retrieve all stored types in the archive at the current snapshot + :return: Map of all types, by name """ return self.get_types() @@ -351,6 +380,7 @@ def types(self) -> Dict[_types.QualifiedName, _types.Type]: def types_and_ids(self) -> Dict[str, Tuple[_types.QualifiedName, _types.Type]]: """ Retrieve all stored types in the archive at the current snapshot + :return: Map of type id to type name and definition """ return self.get_types_and_ids() @@ -358,6 +388,7 @@ def types_and_ids(self) -> Dict[str, Tuple[_types.QualifiedName, _types.Type]]: def get_types(self, snapshot: Optional[str] = None) -> Dict[_types.QualifiedName, _types.Type]: """ Retrieve all stored types in the archive at a snapshot + :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Map of all types, by name """ @@ -369,6 +400,7 @@ def get_types(self, snapshot: Optional[str] = None) -> Dict[_types.QualifiedName def get_types_and_ids(self, snapshot: Optional[str] = None) -> Dict[str, Tuple[_types.QualifiedName, _types.Type]]: """ Retrieve all stored types in the archive at a snapshot + :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Map of type id to type name and definition """ @@ -391,6 +423,7 @@ def get_types_and_ids(self, snapshot: Optional[str] = None) -> Dict[str, Tuple[_ def type_ids(self) -> List[str]: """ Get a list of all types' ids in the archive at the current snapshot + :return: All type ids """ return self.get_type_ids() @@ -398,6 +431,7 @@ def type_ids(self) -> List[str]: def get_type_ids(self, snapshot: Optional[str] = None) -> List[str]: """ Get a list of all types' ids in the archive at a snapshot + :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: All type ids """ @@ 
-425,6 +459,7 @@ def type_names(self) -> List['_types.QualifiedName']: def get_type_names(self, snapshot: Optional[str] = None) -> List['_types.QualifiedName']: """ Get a list of all types' names in the archive at a snapshot + :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: All type names """ @@ -445,6 +480,7 @@ def get_type_names(self, snapshot: Optional[str] = None) -> List['_types.Qualifi def type_names_and_ids(self) -> Dict[str, '_types.QualifiedName']: """ Get a list of all types' names and ids in the archive at the current snapshot + :return: Mapping of all type ids to names """ return self.get_type_names_and_ids() @@ -452,6 +488,7 @@ def type_names_and_ids(self) -> Dict[str, '_types.QualifiedName']: def get_type_names_and_ids(self, snapshot: Optional[str] = None) -> Dict[str, '_types.QualifiedName']: """ Get a list of all types' names and ids in the archive at a current snapshot + :param snapshot: Snapshot id to search for types, or None to search the latest snapshot :return: Mapping of all type ids to names """ @@ -475,6 +512,7 @@ def get_type_names_and_ids(self, snapshot: Optional[str] = None) -> Dict[str, '_ def get_outgoing_direct_references(self, id: str, snapshot: Optional[str] = None) -> List[str]: """ Get all types a given type references directly + :param id: Source type id :param snapshot: Snapshot id to search for types, or empty string to search the latest snapshot :return: Target type ids @@ -497,6 +535,7 @@ def get_outgoing_direct_references(self, id: str, snapshot: Optional[str] = None def get_outgoing_recursive_references(self, id: str, snapshot: Optional[str] = None) -> List[str]: """ Get all types a given type references, and any types that the referenced types reference + :param id: Source type id :param snapshot: Snapshot id to search for types, or empty string to search the latest snapshot :return: Target type ids @@ -519,6 +558,7 @@ def get_outgoing_recursive_references(self, id: str, 
snapshot: Optional[str] = N def get_incoming_direct_references(self, id: str, snapshot: Optional[str] = None) -> List[str]: """ Get all types that reference a given type + :param id: Target type id :param snapshot: Snapshot id to search for types, or empty string to search the latest snapshot :return: Source type ids @@ -541,6 +581,7 @@ def get_incoming_direct_references(self, id: str, snapshot: Optional[str] = None def get_incoming_recursive_references(self, id: str, snapshot: Optional[str] = None) -> List[str]: """ Get all types that reference a given type, and all types that reference them, recursively + :param id: Target type id :param snapshot: Snapshot id to search for types, or empty string to search the latest snapshot :return: Source type ids @@ -563,6 +604,7 @@ def get_incoming_recursive_references(self, id: str, snapshot: Optional[str] = N def query_metadata(self, key: str) -> Optional['metadata.MetadataValueType']: """ Look up a metadata entry in the archive + :param string key: key to query :rtype: Metadata associated with the key, if it exists. Otherwise, None :Example: @@ -580,6 +622,7 @@ def query_metadata(self, key: str) -> Optional['metadata.MetadataValueType']: def store_metadata(self, key: str, md: 'metadata.MetadataValueType') -> None: """ Store a key/value pair in the archive's metadata storage + :param string key: key value to associate the Metadata object with :param Varies md: object to store. 
:Example: @@ -597,6 +640,7 @@ def store_metadata(self, key: str, md: 'metadata.MetadataValueType') -> None: def remove_metadata(self, key: str) -> None: """ Delete a given metadata entry in the archive + :param string key: key associated with metadata :Example: @@ -609,6 +653,7 @@ def remove_metadata(self, key: str) -> None: def serialize_snapshot(self, snapshot: str) -> 'databuffer.DataBuffer': """ Turn a given snapshot into a data stream + :param snapshot: Snapshot id :return: Buffer containing serialized snapshot data """ @@ -620,6 +665,7 @@ def serialize_snapshot(self, snapshot: str) -> 'databuffer.DataBuffer': def deserialize_snapshot(self, data: 'databuffer.DataBufferInputType') -> str: """ Take a serialized snapshot data stream and create a new snapshot from it + :param data: Snapshot data :return: String of created snapshot id """ @@ -632,6 +678,7 @@ def deserialize_snapshot(self, data: 'databuffer.DataBufferInputType') -> str: def register_notification(self, notify: 'TypeArchiveNotification') -> None: """ Register a notification listener + :param notify: Object to receive notifications """ cb = TypeArchiveNotificationCallbacks(self, notify) @@ -641,6 +688,7 @@ def register_notification(self, notify: 'TypeArchiveNotification') -> None: def unregister_notification(self, notify: 'TypeArchiveNotification') -> None: """ Unregister a notification listener + :param notify: Object to no longer receive notifications """ if notify in self._notifications: @@ -660,6 +708,7 @@ def __init__(self): def type_added(self, archive: 'TypeArchive', id: str, definition: '_types.Type') -> None: """ Called when a type is added to the archive + :param archive: Source Type archive :param id: Id of type added :param definition: Definition of type @@ -669,6 +718,7 @@ def type_added(self, archive: 'TypeArchive', id: str, definition: '_types.Type') def type_updated(self, archive: 'TypeArchive', id: str, old_definition: '_types.Type', new_definition: '_types.Type') -> None: """ Called 
when a type in the archive is updated to a new definition + :param archive: Source Type archive :param id: Id of type :param old_definition: Previous definition @@ -679,6 +729,7 @@ def type_updated(self, archive: 'TypeArchive', id: str, old_definition: '_types. def type_renamed(self, archive: 'TypeArchive', id: str, old_name: '_types.QualifiedName', new_name: '_types.QualifiedName') -> None: """ Called when a type in the archive is renamed + :param archive: Source Type archive :param id: Type id :param old_name: Previous name @@ -689,6 +740,7 @@ def type_renamed(self, archive: 'TypeArchive', id: str, old_name: '_types.Qualif def type_deleted(self, archive: 'TypeArchive', id: str, definition: '_types.Type') -> None: """ Called when a type in the archive is deleted from the archive + :param archive: Source Type archive :param id: Id of type deleted :param definition: Definition of type deleted diff --git a/python/typecontainer.py b/python/typecontainer.py index cc1b328849..0b3221b829 100644 --- a/python/typecontainer.py +++ b/python/typecontainer.py @@ -37,10 +37,23 @@ class TypeContainer: """ A ``TypeContainer`` is a generic interface to access various Binary Ninja models that contain types. Types are stored with both a unique id and a unique name. + + The ``TypeContainer`` class should not generally be instantiated directly. Instances + can be retrieved from the following properties and methods in the API: + + * :py:meth:`.BinaryView.type_container` + * :py:meth:`.BinaryView.auto_type_container` + * :py:meth:`.BinaryView.user_type_container` + * :py:meth:`.Platform.type_container` + * :py:meth:`.TypeLibrary.type_container` + * :py:meth:`.DebugInfo.get_type_container` + + :param handle: Handle pointer (Internal use only.) 
""" def __init__(self, handle: core.BNTypeContainerHandle): """ Construct a Type Container, internal use only + :param handle: Handle pointer """ binaryninja._init_plugins() @@ -147,6 +160,7 @@ def rename_type(self, type_id: str, new_name: '_types.QualifiedNameType') -> boo """ Rename a type in the Type Container. All references to this type will be updated (by id) to use the new name. + :param type_id: Id of type to update :param new_name: New name for the type :return: True if successful @@ -157,6 +171,7 @@ def delete_type(self, type_id: str) -> bool: """ Delete a type in the Type Container. Behavior of references to this type is not specified and you may end up with broken references if any still exist. + :param type_id: Id of type to delete :return: True if successful """ @@ -166,6 +181,7 @@ def get_type_id(self, type_name: '_types.QualifiedNameType') -> Optional[str]: """ Get the unique id of the type in the Type Container with the given name. If no type with that name exists, returns None. + :param type_name: Name of type :return: Type id, if exists, else, None """ @@ -178,6 +194,7 @@ def get_type_name(self, type_id: str) -> Optional['_types.QualifiedName']: """ Get the unique name of the type in the Type Container with the given id. If no type with that id exists, returns None. + :param type_id: Id of type :return: Type name, if exists, else, None """ @@ -192,6 +209,7 @@ def get_type_by_id(self, type_id: str) -> Optional['_types.Type']: """ Get the definition of the type in the Type Container with the given id. If no type with that id exists, returns None. + :param type_id: Id of type :return: Type object, if exists, else, None """ @@ -232,6 +250,7 @@ def get_type_by_name(self, type_name: '_types.QualifiedNameType') -> Optional['_ """ Get the definition of the type in the Type Container with the given name. If no type with that name exists, returns None. 
+ :param type_name: Name of type :return: Type object, if exists, else, None """ diff --git a/python/websocketprovider.py b/python/websocketprovider.py index 93e88eb868..420f43c66d 100644 --- a/python/websocketprovider.py +++ b/python/websocketprovider.py @@ -46,6 +46,9 @@ def to_bytes(field): class WebsocketClient(object): + """ + This class implements a websocket client. See :py:func:`~WebsocketClient.connect` for more details. + """ _registered_clients = [] def __init__(self, provider, handle=None): @@ -157,12 +160,20 @@ def connect(self, url, headers=None, on_connected=nop, on_disconnected=nop, on_e :param function(bytes) -> bool on_data: function to call when data is read from the websocket :return: if the connection has started, but not necessarily if it succeeded :rtype: bool + + :Example: + >>> provider = list(WebsocketProvider)[0] + >>> client = provider.create_instance() + >>> client.connect("ws://localhost:8080", {}) + True """ if self._connected: raise RuntimeError("Cannot use connect() twice on the same WebsocketClient") self._connected = True + if headers is None: + headers = {} header_keys = (ctypes.c_char_p * len(headers))() header_values = (ctypes.c_char_p * len(headers))() for (i, item) in enumerate(headers.items()): diff --git a/rust/Cargo.lock b/rust/Cargo.lock index b57255e082..e45d2da2d9 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -77,6 +77,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + [[package]] name = "autocfg" version = "1.1.0" @@ -143,6 +149,18 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "cab" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ae6b4de23c7d39c0631fd3cc952d87951c86c75a13812d7247cb7a896e7b3551" +dependencies = [ + "byteorder", + "flate2", + "lzxd", + "time", +] + [[package]] name = "cexpr" version = "0.6.0" @@ -354,6 +372,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -383,7 +407,7 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" dependencies = [ - "fallible-iterator", + "fallible-iterator 0.3.0", "indexmap", "stable_deref_trait", ] @@ -442,6 +466,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.10" @@ -488,6 +521,12 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "lzxd" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784462f20dddd9dfdb45de963fa4ad4a288cb10a7889ac5d2c34fb6481c6b213" + [[package]] name = "memchr" version = "2.7.1" @@ -631,6 +670,43 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "pdb" +version = "0.8.0" +dependencies = [ + "fallible-iterator 0.2.0", + "scroll", + "uuid", +] + +[[package]] +name = "pdb-import-plugin" +version = "0.1.0" +dependencies = [ + "anyhow", + "binaryninja", + "cab", + "home", + "itertools", + "log", + 
"pdb", + "regex", +] + +[[package]] +name = "pdb-import-plugin-static" +version = "0.1.0" +dependencies = [ + "anyhow", + "binaryninja", + "cab", + "home", + "itertools", + "log", + "pdb", + "regex", +] + [[package]] name = "peeking_take_while" version = "0.1.2" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 8e174e68e7..b827b9bb56 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -14,6 +14,10 @@ libc = "0.2" rayon = { version = "1.8", optional = true } binaryninjacore-sys = { path = "binaryninjacore-sys" } +[patch.crates-io] +# Patched pdb crate to implement some extra structures +pdb = { path = "./examples/pdb-ng/pdb-0.8.0-patched" } + [workspace] members = [ "examples/basic_script", @@ -28,6 +32,8 @@ members = [ "examples/mlil_lifter", "examples/hlil_visitor", "examples/hlil_lifter", + "examples/pdb-ng", + "examples/pdb-ng/demo", "examples/template" ] diff --git a/rust/examples/dwarf/dwarf_export/src/lib.rs b/rust/examples/dwarf/dwarf_export/src/lib.rs index 7143f6dd91..ef71f1ae73 100644 --- a/rust/examples/dwarf/dwarf_export/src/lib.rs +++ b/rust/examples/dwarf/dwarf_export/src/lib.rs @@ -551,7 +551,7 @@ fn export_data_vars( dwarf.unit.get_mut(var_die_uid).set( gimli::DW_AT_name, AttributeValue::String( - format!("data_{:x}", data_variable.address) + format!("data_{:x}", data_variable.address()) .as_bytes() .to_vec(), ), @@ -559,15 +559,15 @@ fn export_data_vars( } let mut variable_location = Expression::new(); - variable_location.op_addr(Address::Constant(data_variable.address)); + variable_location.op_addr(Address::Constant(data_variable.address())); dwarf.unit.get_mut(var_die_uid).set( gimli::DW_AT_location, AttributeValue::Exprloc(variable_location), ); if let Some(target_die_uid) = export_type( - format!("{}", data_variable.t.contents), - data_variable.t.contents.as_ref(), + format!("{}", data_variable.t()), + data_variable.t(), bv, defined_types, dwarf, diff --git a/rust/examples/dwarf/shared/src/lib.rs b/rust/examples/dwarf/shared/src/lib.rs 
index 718dcb8cd3..7712ff3b3f 100644 --- a/rust/examples/dwarf/shared/src/lib.rs +++ b/rust/examples/dwarf/shared/src/lib.rs @@ -88,11 +88,11 @@ pub fn create_section_reader<'a, Endian: 'a + Endianity>( if let Some(data_var) = view .data_variables() .iter() - .find(|var| var.address == symbol.address()) + .find(|var| var.address() == symbol.address()) { // TODO : This should eventually be wrapped by some DataView sorta thingy thing, like how python does it - let data_type = data_var.type_with_confidence().contents; - let data = view.read_vec(data_var.address, data_type.width() as usize); + let data_type = data_var.t(); + let data = view.read_vec(data_var.address(), data_type.width() as usize); let element_type = data_type.element_type().unwrap().contents; if let Some(current_section_header) = data diff --git a/rust/examples/pdb-ng/.gitignore b/rust/examples/pdb-ng/.gitignore new file mode 100644 index 0000000000..eb5a316cbd --- /dev/null +++ b/rust/examples/pdb-ng/.gitignore @@ -0,0 +1 @@ +target diff --git a/rust/examples/pdb-ng/CMakeLists.txt b/rust/examples/pdb-ng/CMakeLists.txt new file mode 100644 index 0000000000..c88d4125d8 --- /dev/null +++ b/rust/examples/pdb-ng/CMakeLists.txt @@ -0,0 +1,138 @@ +cmake_minimum_required(VERSION 3.9 FATAL_ERROR) + +project(pdb_import_plugin) + +file(GLOB PLUGIN_SOURCES CONFIGURE_DEPENDS + ${PROJECT_SOURCE_DIR}/Cargo.toml + ${PROJECT_SOURCE_DIR}/src/*.rs) + +file(GLOB_RECURSE API_SOURCES CONFIGURE_DEPENDS + ${PROJECT_SOURCE_DIR}/../../../binaryninjacore.h + ${PROJECT_SOURCE_DIR}/../../binaryninjacore-sys/build.rs + ${PROJECT_SOURCE_DIR}/../../binaryninjacore-sys/Cargo.toml + ${PROJECT_SOURCE_DIR}/../../binaryninjacore-sys/src/* + ${PROJECT_SOURCE_DIR}/../../Cargo.toml + ${PROJECT_SOURCE_DIR}/../../src/*.rs) + +if(CMAKE_BUILD_TYPE MATCHES Debug) + set(TARGET_DIR ${PROJECT_BINARY_DIR}/target/debug) + set(CARGO_OPTS --target-dir=${PROJECT_BINARY_DIR}/target) +else() + set(TARGET_DIR ${PROJECT_BINARY_DIR}/target/release) + 
set(CARGO_OPTS --target-dir=${PROJECT_BINARY_DIR}/target --release) +endif() + +if(FORCE_COLORED_OUTPUT) + set(CARGO_OPTS ${CARGO_OPTS} --color always) +endif() + +if(DEMO) + set(CARGO_FEATURES --features demo --manifest-path ${PROJECT_SOURCE_DIR}/demo/Cargo.toml) + + set(OUTPUT_FILE_NAME ${CMAKE_STATIC_LIBRARY_PREFIX}${PROJECT_NAME}_static${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(OUTPUT_PDB_NAME ${CMAKE_STATIC_LIBRARY_PREFIX}${PROJECT_NAME}.pdb) + set(OUTPUT_FILE_PATH ${CMAKE_BINARY_DIR}/${OUTPUT_FILE_NAME}) + set(OUTPUT_PDB_PATH ${CMAKE_BINARY_DIR}/${OUTPUT_PDB_NAME}) + + set(BINJA_LIB_DIR $<TARGET_FILE_DIR:binaryninjacore>) +else() + set(CARGO_FEATURES "") + + set(OUTPUT_FILE_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${PROJECT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(OUTPUT_PDB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${PROJECT_NAME}.pdb) + set(OUTPUT_FILE_PATH ${BN_CORE_PLUGIN_DIR}/${OUTPUT_FILE_NAME}) + set(OUTPUT_PDB_PATH ${BN_CORE_PLUGIN_DIR}/${OUTPUT_PDB_NAME}) + + set(BINJA_LIB_DIR ${BN_INSTALL_BIN_DIR}) +endif() + +add_custom_target(${PROJECT_NAME} ALL DEPENDS ${OUTPUT_FILE_PATH}) +add_dependencies(${PROJECT_NAME} binaryninjaapi) +get_target_property(BN_API_SOURCE_DIR binaryninjaapi SOURCE_DIR) +list(APPEND CMAKE_MODULE_PATH "${BN_API_SOURCE_DIR}/cmake") +find_package(BinaryNinjaCore REQUIRED) + +set_property(TARGET ${PROJECT_NAME} PROPERTY OUTPUT_FILE_PATH ${OUTPUT_FILE_PATH}) + +find_program(RUSTUP_PATH rustup REQUIRED HINTS ~/.cargo/bin) +set(RUSTUP_COMMAND ${RUSTUP_PATH} run ${CARGO_STABLE_VERSION} cargo) + +if(APPLE) + if(UNIVERSAL) + if(CMAKE_BUILD_TYPE MATCHES Debug) + set(AARCH64_LIB_PATH ${PROJECT_BINARY_DIR}/target/aarch64-apple-darwin/debug/${OUTPUT_FILE_NAME}) + set(X86_64_LIB_PATH ${PROJECT_BINARY_DIR}/target/x86_64-apple-darwin/debug/${OUTPUT_FILE_NAME}) + else() + set(AARCH64_LIB_PATH ${PROJECT_BINARY_DIR}/target/aarch64-apple-darwin/release/${OUTPUT_FILE_NAME}) + set(X86_64_LIB_PATH ${PROJECT_BINARY_DIR}/target/x86_64-apple-darwin/release/${OUTPUT_FILE_NAME}) + endif() + + 
add_custom_command( + OUTPUT ${OUTPUT_FILE_PATH} + COMMAND ${CMAKE_COMMAND} -E env + MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BINJA_LIB_DIR} + ${RUSTUP_COMMAND} clean --target=aarch64-apple-darwin ${CARGO_OPTS} + COMMAND ${CMAKE_COMMAND} -E env + MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BINJA_LIB_DIR} + ${RUSTUP_COMMAND} clean --target=x86_64-apple-darwin ${CARGO_OPTS} + COMMAND ${CMAKE_COMMAND} -E env + MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BINJA_LIB_DIR} + ${RUSTUP_COMMAND} build --target=aarch64-apple-darwin ${CARGO_OPTS} ${CARGO_FEATURES} + COMMAND ${CMAKE_COMMAND} -E env + MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BINJA_LIB_DIR} + ${RUSTUP_COMMAND} build --target=x86_64-apple-darwin ${CARGO_OPTS} ${CARGO_FEATURES} + COMMAND lipo -create ${AARCH64_LIB_PATH} ${X86_64_LIB_PATH} -output ${OUTPUT_FILE_PATH} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES} + ) + else() + if(CMAKE_BUILD_TYPE MATCHES Debug) + set(LIB_PATH ${PROJECT_BINARY_DIR}/target/debug/${OUTPUT_FILE_NAME}) + else() + set(LIB_PATH ${PROJECT_BINARY_DIR}/target/release/${OUTPUT_FILE_NAME}) + endif() + + add_custom_command( + OUTPUT ${OUTPUT_FILE_PATH} + COMMAND ${CMAKE_COMMAND} -E env + MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BINJA_LIB_DIR} + ${RUSTUP_COMMAND} clean ${CARGO_OPTS} + COMMAND ${CMAKE_COMMAND} -E env + MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BINJA_LIB_DIR} + ${RUSTUP_COMMAND} build ${CARGO_OPTS} ${CARGO_FEATURES} + COMMAND ${CMAKE_COMMAND} -E copy ${LIB_PATH} ${OUTPUT_FILE_PATH} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES} + ) + endif() +elseif(WIN32) + if(DEMO) + add_custom_command( + OUTPUT ${OUTPUT_FILE_PATH} + COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BINJA_LIB_DIR} ${RUSTUP_COMMAND} clean ${CARGO_OPTS} + COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BINJA_LIB_DIR} ${RUSTUP_COMMAND} build ${CARGO_OPTS} ${CARGO_FEATURES} + COMMAND ${CMAKE_COMMAND} -E copy 
${TARGET_DIR}/${OUTPUT_FILE_NAME} ${OUTPUT_FILE_PATH} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES} + ) + else() + add_custom_command( + OUTPUT ${OUTPUT_FILE_PATH} + COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BINJA_LIB_DIR} ${RUSTUP_COMMAND} clean ${CARGO_OPTS} + COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BINJA_LIB_DIR} ${RUSTUP_COMMAND} build ${CARGO_OPTS} ${CARGO_FEATURES} + COMMAND ${CMAKE_COMMAND} -E copy ${TARGET_DIR}/${OUTPUT_FILE_NAME} ${OUTPUT_FILE_PATH} + COMMAND ${CMAKE_COMMAND} -E copy ${TARGET_DIR}/${OUTPUT_PDB_NAME} ${OUTPUT_PDB_PATH} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES} + ) + endif() +else() + add_custom_command( + OUTPUT ${OUTPUT_FILE_PATH} + COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BINJA_LIB_DIR} ${RUSTUP_COMMAND} clean ${CARGO_OPTS} + COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BINJA_LIB_DIR} ${RUSTUP_COMMAND} build ${CARGO_OPTS} ${CARGO_FEATURES} + COMMAND ${CMAKE_COMMAND} -E copy ${TARGET_DIR}/${OUTPUT_FILE_NAME} ${OUTPUT_FILE_PATH} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES} + ) +endif() diff --git a/rust/examples/pdb-ng/Cargo.lock b/rust/examples/pdb-ng/Cargo.lock new file mode 100644 index 0000000000..a820f88bd2 --- /dev/null +++ b/rust/examples/pdb-ng/Cargo.lock @@ -0,0 +1,540 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" + +[[package]] +name = "binaryninja" +version = "0.1.0" +dependencies = [ + "binaryninjacore-sys", + "lazy_static", + "libc", + "log", +] + +[[package]] +name = "binaryninjacore-sys" +version = "0.1.0" +dependencies = [ + "bindgen", +] + +[[package]] +name = "bindgen" +version = "0.68.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cab" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae6b4de23c7d39c0631fd3cc952d87951c86c75a13812d7247cb7a896e7b3551" +dependencies = [ + "byteorder", + "flate2", + "lzxd", + "time", +] + +[[package]] +name = "cexpr" 
+version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "either" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + 
"miniz_oxide", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "lzxd" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "784462f20dddd9dfdb45de963fa4ad4a288cb10a7889ac5d2c34fb6481c6b213" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "pdb" +version = "0.8.0" +dependencies = [ + "fallible-iterator", + "scroll", + "uuid", +] + +[[package]] +name = "pdb-import-plugin" +version = "0.1.0" +dependencies = [ + "anyhow", + "binaryninja", + "cab", + "home", + "itertools", + "log", + "pdb", + "regex", +] + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "time" +version = "0.3.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + 
+[[package]] +name = "uuid" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + +[[package]] +name = "windows_x86_64_gnu" +version = 
"0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" diff --git a/rust/examples/pdb-ng/Cargo.toml b/rust/examples/pdb-ng/Cargo.toml new file mode 100644 index 0000000000..8636bced89 --- /dev/null +++ b/rust/examples/pdb-ng/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "pdb-import-plugin" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +anyhow = "^1.0" +binaryninja = {path = "../../"} +home = "^0.5.5" +itertools = "^0.11" +log = "^0.4" +pdb = "^0.8" +cab = "^0.4" +regex = "1" + +[features] +demo = [] diff --git a/rust/examples/pdb-ng/demo/Cargo.lock b/rust/examples/pdb-ng/demo/Cargo.lock new file mode 100644 index 0000000000..201c27d81a --- /dev/null +++ b/rust/examples/pdb-ng/demo/Cargo.lock @@ -0,0 +1,555 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + +[[package]] +name = "binaryninja" +version = "0.1.0" +dependencies = [ + "binaryninjacore-sys", + "lazy_static", + "libc", + "log", +] + +[[package]] +name = "binaryninjacore-sys" +version = "0.1.0" +dependencies = [ + "bindgen", +] + +[[package]] +name = "bindgen" +version = "0.68.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cab" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae6b4de23c7d39c0631fd3cc952d87951c86c75a13812d7247cb7a896e7b3551" +dependencies = [ + "byteorder", + "flate2", + "lzxd", + "time", +] + +[[package]] +name = "cexpr" 
+version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "deranged" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "errno" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + 
"miniz_oxide", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lzxd" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "784462f20dddd9dfdb45de963fa4ad4a288cb10a7889ac5d2c34fb6481c6b213" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "pdb" +version = "0.8.0" +dependencies = [ + "fallible-iterator", + "scroll", + "uuid", +] + +[[package]] +name = "pdb-import-plugin" +version = "0.1.0" +dependencies = [ + "anyhow", + "binaryninja", + "cab", + "home", + "itertools", + "log", + "pdb", + "regex", +] + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "prettyplease" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "scroll" +version = "0.11.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" + +[[package]] +name = "serde" +version = "1.0.189" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.189" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" + +[[package]] +name = "syn" +version = "2.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "time" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +dependencies = [ + "deranged", + "powerfmt", + "serde", + "time-core", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "uuid" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" + +[[package]] +name = "which" +version = 
"4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/rust/examples/pdb-ng/demo/Cargo.toml b/rust/examples/pdb-ng/demo/Cargo.toml new file mode 100644 index 0000000000..656d1c6c01 --- /dev/null +++ b/rust/examples/pdb-ng/demo/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "pdb-import-plugin-static" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["staticlib"] +path = "../src/lib.rs" + +[dependencies] +anyhow = "^1.0" +binaryninja = {path = "../../../"} +home = "^0.5.5" +itertools = "^0.11" +log = "^0.4" +pdb = "^0.8" +cab = "^0.4" +regex = "1" + +[features] +demo = [] diff --git a/rust/examples/pdb-ng/pdb-0.8.0-patched b/rust/examples/pdb-ng/pdb-0.8.0-patched new file mode 160000 index 0000000000..030477d646 --- /dev/null +++ b/rust/examples/pdb-ng/pdb-0.8.0-patched @@ -0,0 +1 @@ +Subproject commit 030477d646bf05c23f5ddcdd23b4d110a04dff05 diff --git a/rust/examples/pdb-ng/src/lib.rs 
b/rust/examples/pdb-ng/src/lib.rs new file mode 100644 index 0000000000..3fdea60fba --- /dev/null +++ b/rust/examples/pdb-ng/src/lib.rs @@ -0,0 +1,912 @@ +// Copyright 2022-2024 Vector 35 Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::env::{current_dir, current_exe, temp_dir}; +use std::io::Cursor; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::mpsc; +use std::{env, fs}; + +use anyhow::{anyhow, Result}; +use log::{debug, error, info, LevelFilter}; +use pdb::PDB; + +use binaryninja::binaryview::{BinaryView, BinaryViewExt}; +use binaryninja::debuginfo::{CustomDebugInfoParser, DebugInfo, DebugInfoParser}; +use binaryninja::downloadprovider::{DownloadInstanceInputOutputCallbacks, DownloadProvider}; +use binaryninja::interaction::{MessageBoxButtonResult, MessageBoxButtonSet}; +use binaryninja::settings::Settings; +use binaryninja::string::BnString; +use binaryninja::{add_optional_plugin_dependency, interaction, logger, user_directory}; +use parser::PDBParserInstance; + +/// PDB Parser!! 
+/// +/// General project structure: +/// - lib.rs: Interaction with DebugInfoParser and plugin actions +/// - parser.rs: PDB Parser base functionality, puts the internal structures into the DebugInfo +/// - type_parser.rs: Parses all the TPI type stream information into both named and indexed types +/// - symbol_parser.rs: Parses, one module at a time, symbol information into named symbols +/// - struct_grouper.rs: Ugly algorithm for handling union and structure members +mod parser; +mod struct_grouper; +mod symbol_parser; +mod type_parser; + +// struct PDBLoad; +// struct PDBLoadFile; +// struct PDBSetSymbolPath; + +#[allow(dead_code)] +struct PDBInfo { + path: String, + file_name: String, + age: u32, + guid: Vec, + guid_age_string: String, +} + +fn is_pdb(view: &BinaryView) -> bool { + let pdb_magic_bytes = "Microsoft C/C++ MSF 7.00\r\n\x1A\x44\x53\x00\x00\x00"; + if let Ok(raw_view) = view.raw_view() { + raw_view.read_vec(0, pdb_magic_bytes.len()) == pdb_magic_bytes.as_bytes() + } else { + false + } +} + +fn default_local_cache() -> Result { + // The default value is a directory named "sym" immediately below the program directory + // of the calling application. This is sometimes referred to as the default local cache. + let current_path = current_exe()?; + let parent_path = current_path + .parent() + .ok_or_else(|| anyhow!("No parent to current exe"))?; + let mut cache_path = PathBuf::from(parent_path); + cache_path.push("sym"); + return Ok(cache_path + .to_str() + .ok_or_else(|| anyhow!("Could not convert cache path to string"))? 
+ .to_string()); +} + +fn active_local_cache(view: Option<&BinaryView>) -> Result { + // Check the local symbol store + let mut local_store_path = Settings::new("") + .get_string("pdb.files.localStoreAbsolute", view, None) + .to_string(); + if local_store_path.is_empty() { + local_store_path = match user_directory() { + Ok(mut dir) => { + dir.push( + Settings::new("") + .get_string("pdb.files.localStoreRelative", view, None) + .to_string(), + ); + match dir.to_str() { + Some(s) => s.to_string(), + _ => "".to_string(), + } + } + _ => "".to_string(), + }; + } + if !local_store_path.is_empty() { + Ok(local_store_path) + } else if let Ok(default_cache) = default_local_cache() { + Ok(default_cache) + } else if let Ok(current) = current_dir().map(|d| { + d.to_str() + .expect("Expected current dir to be a valid string") + .to_string() + }) { + Ok(current) + } else { + Ok(temp_dir() + .to_str() + .expect("Expected temp dir to be a valid string") + .to_string()) + } +} + +fn parse_sym_srv( + symbol_path: &String, + default_store: &String, +) -> Result>> { + // https://docs.microsoft.com/en-us/windows/win32/debug/using-symsrv + // Why + + // ... the symbol path (_NT_SYMBOL_PATH environment variable) can be made up of several path + // elements separated by semicolons. If any one or more of these path elements begins with + // the text "srv*", then the element is a symbol server and will use SymSrv to locate + // symbol files. + + // If the "srv*" text is not specified but the actual path element is a symbol server store, + // then the symbol handler will act as if "srv*" were specified. The symbol handler makes + // this determination by searching for the existence of a file called "pingme.txt" in + // the root directory of the specified path. + + // ... symbol servers are made up of symbol store elements separated by asterisks. There can + // be up to 10 symbol stores after the "srv*" prefix. 
+ + let mut sym_srv_results = vec![]; + + // 'path elements separated by semicolons' + for path_element in symbol_path.split(';') { + // 'begins with the text "srv*"' + if path_element.to_lowercase().starts_with("srv*") { + // 'symbol store elements separated by asterisks' + for store_element in path_element[4..].split('*') { + if store_element.is_empty() { + sym_srv_results.push(default_store.clone()); + } else { + sym_srv_results.push(store_element.to_string()); + } + } + } else if PathBuf::from(path_element).exists() { + // 'searching for the existence of a file called "pingme.txt" in the root directory' + let pingme_txt = path_element.to_string() + "/" + "pingme.txt"; + if PathBuf::from(pingme_txt).exists() { + sym_srv_results.push(path_element.to_string()); + } + } + } + + Ok(Box::new(sym_srv_results.into_iter())) +} + +fn read_from_sym_store(path: &String) -> Result<(bool, Vec)> { + info!("Read file: {}", path); + if !path.contains("://") { + // Local file + let conts = fs::read(path)?; + return Ok((false, conts)); + } + + if !Settings::new("").get_bool("network.pdbAutoDownload", None, None) { + return Err(anyhow!("Auto download disabled")); + } + + // Download from remote + let (tx, rx) = mpsc::channel(); + let write = move |data: &[u8]| -> usize { + if let Ok(_) = tx.send(Vec::from(data)) { + data.len() + } else { + 0 + } + }; + + info!("GET: {}", path); + + let dp = + DownloadProvider::try_default().map_err(|_| anyhow!("No default download provider"))?; + let mut inst = dp + .create_instance() + .map_err(|_| anyhow!("Couldn't create download instance"))?; + let result = inst + .perform_custom_request( + "GET", + path.clone(), + HashMap::::new(), + DownloadInstanceInputOutputCallbacks { + read: None, + write: Some(Box::new(write)), + progress: None, + }, + ) + .map_err(|e| anyhow!(e.to_string()))?; + if result.status_code != 200 { + return Err(anyhow!("Path does not exist")); + } + + let mut expected_length = None; + for (k, v) in result.headers.iter() { + 
if k.to_lowercase() == "content-length" { + expected_length = Some(usize::from_str(v)?); + } + } + + let mut data = vec![]; + while let Ok(packet) = rx.try_recv() { + data.extend(packet.into_iter()); + } + + if let Some(length) = expected_length { + if data.len() != length { + return Err(anyhow!(format!( + "Bad length: expected {} got {}", + length, + data.len() + ))); + } + } + + Ok((true, data)) +} + +fn sym_store_exists(path: &String) -> Result { + info!("Check file exists: {}", path); + if !path.contains("://") { + // Local file + if PathBuf::from(path).exists() { + return Ok(true); + } else { + return Ok(false); + } + } + + if !Settings::new("").get_bool("network.pdbAutoDownload", None, None) { + return Err(anyhow!("Auto download disabled")); + } + info!("HEAD: {}", path); + + // Download from remote + let dp = + DownloadProvider::try_default().map_err(|_| anyhow!("No default download provider"))?; + let mut inst = dp + .create_instance() + .map_err(|_| anyhow!("Couldn't create download instance"))?; + let result = inst + .perform_custom_request( + "HEAD", + path.clone(), + HashMap::::new(), + DownloadInstanceInputOutputCallbacks { + read: None, + write: None, + progress: None, + }, + ) + .map_err(|e| anyhow!(e.to_string()))?; + if result.status_code != 200 { + return Ok(false); + } + + Ok(true) +} + +fn search_sym_store(store_path: &String, pdb_info: &PDBInfo) -> Result> { + // https://www.technlg.net/windows/symbol-server-path-windbg-debugging/ + // For symbol servers, to identify the files path easily, Windbg uses the format + // binaryname.pdb/GUID + + // Doesn't actually say what the format is, just gives an example: + // https://docs.microsoft.com/en-us/windows/win32/debug/using-symstore + // In this example, the lookup path for the acpi.dbg symbol file might look something + // like this: \\mybuilds\symsrv\acpi.dbg\37cdb03962040. 
+ let base_path = + store_path.clone() + "/" + &pdb_info.file_name + "/" + &pdb_info.guid_age_string; + + // Three files may exist inside the lookup directory: + // 1. If the file was stored, then acpi.dbg will exist there. + // 2. If a pointer was stored, then a file called file.ptr will exist and contain the path + // to the actual symbol file. + // 3. A file called refs.ptr, which contains a list of all the current locations for + // acpi.dbg with this timestamp and image size that are currently added to the + // symbol store. + + // We don't care about #3 because it says we don't + + let direct_path = base_path.clone() + "/" + &pdb_info.file_name; + if sym_store_exists(&direct_path)? { + return Ok(Some(direct_path)); + } + + let file_ptr = base_path.clone() + "/" + "file.ptr"; + if sym_store_exists(&file_ptr)? { + let path = String::from_utf8(read_from_sym_store(&file_ptr)?.1)?; + // PATH:https://full/path + if path.starts_with("PATH:") { + if sym_store_exists(&path[5..].to_string())? 
{ + return Ok(Some(path)); + } + } + } + + return Ok(None); +} + +fn parse_pdb_info(view: &BinaryView) -> Option { + match view.get_metadata::("DEBUG_INFO_TYPE") { + Some(Ok(0x53445352 /* 'SDSR' */)) => {} + _ => return None, + } + + // This is stored in the BV by the PE loader + let file_path = match view.get_metadata::("PDB_FILENAME") { + Some(Ok(md)) => md, + _ => return None, + }; + let mut guid = match view.get_metadata::, _>("PDB_GUID") { + Some(Ok(md)) => md, + _ => return None, + }; + let age = match view.get_metadata::("PDB_AGE") { + Some(Ok(md)) => md as u32, + _ => return None, + }; + + if guid.len() != 16 { + return None; + } + + // struct _GUID { + // uint32_t Data1; + // uint16_t Data2; + // uint16_t Data3; + // uint8_t Data4[8]; + // }; + + // Endian swap + // Data1 + guid.swap(0, 3); + guid.swap(1, 2); + // Data2 + guid.swap(4, 5); + // Data3 + guid.swap(6, 7); + + let guid_age_string = guid + .iter() + .take(16) + .map(|ch| format!("{:02X}", ch)) + .collect::>() + .join("") + + &format!("{:X}", age); + + // Just assume all the paths are / + let file_path = if cfg!(windows) { + file_path + } else { + file_path.replace("\\", "/") + }; + let path = file_path; + let file_name = if let Some(idx) = path.rfind("\\") { + path[(idx + 1)..].to_string() + } else if let Some(idx) = path.rfind("/") { + path[(idx + 1)..].to_string() + } else { + path.clone() + }; + + Some(PDBInfo { + path, + file_name, + age, + guid, + guid_age_string, + }) +} + +struct PDBParser; +impl PDBParser { + fn load_from_file( + &self, + filename: &String, + debug_info: &mut DebugInfo, + view: &BinaryView, + progress: &Box Result<(), ()>>, + check_guid: bool, + did_download: bool, + ) -> Result<()> { + let (_downloaded, conts) = read_from_sym_store(filename)?; + let mut pdb = PDB::open(Cursor::new(&conts))?; + + if let Some(info) = parse_pdb_info(view) { + let pdb_info = pdb.pdb_information()?; + if info.guid.as_slice() != pdb_info.guid.as_ref() { + if check_guid { + return 
Err(anyhow!("PDB GUID does not match")); + } else { + if interaction::show_message_box( + "Mismatched PDB", + "This PDB does not look like it matches your binary. Do you want to load it anyway?", + MessageBoxButtonSet::YesNoButtonSet, + binaryninja::interaction::MessageBoxIcon::QuestionIcon + ) == MessageBoxButtonResult::NoButton { + return Err(anyhow!("User cancelled mismatched load")); + } + } + } + + // Microsoft's symbol server sometimes gives us a different version of the PDB + // than what we ask for. It's weird, but if they're doing it, I trust it will work. + if info.age != pdb_info.age { + if info.age > pdb_info.age { + // Have not seen this case, so I'm not sure if this is fatal + info!("PDB age is older than our binary! Loading it anyway, but there may be missing information."); + } else { + info!("PDB age is newer than our binary! Loading it anyway, there probably shouldn't be any issues."); + } + } + + if did_download && Settings::new("").get_bool("pdb.files.localStoreCache", None, None) { + match active_local_cache(Some(view)) { + Ok(cache) => { + let mut cab_path = PathBuf::from(&cache); + cab_path.push(&info.file_name); + cab_path.push( + pdb_info + .guid + .as_ref() + .iter() + .map(|ch| format!("{:02X}", ch)) + .collect::>() + .join("") + + &format!("{:X}", pdb_info.age), + ); + let has_dir = if cab_path.is_dir() { + true + } else { + match fs::create_dir_all(&cab_path) { + Ok(_) => true, + Err(e) => { + error!("Could not create PDB cache dir: {}", e); + false + } + } + }; + if has_dir { + cab_path.push(&info.file_name); + match fs::write(&cab_path, &conts) { + Ok(_) => { + info!("Downloaded to: {}", cab_path.to_string_lossy()); + } + Err(e) => error!("Could not write PDB to cache: {}", e), + } + } + + // Also write with the age we expect in our binary view + if info.age < pdb_info.age { + let mut cab_path = PathBuf::from(&cache); + cab_path.push(&info.file_name); + cab_path.push( + pdb_info + .guid + .as_ref() + .iter() + .map(|ch| 
format!("{:02X}", ch)) + .collect::>() + .join("") + + &format!("{:X}", info.age), // XXX: BV's pdb age + ); + let has_dir = if cab_path.is_dir() { + true + } else { + match fs::create_dir_all(&cab_path) { + Ok(_) => true, + Err(e) => { + error!("Could not create PDB cache dir: {}", e); + false + } + } + }; + if has_dir { + cab_path.push(&info.file_name); + match fs::write(&cab_path, &conts) { + Ok(_) => { + info!("Downloaded to: {}", cab_path.to_string_lossy()); + } + Err(e) => error!("Could not write PDB to cache: {}", e), + } + } + } + } + Err(e) => error!("Could not get local cache for writing: {}", e), + } + } + } else { + if check_guid { + return Err(anyhow!("File not compiled with PDB information")); + } else { + if interaction::show_message_box( + "No PDB Information", + "This file does not look like it was compiled with a PDB, so your PDB might not correctly apply to the analysis. Do you want to load it anyway?", + MessageBoxButtonSet::YesNoButtonSet, + binaryninja::interaction::MessageBoxIcon::QuestionIcon + ) == MessageBoxButtonResult::NoButton { + return Err(anyhow!("User cancelled missing info load")); + } + } + } + + let mut inst = match PDBParserInstance::new(debug_info, view, pdb) { + Ok(inst) => { + info!("Loaded PDB, parsing..."); + inst + } + Err(e) => { + error!("Could not open PDB: {}", e); + return Err(e); + } + }; + match inst.try_parse_info(Box::new(|cur, max| { + (*progress)(cur, max).map_err(|_| anyhow!("Cancelled")) + })) { + Ok(()) => { + info!("Parsed pdb"); + Ok(()) + } + Err(e) => { + error!("Could not parse PDB: {}", e); + if e.to_string() == "Todo" { + Ok(()) + } else { + Err(e) + } + } + } + } +} + +impl CustomDebugInfoParser for PDBParser { + fn is_valid(&self, view: &BinaryView) -> bool { + view.type_name().to_string() == "PE" || is_pdb(view) + } + + fn parse_info( + &self, + debug_info: &mut DebugInfo, + view: &BinaryView, + debug_file: &BinaryView, + progress: Box Result<(), ()>>, + ) -> bool { + let filename = 
debug_file.file().filename(); + + if is_pdb(debug_file) { + match self.load_from_file( + &filename.to_string(), + debug_info, + view, + &progress, + false, + false, + ) { + Ok(_) => return true, + Err(e) if e.to_string() == "Cancelled" => return false, + Err(_) => { + error!("Chosen PDB file failed to load"); + return false; + } + } + } + + // See if we can get pdb info from the view + if let Some(info) = parse_pdb_info(view) { + // First, check _NT_SYMBOL_PATH + if let Ok(sym_path) = env::var("_NT_SYMBOL_PATH") { + let stores = if let Ok(default_cache) = active_local_cache(Some(view)) { + parse_sym_srv(&sym_path, &default_cache) + } else { + Err(anyhow!("No local cache found")) + }; + if let Ok(stores) = stores { + for store in stores { + match search_sym_store(&store, &info) { + Ok(Some(path)) => { + match self + .load_from_file(&path, debug_info, view, &progress, true, true) + { + Ok(_) => return true, + Err(e) if e.to_string() == "Cancelled" => return false, + Err(e) => debug!("Skipping, {}", e.to_string()), + } + } + Ok(None) => {} + e => error!("Error searching symbol store {}: {:?}", store, e), + } + } + } + } + + // Does the raw path just exist? 
+ if PathBuf::from(&info.path).exists() { + match self.load_from_file(&info.path, debug_info, view, &progress, true, false) { + Ok(_) => return true, + Err(e) if e.to_string() == "Cancelled" => return false, + Err(e) => debug!("Skipping, {}", e.to_string()), + } + } + + // Try in the same directory as the file + let mut potential_path = PathBuf::from(view.file().filename().to_string()); + potential_path.pop(); + potential_path.push(&info.file_name); + if potential_path.exists() { + match self.load_from_file( + &potential_path + .to_str() + .expect("Potential path is a real string") + .to_string(), + debug_info, + view, + &progress, + true, + false, + ) { + Ok(_) => return true, + Err(e) if e.to_string() == "Cancelled" => return false, + Err(e) => debug!("Skipping, {}", e.to_string()), + } + } + + // Check the local symbol store + if let Ok(local_store_path) = active_local_cache(Some(view)) { + match search_sym_store(&local_store_path, &info) { + Ok(Some(path)) => { + match self.load_from_file(&path, debug_info, view, &progress, true, false) { + Ok(_) => return true, + Err(e) if e.to_string() == "Cancelled" => return false, + Err(e) => debug!("Skipping, {}", e.to_string()), + } + } + Ok(None) => {} + e => error!( + "Error searching local symbol store {}: {:?}", + local_store_path, e + ), + } + } + + // Next, try downloading from all symbol servers in the server list + let server_list = + Settings::new("").get_string_list("pdb.files.symbolServerList", Some(view), None); + + for server in server_list.iter() { + match search_sym_store(&server.to_string(), &info) { + Ok(Some(path)) => { + match self.load_from_file(&path, debug_info, view, &progress, true, true) { + Ok(_) => return true, + Err(e) if e.to_string() == "Cancelled" => return false, + Err(e) => debug!("Skipping, {}", e.to_string()), + } + } + Ok(None) => {} + e => error!("Error searching remote symbol server {}: {:?}", server, e), + } + } + } + false + } +} + +#[cfg(not(feature = "demo"))] +#[no_mangle] +pub 
extern "C" fn CorePluginDependencies() { + add_optional_plugin_dependency("view_pe"); +} + +#[cfg(not(feature = "demo"))] +#[no_mangle] +pub extern "C" fn CorePluginInit() -> bool { + init_plugin() +} + +#[cfg(feature = "demo")] +#[no_mangle] +pub extern "C" fn PDBPluginInit() -> bool { + init_plugin() +} + +fn init_plugin() -> bool { + let _ = logger::init(LevelFilter::Debug); + DebugInfoParser::register("PDB", PDBParser {}); + + let settings = Settings::new(""); + settings.register_group("pdb", "PDB Loader"); + settings.register_setting_json( + "pdb.files.localStoreAbsolute", + r#"{ + "title" : "Local Symbol Store Absolute Path", + "type" : "string", + "default" : "", + "aliases" : ["pdb.local-store-absolute", "pdb.localStoreAbsolute"], + "description" : "Absolute path specifying where the PDB symbol store exists on this machine, overrides relative path.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.files.localStoreRelative", + r#"{ + "title" : "Local Symbol Store Relative Path", + "type" : "string", + "default" : "symbols", + "aliases" : ["pdb.local-store-relative", "pdb.localStoreRelative"], + "description" : "Path *relative* to the binaryninja _user_ directory, specifying the pdb symbol store. 
If the Local Symbol Store Absolute Path is specified, this is ignored.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.files.localStoreCache", + r#"{ + "title" : "Cache Downloaded PDBs in Local Store", + "type" : "boolean", + "default" : true, + "aliases" : ["pdb.localStoreCache"], + "description" : "Store PDBs downloaded from Symbol Servers in the local Symbol Store Path.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "network.pdbAutoDownload", + r#"{ + "title" : "Enable Auto Downloading PDBs", + "type" : "boolean", + "default" : true, + "aliases" : ["pdb.autoDownload", "pdb.auto-download-pdb"], + "description" : "Automatically search for and download pdb files from specified symbol servers.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.files.symbolServerList", + r#"{ + "title" : "Symbol Server List", + "type" : "array", + "elementType" : "string", + "default" : ["https://msdl.microsoft.com/download/symbols"], + "aliases" : ["pdb.symbol-server-list", "pdb.symbolServerList"], + "description" : "List of servers to query for pdb symbols.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.features.expandRTTIStructures", + r#"{ + "title" : "Expand RTTI Structures", + "type" : "boolean", + "default" : true, + "aliases" : ["pdb.expandRTTIStructures"], + "description" : "Create structures for RTTI symbols with variable-sized names and arrays.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.features.generateVTables", + r#"{ + "title" : "Generate Virtual Table Structures", + "type" : "boolean", + "default" : true, + "aliases" : ["pdb.generateVTables"], + "description" : "Create Virtual Table (VTable) structures for C++ classes found when parsing.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.features.loadGlobalSymbols", + r#"{ + "title" : "Load Global Module Symbols", + "type" : "boolean", + "default" : true, + "aliases" : 
["pdb.loadGlobalSymbols"], + "description" : "Load symbols in the Global module of the PDB. These symbols have generally lower quality types due to relying on the demangler.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.features.allowUnnamedVoidSymbols", + r#"{ + "title" : "Allow Unnamed Untyped Symbols", + "type" : "boolean", + "default" : false, + "aliases" : ["pdb.allowUnnamedVoidSymbols"], + "description" : "Allow creation of symbols with no name and void types, often used as static local variables. Generally, these are just noisy and not relevant.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.features.allowVoidGlobals", + r#"{ + "title" : "Allow Untyped Symbols", + "type" : "boolean", + "default" : true, + "aliases" : ["pdb.allowVoidGlobals"], + "description" : "Allow creation of symbols that have no type, and will be created as void-typed symbols. Generally, this happens in a stripped PDB when a Global symbol's mangled name does not contain type information.", + "ignore" : [] + }"#, + ); + + settings.register_setting_json( + "pdb.features.createMissingNamedTypes", + r#"{ + "title" : "Create Missing Named Types", + "type" : "boolean", + "default" : true, + "aliases" : ["pdb.createMissingNamedTypes"], + "description" : "Allow creation of types named by function signatures which are not found in the PDB's types list or the Binary View. 
These types are usually found in stripped PDBs that have no type information but function signatures reference the stripped types.", + "ignore" : [] + }"#, + ); + + true +} + +#[test] +fn test_default_cache_path() { + println!("{:?}", default_local_cache()); +} + +#[test] +fn test_sym_srv() { + assert_eq!( + parse_sym_srv( + &r"srv*\\mybuilds\mysymbols".to_string(), + &r"DEFAULT_STORE".to_string() + ) + .expect("parse success") + .collect::>(), + vec![r"\\mybuilds\mysymbols".to_string()] + ); + assert_eq!( + parse_sym_srv( + &r"srv*c:\localsymbols*\\mybuilds\mysymbols".to_string(), + &r"DEFAULT_STORE".to_string() + ) + .expect("parse success") + .collect::>(), + vec![ + r"c:\localsymbols".to_string(), + r"\\mybuilds\mysymbols".to_string() + ] + ); + assert_eq!( + parse_sym_srv( + &r"srv**\\mybuilds\mysymbols".to_string(), + &r"DEFAULT_STORE".to_string() + ) + .expect("parse success") + .collect::>(), + vec![ + r"DEFAULT_STORE".to_string(), + r"\\mybuilds\mysymbols".to_string() + ] + ); + assert_eq!( + parse_sym_srv( + &r"srv*c:\localsymbols*\\NearbyServer\store*https://DistantServer".to_string(), + &r"DEFAULT_STORE".to_string() + ) + .expect("parse success") + .collect::>(), + vec![ + r"c:\localsymbols".to_string(), + r"\\NearbyServer\store".to_string(), + r"https://DistantServer".to_string() + ] + ); + assert_eq!( + parse_sym_srv( + &r"srv*c:\DownstreamStore*https://msdl.microsoft.com/download/symbols".to_string(), + &r"DEFAULT_STORE".to_string() + ) + .expect("parse success") + .collect::>(), + vec![ + r"c:\DownstreamStore".to_string(), + r"https://msdl.microsoft.com/download/symbols".to_string() + ] + ); +} diff --git a/rust/examples/pdb-ng/src/parser.rs b/rust/examples/pdb-ng/src/parser.rs new file mode 100644 index 0000000000..cbc4f07a38 --- /dev/null +++ b/rust/examples/pdb-ng/src/parser.rs @@ -0,0 +1,499 @@ +// Copyright 2022-2024 Vector 35 Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{HashMap, HashSet}; +use std::env; +use std::fmt::Display; + +use anyhow::{anyhow, Result}; +use log::{debug, info}; +use pdb::*; + +use binaryninja::architecture::{Architecture, CoreArchitecture}; +use binaryninja::binaryview::{BinaryView, BinaryViewExt}; +use binaryninja::callingconvention::CallingConvention; +use binaryninja::debuginfo::{DebugFunctionInfo, DebugInfo}; +use binaryninja::platform::Platform; +use binaryninja::rc::Ref; +use binaryninja::settings::Settings; +use binaryninja::types::{ + min_confidence, Conf, DataVariableAndName, EnumerationBuilder, NamedTypeReference, + NamedTypeReferenceClass, StructureBuilder, StructureType, Type, TypeClass, +}; + +use crate::symbol_parser::{ParsedDataSymbol, ParsedProcedure, ParsedSymbol}; +use crate::type_parser::ParsedType; + +/// Megastruct for all the parsing +/// Certain fields are only used by specific files, as marked below. +/// Why not make new structs for them? 
Because vvvv this garbage +pub struct PDBParserInstance<'a, S: Source<'a> + 'a> { + /// DebugInfo where types/functions will be stored eventually + pub(crate) debug_info: &'a mut DebugInfo, + /// Parent binary view (usually during BinaryView::Finalize) + pub(crate) bv: &'a BinaryView, + /// Default arch of self.bv + pub(crate) arch: CoreArchitecture, + /// Default calling convention for self.arch + pub(crate) default_cc: Ref>, + /// Thiscall calling convention for self.bv, or default_cc if we can't find one + pub(crate) thiscall_cc: Ref>, + /// Cdecl calling convention for self.bv, or default_cc if we can't find one + pub(crate) cdecl_cc: Ref>, + /// Default platform of self.bv + pub(crate) platform: Ref, + /// pdb-rs structure for making lifetime hell a real place + pub(crate) pdb: PDB<'a, S>, + /// pdb-rs Mapping of modules to addresses for resolving RVAs + pub(crate) address_map: AddressMap<'a>, + /// Binja Settings instance (for optimization) + pub(crate) settings: Ref, + + /// type_parser.rs + + /// TypeIndex -> ParsedType enum used during parsing + pub(crate) indexed_types: HashMap, + /// QName -> Binja Type for finished types + pub(crate) named_types: HashMap>, + /// Raw (mangled) name -> TypeIndex for resolving forward references + pub(crate) full_type_indices: HashMap, + /// Stack of types we're currently parsing + pub(crate) type_stack: Vec, + /// Stack of parent types we're parsing nested types inside of + pub(crate) namespace_stack: Vec, + /// Type Index -> Does it return on the stack + pub(crate) type_default_returnable: HashMap, + + /// symbol_parser.rs + + /// List of fully parsed symbols from all modules + pub(crate) parsed_symbols: Vec, + /// Raw name -> index in parsed_symbols + pub(crate) parsed_symbols_by_name: HashMap, + /// Raw name -> Symbol index for looking up symbols for the currently parsing module (mostly for thunks) + pub(crate) named_symbols: HashMap, + /// Parent -> Children symbol index tree for the currently parsing module + 
pub(crate) symbol_tree: HashMap>, + /// Child -> Parent symbol index mapping, inverse of symbol_tree + pub(crate) symbol_parents: HashMap, + /// Stack of (start, end) indices for the current symbols being parsed while constructing the tree + pub(crate) symbol_stack: Vec<(SymbolIndex, SymbolIndex)>, + /// Index -> parsed symbol for the currently parsing module + pub(crate) indexed_symbols: HashMap, + /// Symbol address -> Symbol for looking up by address + pub(crate) addressed_symbols: HashMap>, + /// CPU type of the currently parsing module + pub(crate) module_cpu_type: Option, +} + +impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> { + /// Try to create a new parser instance from a given bv/pdb + pub fn new( + debug_info: &'a mut DebugInfo, + bv: &'a BinaryView, + mut pdb: PDB<'a, S>, + ) -> Result { + let arch = if let Some(arch) = bv.default_arch() { + arch + } else { + return Err(anyhow!("Cannot parse to view with no architecture")); + }; + + let platform = bv + .default_platform() + .expect("Expected bv to have a platform"); + + let address_map = pdb.address_map()?; + + let default_cc = platform + .get_default_calling_convention() + .expect("Expected default calling convention"); + + let thiscall_cc = Self::find_calling_convention(platform.as_ref(), "thiscall") + .unwrap_or(default_cc.clone()); + + let cdecl_cc = platform + .get_cdecl_calling_convention() + .unwrap_or(default_cc.clone()); + + Ok(Self { + debug_info, + bv, + arch, + default_cc, + thiscall_cc, + cdecl_cc, + platform, + pdb, + address_map, + settings: Settings::new(""), + indexed_types: Default::default(), + named_types: Default::default(), + full_type_indices: Default::default(), + type_stack: Default::default(), + namespace_stack: Default::default(), + type_default_returnable: Default::default(), + parsed_symbols: Default::default(), + parsed_symbols_by_name: Default::default(), + named_symbols: Default::default(), + symbol_tree: Default::default(), + symbol_parents: Default::default(), + 
symbol_stack: Default::default(), + indexed_symbols: Default::default(), + addressed_symbols: Default::default(), + module_cpu_type: None, + }) + } + + /// Try to parse the pdb into the DebugInfo + pub fn try_parse_info( + &mut self, + progress: Box Result<()> + 'a>, + ) -> Result<()> { + self.parse_types(Self::split_progress(&progress, 0, &[1.0, 3.0, 0.5, 0.5]))?; + for (name, ty) in self.named_types.iter() { + self.debug_info.add_type(name, ty.as_ref(), &[]); // TODO : Components + } + + info!("PDB found {} types", self.named_types.len()); + + let (symbols, functions) = + self.parse_symbols(Self::split_progress(&progress, 1, &[1.0, 3.0, 0.5, 0.5]))?; + + if self + .settings + .get_bool("pdb.features.createMissingNamedTypes", Some(self.bv), None) + { + self.resolve_missing_ntrs( + &symbols, + Self::split_progress(&progress, 2, &[1.0, 3.0, 0.5, 0.5]), + )?; + self.resolve_missing_ntrs( + &functions, + Self::split_progress(&progress, 3, &[1.0, 3.0, 0.5, 0.5]), + )?; + } + + info!("PDB found {} data variables", symbols.len()); + info!("PDB found {} functions", functions.len()); + + let allow_void = + self.settings + .get_bool("pdb.features.allowVoidGlobals", Some(self.bv), None); + + for sym in symbols { + match sym { + ParsedSymbol::Data(ParsedDataSymbol { + address, + name, + type_, + .. 
+ }) => { + let real_type = + type_.unwrap_or_else(|| Conf::new(Type::void(), min_confidence())); + + if real_type.contents.type_class() == TypeClass::VoidTypeClass { + if !allow_void { + self.log(|| { + format!("Not adding void-typed symbol {:?}@{:x}", name, address) + }); + continue; + } + } + + self.log(|| { + format!( + "Adding data variable: 0x{:x}: {} {:?}", + address, &name.raw_name, real_type + ) + }); + self.debug_info + .add_data_variable_info(DataVariableAndName::new( + address, + real_type, + true, + name.full_name.unwrap_or(name.raw_name), + )); + } + s => { + self.log(|| format!("Not adding non-data symbol {:?}", s)); + } + } + } + + for sym in functions { + match sym { + ParsedSymbol::Procedure(ParsedProcedure { + address, + name, + type_, + .. + }) => { + self.log(|| { + format!( + "Adding function: 0x{:x}: {} {:?}", + address, &name.raw_name, type_ + ) + }); + self.debug_info.add_function(DebugFunctionInfo::new( + Some(name.short_name.unwrap_or(name.raw_name.clone())), + Some(name.full_name.unwrap_or(name.raw_name.clone())), + Some(name.raw_name), + type_.clone().and_then(|conf| { + // TODO: When DebugInfo support confidence on function types, remove this + if conf.confidence == 0 { + None + } else { + Some(conf.contents) + } + }), + Some(address), + Some(self.platform.clone()), + vec![], // TODO : Components + )); + } + _ => {} + } + } + + Ok(()) + } + + fn collect_name( + &self, + name: &NamedTypeReference, + unknown_names: &mut HashMap, + ) { + let used_name = name.name().to_string(); + if let Some(&found) = + unknown_names.iter().find_map( + |(key, value)| { + if key == &used_name { + Some(value) + } else { + None + } + }, + ) + { + if found != name.class() { + // Interesting case, not sure we care + self.log(|| { + format!( + "Mismatch unknown NTR class for {}: {} ?", + &used_name, + name.class() as u32 + ) + }); + } + } else { + self.log(|| format!("Found new unused name: {}", &used_name)); + unknown_names.insert(used_name, name.class()); + } 
+ } + + fn collect_names( + &self, + ty: &Type, + unknown_names: &mut HashMap, + ) { + match ty.type_class() { + TypeClass::StructureTypeClass => { + if let Ok(structure) = ty.get_structure() { + if let Ok(members) = structure.members() { + for member in members { + self.collect_names(member.ty.contents.as_ref(), unknown_names); + } + } + if let Ok(bases) = structure.base_structures() { + for base in bases { + self.collect_name(base.ty.as_ref(), unknown_names); + } + } + } + } + TypeClass::PointerTypeClass => { + if let Ok(target) = ty.target() { + self.collect_names(target.contents.as_ref(), unknown_names); + } + } + TypeClass::ArrayTypeClass => { + if let Ok(element_type) = ty.element_type() { + self.collect_names(element_type.contents.as_ref(), unknown_names); + } + } + TypeClass::FunctionTypeClass => { + if let Ok(return_value) = ty.return_value() { + self.collect_names(return_value.contents.as_ref(), unknown_names); + } + if let Ok(params) = ty.parameters() { + for param in params { + self.collect_names(param.t.contents.as_ref(), unknown_names); + } + } + } + TypeClass::NamedTypeReferenceClass => { + if let Ok(ntr) = ty.get_named_type_reference() { + self.collect_name(ntr.as_ref(), unknown_names); + } + } + _ => {} + } + } + + fn resolve_missing_ntrs( + &mut self, + symbols: &Vec, + progress: Box Result<()> + '_>, + ) -> Result<()> { + let mut unknown_names = HashMap::new(); + let mut known_names = self + .bv + .types() + .iter() + .map(|qnat| qnat.name().string()) + .collect::>(); + + for ty in &self.named_types { + known_names.insert(ty.0.clone()); + } + + let count = symbols.len(); + for (i, sym) in symbols.into_iter().enumerate() { + match sym { + ParsedSymbol::Data(ParsedDataSymbol { + type_: Some(type_), .. + }) => { + self.collect_names(type_.contents.as_ref(), &mut unknown_names); + } + ParsedSymbol::Procedure(ParsedProcedure { + type_: Some(type_), + locals, + .. 
+ }) => { + self.collect_names(type_.contents.as_ref(), &mut unknown_names); + for l in locals { + if let Some(ltype) = &l.type_ { + self.collect_names(ltype.contents.as_ref(), &mut unknown_names); + } + } + } + _ => {} + } + (progress)(i, count)?; + } + + for (name, class) in unknown_names.into_iter() { + if known_names.iter().any(|known| known == &name) { + self.log(|| format!("Found referenced name and ignoring: {}", &name)); + continue; + } + self.log(|| format!("Adding referenced but unknown type {} (likely due to demangled name and stripped type)", &name)); + match class { + NamedTypeReferenceClass::UnknownNamedTypeClass + | NamedTypeReferenceClass::TypedefNamedTypeClass => { + self.debug_info.add_type(name, Type::void().as_ref(), &[]); // TODO : Components + } + NamedTypeReferenceClass::ClassNamedTypeClass + | NamedTypeReferenceClass::StructNamedTypeClass + | NamedTypeReferenceClass::UnionNamedTypeClass => { + let structure = StructureBuilder::new(); + match class { + NamedTypeReferenceClass::ClassNamedTypeClass => { + structure.set_structure_type(StructureType::ClassStructureType); + } + NamedTypeReferenceClass::StructNamedTypeClass => { + structure.set_structure_type(StructureType::StructStructureType); + } + NamedTypeReferenceClass::UnionNamedTypeClass => { + structure.set_structure_type(StructureType::UnionStructureType); + } + _ => {} + } + structure.set_width(1); + structure.set_alignment(1); + + self.debug_info.add_type( + name, + Type::structure(structure.finalize().as_ref()).as_ref(), + &[], // TODO : Components + ); + } + NamedTypeReferenceClass::EnumNamedTypeClass => { + let enumeration = EnumerationBuilder::new(); + self.debug_info.add_type( + name, + Type::enumeration( + enumeration.finalize().as_ref(), + self.arch.default_integer_size(), + false, + ) + .as_ref(), + &[], // TODO : Components + ); + } + } + } + + Ok(()) + } + + /// Lazy logging function that prints like 20MB of messages + pub(crate) fn log D, D: Display>(&self, msg: F) { + if 
env::var("BN_DEBUG_PDB").is_ok() { + let space = "\t".repeat(self.type_stack.len()) + &"\t".repeat(self.symbol_stack.len()); + let msg = format!("{}", msg()); + debug!( + "{}{}", + space, + msg.replace("\n", &*("\n".to_string() + &space)) + ); + } + } + + pub(crate) fn split_progress<'b, F: Fn(usize, usize) -> Result<()> + 'b>( + original_fn: F, + subpart: usize, + subpart_weights: &[f64], + ) -> Box Result<()> + 'b> { + // Normalize weights + let weight_sum: f64 = subpart_weights.iter().sum(); + if weight_sum < 0.0001 { + return Box::new(|_, _| Ok(())); + } + + // Keep a running count of weights for the start + let mut subpart_starts = vec![]; + let mut start = 0f64; + for w in subpart_weights { + subpart_starts.push(start); + start += *w; + } + + let subpart_start = subpart_starts[subpart] / weight_sum; + let weight = subpart_weights[subpart] / weight_sum; + + Box::new(move |cur: usize, max: usize| { + // Just use a large number for easy divisibility + let steps = 1000000f64; + let subpart_size = steps * weight; + let subpart_progress = ((cur as f64) / (max as f64)) * subpart_size; + + original_fn( + (subpart_start * steps + subpart_progress) as usize, + steps as usize, + ) + }) + } +} diff --git a/rust/examples/pdb-ng/src/struct_grouper.rs b/rust/examples/pdb-ng/src/struct_grouper.rs new file mode 100644 index 0000000000..042eb979c2 --- /dev/null +++ b/rust/examples/pdb-ng/src/struct_grouper.rs @@ -0,0 +1,1164 @@ +// Copyright 2022-2024 Vector 35 Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp::Ordering; +use std::env; +use std::fmt::{Debug, Display, Formatter}; + +use anyhow::{anyhow, Result}; +use log::{debug, warn}; + +use binaryninja::types::{ + max_confidence, Conf, MemberAccess, MemberScope, StructureBuilder, StructureType, Type, +}; + +use crate::type_parser::ParsedMember; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct MemberSize { + index: usize, + offset: u64, + width: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum ResolvedGroup { + Single(usize), + Struct(u64, Vec), + Union(u64, Vec), +} + +#[derive(Clone, PartialEq, Eq)] +struct WorkingStruct { + index: Option, + offset: u64, + width: u64, + is_union: bool, + children: Vec, +} + +impl PartialOrd for WorkingStruct { + fn partial_cmp(&self, other: &Self) -> Option { + if self.end() < other.start() { + Some(Ordering::Less) + } else if other.end() < self.start() { + Some(Ordering::Greater) + } else if self.is_same(other) { + Some(Ordering::Equal) + } else { + None + } + } +} + +impl Debug for WorkingStruct { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if self.children.len() == 0 { + write!(f, "{:X} -> {:X}", self.start(), self.end())?; + if let Some(index) = self.index { + write!(f, " (#{:X})", index)?; + } else { + write!(f, " without index???")?; + } + Ok(()) + } else if self.is_union { + write!(f, "union {:X} -> {:X} ", self.start(), self.end())?; + if let Some(index) = self.index { + write!(f, "with index {:X} ??? ", index)?; + } + f.debug_list().entries(self.children.iter()).finish() + } else { + write!(f, "struct {:X} -> {:X} ", self.start(), self.end())?; + if let Some(index) = self.index { + write!(f, "with index {:X} ??? 
", index)?; + } + f.debug_list().entries(self.children.iter()).finish() + } + } +} + +impl WorkingStruct { + pub fn start(&self) -> u64 { + self.offset + } + + pub fn end(&self) -> u64 { + self.offset + self.width + } + + pub fn extend_to(&mut self, new_end: u64) { + if new_end > self.end() { + self.width = new_end - self.offset; + } + } + + // pub fn overlaps(&self, other: &WorkingStruct) -> bool { + // // If A starts after B ends + // if self.start() >= other.end() { + // return false; + // } + // // Or if B starts after A ends + // if other.start() >= self.end() { + // return false; + // } + // // Otherwise, one of the items starts before the other ends, so there is overlap + // return true; + // } + + // pub fn contains(&self, other: &WorkingStruct) -> bool { + // // If other is fully contained within self + // self.start() <= other.start() && self.end() >= other.end() + // } + + pub fn is_same(&self, other: &WorkingStruct) -> bool { + // If self and other have the same range + self.start() == other.start() && self.end() == other.end() + } + + pub fn insert(&mut self, other: WorkingStruct, recursion: usize) -> Result<()> { + log(|| { + format!("{}self: {:#?}", " ".repeat(recursion), self) + .replace("\n", &*("\n".to_owned() + &" ".repeat(recursion))) + }); + log(|| { + format!("{}other: {:#?}", " ".repeat(recursion), other) + .replace("\n", &*("\n".to_owned() + &" ".repeat(recursion))) + }); + + self.extend_to(other.end()); + + // There are 2 cases we have to deal with here: + // a. `other` starts after the end of the last group => insert `other` into the last group + // b. `other` starts before the end of the last group => collect all the children inserted after it starts and put them into a struct + // start a new struct with `other` + + if self.children.len() == 0 { + self.children.push(other); + return Ok(()); + } + + // This is really gross. 
+ // But also I need to ship this before I leave for France + // TODO: Clean this up + + if other.start() + >= self + .children + .last() + .ok_or_else(|| anyhow!("Expected we have children #A"))? + .end() + { + self.children.push(other); + } else { + // Create a structure with fields from self.children + if self + .children + .last() + .ok_or_else(|| anyhow!("Expected we have children #B"))? + .index + .is_none() + && self + .children + .last() + .ok_or_else(|| anyhow!("Expected we have children #C"))? + .start() + < other.start() + { + self.children + .last_mut() + .ok_or_else(|| anyhow!("Expected we have children #D"))? + .insert(other, recursion + 1)?; + return Ok(()); + } + + // If we're a union, we don't have to bother pushing a struct+union combo + if self.is_union { + self.children.push(WorkingStruct { + index: None, + offset: self.offset, + width: self.width, + is_union: false, + children: vec![other], + }); + return Ok(()); + } + + let mut start_index = None; + for (i, child) in self.children.iter().enumerate() { + if child.start() >= other.start() { + start_index = Some(i); + break; + } + } + if start_index.is_none() { + return Err(anyhow!( + "Struct has overlapping member that cannot be resolved: {:#?}", + other + )); + } + + let struct_start = self.children + [start_index.ok_or_else(|| anyhow!("Expected we have start index"))?] + .offset; + let struct_end = self + .children + .last() + .ok_or_else(|| anyhow!("Expected we have start index"))? + .end() + .max(other.end()); + + let struct_children = self + .children + .drain(start_index.ok_or_else(|| anyhow!("Expected we have start index"))?..) 
+ .collect::>(); + self.children.push(WorkingStruct { + index: None, + offset: struct_start, + width: struct_end - struct_start, + is_union: true, + children: vec![ + WorkingStruct { + index: None, + offset: struct_start, + width: struct_end - struct_start, + is_union: false, + children: struct_children, + }, + WorkingStruct { + index: None, + offset: struct_start, + width: struct_end - struct_start, + is_union: false, + children: vec![other], + }, + ], + }); + + // union { + // struct { + // int data0; + // int[2] data4; + // int dataC; + // }; + // struct { + // int newdata0; + // ... + // }; + // }; + } + + // if other.start() < self.children[-1].end() { + // take children from other.start() until -1 and put them into a struct + // } + // else { + // add to self.children[-1], extend to fill + // } + + Ok(()) + } + + pub fn to_resolved(mut self) -> ResolvedGroup { + if let Some(index) = self.index { + ResolvedGroup::Single(index) + } else if self.is_union { + if self.children.len() == 1 { + self.children.remove(0).to_resolved() + } else { + // Collapse union of unions + ResolvedGroup::Union( + self.offset, + self.children + .into_iter() + .flat_map(|child| match child.to_resolved() { + ResolvedGroup::Union(offset, children) if offset == self.offset => { + children + } + s => vec![s], + }) + .collect(), + ) + } + } else { + if self.children.len() == 1 { + self.children.remove(0).to_resolved() + } else { + ResolvedGroup::Struct( + self.offset, + self.children + .into_iter() + .map(|child| child.to_resolved()) + .collect(), + ) + } + } + } +} + +pub fn group_structure( + name: &String, + members: &Vec, + structure: &mut StructureBuilder, +) -> Result<()> { + // SO + // PDBs handle trivial unions inside structures by just slamming all the fields together into + // one big overlappy happy family. We need to reverse this and create out union structures + // to properly represent the original source. 
+ + // IN VISUAL FORM (if you are a visual person, like me): + // struct { + // int foos[2]; + // __offset(0): + // int foo1; + // int foo2; + // int bar; + // } + // + // Into + // + // struct { + // union { + // int foos[2]; + // struct { + // int foo1; + // int foo2; + // } + // } + // int bar; + // } + + // Into internal rep + let reps = members + .iter() + .enumerate() + .map(|(i, member)| MemberSize { + index: i, + offset: member.offset, + width: member.ty.contents.width(), + }) + .collect::>(); + + log(|| format!("{} {:#x?}", name, members)); + log(|| format!("{} {:#x?}", name, reps)); + + // Group them + match resolve_struct_groups(reps) { + Ok(groups) => { + log(|| format!("{} {:#x?}", name, groups)); + + // Apply grouped members + apply_groups(members, structure, groups, 0); + } + Err(e) => { + warn!("{} Could not resolve structure groups: {}", name, e); + for member in members { + structure.insert( + &member.ty.clone(), + member.name.clone(), + member.offset, + false, + member.access, + member.scope, + ); + } + } + } + + Ok(()) +} + +fn apply_groups( + members: &Vec, + structure: &mut StructureBuilder, + groups: Vec, + offset: u64, +) { + for (i, group) in groups.into_iter().enumerate() { + match group { + ResolvedGroup::Single(index) => { + let member = &members[index]; + + // TODO : Fix inner-offset being larger than `member.offset` + + if offset > member.offset { + structure.insert( + &member.ty.clone(), + member.name.clone(), + 0, + false, + member.access, + member.scope, + ); + } else { + structure.insert( + &member.ty.clone(), + member.name.clone(), + member.offset - offset, + false, + member.access, + member.scope, + ); + } + } + ResolvedGroup::Struct(inner_offset, children) => { + let mut inner = StructureBuilder::new(); + apply_groups(members, &mut inner, children, inner_offset); + structure.insert( + &Conf::new(Type::structure(inner.finalize().as_ref()), max_confidence()), + format!("__inner{}", i), + inner_offset - offset, + false, + 
MemberAccess::PublicAccess, + MemberScope::NoScope, + ); + } + ResolvedGroup::Union(inner_offset, children) => { + let mut inner = StructureBuilder::new(); + inner.set_structure_type(StructureType::UnionStructureType); + apply_groups(members, &mut inner, children, inner_offset); + structure.insert( + &Conf::new(Type::structure(inner.finalize().as_ref()), max_confidence()), + format!("__inner{}", i), + inner_offset - offset, + false, + MemberAccess::PublicAccess, + MemberScope::NoScope, + ); + } + } + } +} + +fn resolve_struct_groups(members: Vec) -> Result> { + // See if we care + let mut has_overlapping = false; + let mut last_end = 0; + let mut max_width = 0; + for member in &members { + if member.offset < last_end { + has_overlapping = true; + } + last_end = member.offset + member.width; + max_width = max_width.max(member.offset + member.width); + } + + if !has_overlapping { + // Nothing overlaps, just add em directly + return Ok(members + .into_iter() + .map(|member| ResolvedGroup::Single(member.index)) + .collect()); + } + + // Yes overlapping + + let mut groups = WorkingStruct { + index: None, + offset: 0, + width: max_width, + is_union: false, + children: vec![], + }; + for &member in &members { + let member_group = WorkingStruct { + index: Some(member.index), + offset: member.offset, + width: member.width, + is_union: false, + children: vec![], + }; + groups.insert(member_group, 0)?; + + log(|| format!("GROUPS: {:#x?}", groups)); + } + + Ok(groups + .children + .into_iter() + .map(|child| child.to_resolved()) + .collect()) +} + +#[test] +fn test_trivial() { + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0, + offset: 0, + width: 1, + }, + MemberSize { + index: 1, + offset: 1, + width: 1, + }, + MemberSize { + index: 2, + offset: 2, + width: 1, + }, + MemberSize { + index: 3, + offset: 3, + width: 1, + }, + ]) + .unwrap(), + vec![ + ResolvedGroup::Single(0), + ResolvedGroup::Single(1), + ResolvedGroup::Single(2), + 
ResolvedGroup::Single(3), + ] + ); +} + +#[test] +fn test_everything_everywhere() { + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0, + offset: 0, + width: 1, + }, + MemberSize { + index: 1, + offset: 0, + width: 1, + }, + MemberSize { + index: 2, + offset: 0, + width: 1, + }, + MemberSize { + index: 3, + offset: 0, + width: 1, + }, + ]) + .unwrap(), + vec![ResolvedGroup::Union( + 0, + vec![ + ResolvedGroup::Single(0), + ResolvedGroup::Single(1), + ResolvedGroup::Single(2), + ResolvedGroup::Single(3), + ] + )] + ); +} + +#[test] +fn test_unalignend() { + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0, + offset: 0, + width: 4, + }, + MemberSize { + index: 1, + offset: 4, + width: 8, + }, + MemberSize { + index: 2, + offset: 12, + width: 4, + }, + MemberSize { + index: 3, + offset: 0, + width: 8, + }, + MemberSize { + index: 4, + offset: 8, + width: 8, + }, + ]) + .unwrap(), + vec![ResolvedGroup::Union( + 0, + vec![ + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0), + ResolvedGroup::Single(1), + ResolvedGroup::Single(2), + ] + ), + ResolvedGroup::Struct(0, vec![ResolvedGroup::Single(3), ResolvedGroup::Single(4),]), + ] + )] + ); +} + +#[test] +fn test_heap_vs_chunk_free_header() { + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0, + offset: 0, + width: 16, + }, + MemberSize { + index: 1, + offset: 0, + width: 8, + }, + MemberSize { + index: 2, + offset: 8, + width: 24, + }, + ]) + .unwrap(), + vec![ResolvedGroup::Union( + 0, + vec![ + ResolvedGroup::Single(0), + ResolvedGroup::Struct(0, vec![ResolvedGroup::Single(1), ResolvedGroup::Single(2)]) + ] + )] + ); +} + +#[test] +fn test_kprcb() { + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0, + offset: 0, + width: 8, + }, + MemberSize { + index: 1, + offset: 8, + width: 1, + }, + MemberSize { + index: 2, + offset: 8, + width: 1, + }, + MemberSize { + index: 3, + offset: 9, + width: 1, + }, + MemberSize { + index: 4, + 
offset: 9, + width: 1, + }, + MemberSize { + index: 5, + offset: 10, + width: 1, + }, + MemberSize { + index: 6, + offset: 11, + width: 1, + }, + MemberSize { + index: 7, + offset: 12, + width: 1, + }, + MemberSize { + index: 8, + offset: 13, + width: 1, + }, + MemberSize { + index: 9, + offset: 14, + width: 2, + }, + MemberSize { + index: 10, + offset: 0, + width: 16, + }, + MemberSize { + index: 11, + offset: 16, + width: 1, + }, + MemberSize { + index: 12, + offset: 17, + width: 1, + }, + MemberSize { + index: 13, + offset: 18, + width: 1, + }, + MemberSize { + index: 14, + offset: 18, + width: 1, + }, + MemberSize { + index: 15, + offset: 19, + width: 1, + }, + MemberSize { + index: 16, + offset: 19, + width: 1, + }, + MemberSize { + index: 17, + offset: 20, + width: 4, + }, + MemberSize { + index: 18, + offset: 16, + width: 8, + }, + ]) + .unwrap(), + vec![ + ResolvedGroup::Union( + 0, + vec![ + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0), + ResolvedGroup::Union( + 8, + vec![ResolvedGroup::Single(1), ResolvedGroup::Single(2),] + ), + ResolvedGroup::Union( + 9, + vec![ResolvedGroup::Single(3), ResolvedGroup::Single(4),] + ), + ResolvedGroup::Single(5), + ResolvedGroup::Single(6), + ResolvedGroup::Single(7), + ResolvedGroup::Single(8), + ResolvedGroup::Single(9) + ] + ), + ResolvedGroup::Single(10) + ] + ), + ResolvedGroup::Union( + 16, + vec![ + ResolvedGroup::Struct( + 16, + vec![ + ResolvedGroup::Single(11), + ResolvedGroup::Single(12), + ResolvedGroup::Union( + 18, + vec![ResolvedGroup::Single(13), ResolvedGroup::Single(14),] + ), + ResolvedGroup::Union( + 19, + vec![ResolvedGroup::Single(15), ResolvedGroup::Single(16),] + ), + ResolvedGroup::Single(17) + ] + ), + ResolvedGroup::Single(18) + ] + ) + ] + ); +} + +#[test] +fn test_dispatcher_header() { + /* + XXX: This returns a different grouping which is still valid + Basically it turns this: + struct { + unsigned char data0; + union { + unsigned char data1; + struct { + unsigned char 
data1_2; + unsigned char data2; + unsigned char data3; + }; + }; + }; + + into this: + + struct { + unsigned char data0; + union { + unsigned char data1; + unsigned char data1_2; + }; + unsigned char data2; + unsigned char data3; + }; + */ + + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0x0, + offset: 0x0, + width: 0x4, + }, + MemberSize { + index: 0x1, + offset: 0x0, + width: 0x4, + }, + MemberSize { + index: 0x2, + offset: 0x0, + width: 0x1, + }, + MemberSize { + index: 0x3, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x4, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0x5, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0x6, + offset: 0x0, + width: 0x1, + }, + MemberSize { + index: 0x7, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x8, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x9, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0xa, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0xb, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0xc, + offset: 0x0, + width: 0x1, + }, + MemberSize { + index: 0xd, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0xe, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0xf, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0x10, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0x11, + offset: 0x0, + width: 0x1, + }, + MemberSize { + index: 0x12, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x13, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x14, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0x15, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0x16, + offset: 0x0, + width: 0x1, + }, + MemberSize { + index: 0x17, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x18, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0x19, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0x1a, + offset: 0x3, + width: 0x1, + }, + 
MemberSize { + index: 0x1b, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0x1c, + offset: 0x0, + width: 0x1, + }, + MemberSize { + index: 0x1d, + offset: 0x1, + width: 0x1, + }, + MemberSize { + index: 0x1e, + offset: 0x2, + width: 0x1, + }, + MemberSize { + index: 0x1f, + offset: 0x3, + width: 0x1, + }, + MemberSize { + index: 0x20, + offset: 0x4, + width: 0x4, + }, + MemberSize { + index: 0x21, + offset: 0x8, + width: 0x10, + }, + ]) + .unwrap(), + vec![ + ResolvedGroup::Union( + 0, + vec![ + ResolvedGroup::Single(0x0), + ResolvedGroup::Single(0x1), + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0x2), + ResolvedGroup::Single(0x3), + ResolvedGroup::Single(0x4), + ResolvedGroup::Single(0x5), + ] + ), + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0x6), + ResolvedGroup::Union( + 1, + vec![ + ResolvedGroup::Single(0x7), + ResolvedGroup::Struct( + 1, + vec![ + ResolvedGroup::Single(0x8), + ResolvedGroup::Single(0x9), + ResolvedGroup::Union( + 3, + vec![ + ResolvedGroup::Single(0xa), + ResolvedGroup::Single(0xb), + ] + ), + ] + ), + ] + ), + ] + ), + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0xc), + ResolvedGroup::Union( + 1, + vec![ + ResolvedGroup::Single(0xd), + ResolvedGroup::Struct( + 1, + vec![ + ResolvedGroup::Single(0xe), + ResolvedGroup::Single(0xf), + ResolvedGroup::Single(0x10), + ] + ) + ] + ), + ] + ), + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0x11), + ResolvedGroup::Union( + 1, + vec![ + ResolvedGroup::Single(0x12), + ResolvedGroup::Struct( + 1, + vec![ + ResolvedGroup::Single(0x13), + ResolvedGroup::Single(0x14), + ResolvedGroup::Single(0x15), + ] + ) + ] + ), + ] + ), + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0x16), + ResolvedGroup::Single(0x17), + ResolvedGroup::Union( + 2, + vec![ + ResolvedGroup::Single(0x18), + ResolvedGroup::Struct( + 2, + vec![ + ResolvedGroup::Single(0x19), + ResolvedGroup::Union( + 2, + vec![ + ResolvedGroup::Single(0x1a), + 
ResolvedGroup::Single(0x1b), + ] + ) + ] + ) + ] + ), + ] + ), + ResolvedGroup::Struct( + 0, + vec![ + ResolvedGroup::Single(0x1c), + ResolvedGroup::Single(0x1d), + ResolvedGroup::Single(0x1e), + ResolvedGroup::Single(0x1f), + ] + ), + ] + ), + ResolvedGroup::Single(0x20), + ResolvedGroup::Single(0x21), + ] + ) +} + +#[test] +fn test_bool_modifier() { + assert_eq!( + resolve_struct_groups(vec![ + MemberSize { + index: 0, + offset: 8, + width: 1, + }, + MemberSize { + index: 1, + offset: 12, + width: 8, + }, + MemberSize { + index: 2, + offset: 16, + width: 1, + }, + ]) + .unwrap_err() + .to_string(), + format!( + "Struct has overlapping member that cannot be resolved: {:#?}", + MemberSize { + index: 2, + offset: 16, + width: 1, + } + ) + ); +} + +/// Whoops I'm not in PDBParserInstance +fn log D, D: Display>(msg: F) { + // println!("{}", msg()); + if env::var("BN_DEBUG_PDB").is_ok() { + debug!("{}", msg()); + } +} diff --git a/rust/examples/pdb-ng/src/symbol_parser.rs b/rust/examples/pdb-ng/src/symbol_parser.rs new file mode 100644 index 0000000000..7f418747f2 --- /dev/null +++ b/rust/examples/pdb-ng/src/symbol_parser.rs @@ -0,0 +1,2025 @@ +// Copyright 2022-2024 Vector 35 Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
/// The different spellings of a single symbol's name
#[derive(Debug, Clone)]
pub struct SymbolNames {
    /// Raw name of the symbol; always present, and used as the fallback
    /// wherever `short_name` or `full_name` is missing
    pub raw_name: String,
    /// Short name if known (presumably a demangled form -- TODO confirm)
    pub short_name: Option<String>,
    /// Full name if known (presumably a demangled form -- TODO confirm)
    pub full_name: Option<String>,
}
/// One storage location of a parsed variable
#[derive(Debug, Copy, Clone)]
pub struct ParsedLocation {
    /// Location information
    pub location: Variable,
    /// Is the storage location relative to the base pointer?
    // NOTE(review): this doc used to link to `ParsedProcedureInfo.frame_offset`,
    // but `ParsedProcedureInfo` as defined in this file has no such field.
    pub base_relative: bool,
    /// Is the storage location relative to the stack pointer?
    pub stack_relative: bool,
}
{ + let symbols = module_info.symbols()?; + let parsed = self.parse_mod_symbols(symbols)?; + for sym in parsed { + match &sym { + ParsedSymbol::Data(ParsedDataSymbol { + name: SymbolNames { raw_name, .. }, + .. + }) + | ParsedSymbol::Procedure(ParsedProcedure { + name: SymbolNames { raw_name, .. }, + .. + }) => { + self.parsed_symbols_by_name + .insert(raw_name.clone(), self.parsed_symbols.len()); + } + _ => {} + } + self.parsed_symbols.push(sym); + } + } + } + + let use_public = + self.settings + .get_bool("pdb.features.loadGlobalSymbols", Some(self.bv), None); + + let mut best_symbols = HashMap::::new(); + for sym in &self.parsed_symbols { + match sym { + ParsedSymbol::Data(ParsedDataSymbol { + is_public, + address, + name: + SymbolNames { + raw_name, + full_name, + .. + }, + type_, + .. + }) => { + if *is_public && !use_public { + continue; + } + + let this_confidence = match type_ { + Some(Conf { confidence, .. }) => *confidence, + _ => min_confidence(), + }; + let (new_better, old_exists) = match best_symbols.get(raw_name) { + Some(ParsedSymbol::Data(ParsedDataSymbol { + type_: + Some(Conf { + confidence: old_conf, + .. + }), + .. + })) => (this_confidence > *old_conf, true), + Some(ParsedSymbol::Data(ParsedDataSymbol { type_: None, .. })) => { + (true, true) + } + Some(..) => (false, true), + _ => (true, false), + }; + if new_better { + self.log(|| { + format!( + "New best symbol (at 0x{:x}) for `{}` / `{}`: {:?}", + *address, + raw_name, + full_name.as_ref().unwrap_or(raw_name), + sym + ) + }); + if old_exists { + self.log(|| format!("Clobbering old definition")); + } + best_symbols.insert(raw_name.clone(), sym); + } + } + _ => {} + } + } + + let mut best_functions = HashMap::::new(); + for sym in &self.parsed_symbols { + match sym { + ParsedSymbol::Procedure(ParsedProcedure { + is_public, + address, + name: + SymbolNames { + raw_name, + full_name, + .. + }, + type_, + .. 
+ }) => { + if *is_public && !use_public { + continue; + } + + let this_confidence = match type_ { + Some(Conf { confidence, .. }) => *confidence, + _ => min_confidence(), + }; + let (new_better, old_exists) = match best_functions.get(raw_name) { + Some(ParsedSymbol::Procedure(ParsedProcedure { + type_: + Some(Conf { + confidence: old_conf, + .. + }), + .. + })) => (this_confidence > *old_conf, true), + Some(ParsedSymbol::Procedure(ParsedProcedure { type_: None, .. })) => { + (true, true) + } + Some(..) => (false, true), + _ => (true, false), + }; + if new_better { + self.log(|| { + format!( + "New best function (at 0x{:x}) for `{}` / `{}`: {:?}", + *address, + raw_name, + full_name.as_ref().unwrap_or(raw_name), + sym + ) + }); + if old_exists { + self.log(|| format!("Clobbering old definition")); + } + best_functions.insert(raw_name.clone(), sym); + } + } + _ => {} + } + } + + Ok(( + best_symbols + .into_iter() + .map(|(_, sym)| sym.clone()) + .sorted_by_key(|sym| match sym { + ParsedSymbol::Data(ParsedDataSymbol { type_, .. }) => { + type_.as_ref().map(|ty| ty.confidence).unwrap_or(0) + } + ParsedSymbol::Procedure(ParsedProcedure { type_, .. }) => { + type_.as_ref().map(|ty| ty.confidence).unwrap_or(0) + } + _ => 0, + }) + .collect::>(), + best_functions + .into_iter() + .map(|(_, func)| func.clone()) + .sorted_by_key(|sym| match sym { + ParsedSymbol::Data(ParsedDataSymbol { type_, .. }) => { + type_.as_ref().map(|ty| ty.confidence).unwrap_or(0) + } + ParsedSymbol::Procedure(ParsedProcedure { type_, .. 
}) => { + type_.as_ref().map(|ty| ty.confidence).unwrap_or(0) + } + _ => 0, + }) + .collect::>(), + )) + } + + /// Parse all the symbols in a module, via the given SymbolIter + pub fn parse_mod_symbols(&mut self, mut symbols: SymbolIter) -> Result> { + // Collect tree structure first + let mut first = None; + let mut last_local = None; + let mut top_level_syms = vec![]; + let mut thunk_syms = vec![]; + let mut unparsed_syms = BTreeMap::new(); + while let Some(sym) = symbols.next()? { + if first.is_none() { + first = Some(sym.index()); + } + unparsed_syms.insert(sym.index(), sym); + + let p = sym.parse(); + self.log(|| format!("Parsed: {:x?}", p)); + + // It's some sort of weird tree structure where SOME symbols have "end" indices + // and anything between them and that index is a child symbol + // Sometimes there are "end scope" symbols at those end indices but like, sometimes + // there aren't? Which makes that entire system seem pointless (or I'm just missing + // something and it makes sense to _someone_) + if let Some(&(start, _end)) = self.symbol_stack.last() { + self.add_symbol_child(start, sym.index()); + } else { + // Place thunk symbols in their own list at the end, so they can reference + // other symbols parsed in the module + match &p { + Ok(SymbolData::Thunk(_)) => { + thunk_syms.push(sym.index()); + } + _ => { + top_level_syms.push(sym.index()); + } + } + } + let mut popped = false; + while let Some(&(_start, end)) = self.symbol_stack.last() { + if sym.index().0 >= end.0 { + let _ = self.symbol_stack.pop(); + popped = true; + } else { + break; + } + } + + // These aren't actually used for parsing (I don't trust them) but we can include a little + // debug error check here and see if it's ever actually wrong + match p { + Ok(SymbolData::ScopeEnd) | Ok(SymbolData::InlineSiteEnd) if popped => {} + Ok(SymbolData::ScopeEnd) | Ok(SymbolData::InlineSiteEnd) if !popped => { + self.log(|| format!("Did not pop at a scope end??? 
WTF??")); + } + _ if popped => { + self.log(|| format!("Popped but not at a scope end??? WTF??")); + } + _ => {} + } + + // Push new scopes on the stack to build the tree + match p { + Ok(SymbolData::Procedure(data)) => { + self.symbol_stack.push((sym.index(), data.end)); + } + Ok(SymbolData::InlineSite(data)) => { + self.symbol_stack.push((sym.index(), data.end)); + } + Ok(SymbolData::Block(data)) => { + self.symbol_stack.push((sym.index(), data.end)); + } + Ok(SymbolData::Thunk(data)) => { + self.symbol_stack.push((sym.index(), data.end)); + } + Ok(SymbolData::SeparatedCode(data)) => { + self.symbol_stack.push((sym.index(), data.end)); + } + Ok(SymbolData::FrameProcedure(..)) => { + if let Some(&(_, proc_end)) = self.symbol_stack.last() { + self.symbol_stack.push((sym.index(), proc_end)); + } + } + Ok(SymbolData::Local(..)) => { + last_local = Some(sym.index()); + } + Ok(SymbolData::DefRange(..)) + | Ok(SymbolData::DefRangeSubField(..)) + | Ok(SymbolData::DefRangeRegister(..)) + | Ok(SymbolData::DefRangeFramePointerRelative(..)) + | Ok(SymbolData::DefRangeFramePointerRelativeFullScope(..)) + | Ok(SymbolData::DefRangeSubFieldRegister(..)) + | Ok(SymbolData::DefRangeRegisterRelative(..)) => { + // I'd like to retract my previous statement that someone could possibly + // understand this: + // These symbol types impact the previous symbol, if it was a local + // BUT ALSO!! PART III REVENGE OF THE SYM-TH: You can have more than one of + // these and they all (?? 
it's undocumented) apply to the last local, PROBABLY + if let Some(last) = last_local { + self.add_symbol_child(last, sym.index()); + } else { + self.log(|| format!("Found def range with no last local: {:?}", p)); + } + } + _ => {} + } + } + assert!(self.symbol_stack.is_empty()); + // Add thunks at the end as per above + top_level_syms.extend(thunk_syms.into_iter()); + + // Restart and do the processing for real this time + if let Some(first) = first { + symbols.seek(first); + } + + let mut final_symbols = HashSet::new(); + + for root_idx in top_level_syms { + for child_idx in self.walk_children(root_idx).into_iter() { + let &sym = unparsed_syms + .get(&child_idx) + .expect("should have parsed this"); + + self.log(|| format!("Symbol {:?} ", sym.index())); + let (name, address) = + if let Some(parsed) = self.handle_symbol_index(sym.index(), sym)? { + final_symbols.insert(sym.index()); + match parsed { + ParsedSymbol::Data(ParsedDataSymbol { name, address, .. }) => { + (Some(name.clone()), Some(*address)) + } + ParsedSymbol::Procedure(ParsedProcedure { name, address, .. }) => { + (Some(name.clone()), Some(*address)) + } + _ => (None, None), + } + } else { + (None, None) + }; + + if let Some(name) = name { + self.named_symbols.insert(name.raw_name, sym.index()); + } + if let Some(address) = address { + if !self.addressed_symbols.contains_key(&address) { + self.addressed_symbols.insert(address, vec![]); + } + self.addressed_symbols + .get_mut(&address) + .expect("just created this") + .push( + self.indexed_symbols + .get(&sym.index()) + .ok_or_else(|| anyhow!("Can't find sym {} ?", sym.index()))? 
+ .clone(), + ); + } + } + } + + let filtered_symbols = self + .indexed_symbols + .drain() + .filter_map(|(idx, sym)| { + if final_symbols.contains(&idx) { + Some(sym) + } else { + None + } + }) + .collect::>(); + + // The symbols overlap between modules or something, so we can't keep this info around + self.symbol_tree.clear(); + self.module_cpu_type = None; + + Ok(filtered_symbols) + } + + /// Set a symbol to be the parent of another, building the symbol tree + fn add_symbol_child(&mut self, parent: SymbolIndex, child: SymbolIndex) { + if let Some(tree) = self.symbol_tree.get_mut(&parent) { + tree.push(child); + } else { + self.symbol_tree.insert(parent, Vec::from([child])); + } + + self.symbol_parents.insert(child, parent); + } + + /// Postorder traversal of children of symbol index (only during this module parse) + fn walk_children(&self, sym: SymbolIndex) -> Vec { + let mut children = vec![]; + + if let Some(tree) = self.symbol_tree.get(&sym) { + for &child in tree { + children.extend(self.walk_children(child).into_iter()); + } + } + + children.push(sym); + return children; + } + + /// Direct children of symbol index (only during this module parse) + fn symbol_children(&self, sym: SymbolIndex) -> Vec { + if let Some(tree) = self.symbol_tree.get(&sym) { + tree.clone() + } else { + vec![] + } + } + + /// Direct parent of symbol index (only during this module parse) + #[allow(dead_code)] + fn symbol_parent(&self, sym: SymbolIndex) -> Option { + self.symbol_parents.get(&sym).map(|idx| *idx) + } + + /// Find symbol by index (only during this module parse) + fn lookup_symbol(&self, sym: &SymbolIndex) -> Option<&ParsedSymbol> { + self.indexed_symbols.get(sym) + } + + /// Parse a new symbol by its index + fn handle_symbol_index( + &mut self, + idx: SymbolIndex, + sym: Symbol, + ) -> Result> { + if let None = self.indexed_symbols.get(&idx) { + match sym.parse() { + Ok(data) => match self.handle_symbol(idx, &data) { + Ok(Some(parsed)) => { + self.log(|| format!("Symbol 
{} parsed into: {:?}", idx, parsed)); + match &parsed { + _ => {} + } + self.indexed_symbols.insert(idx, parsed.clone()); + } + Ok(None) => {} + e => { + self.log(|| format!("Error parsing symbol {}: {:?}", idx, e)); + } + }, + Err(UnimplementedSymbolKind(k)) => { + self.log(|| format!("Not parsing unimplemented symbol {}: kind {:x?}", idx, k)); + } + Err(e) => { + self.log(|| format!("Could not parse symbol: {}: {}", idx, e)); + } + }; + } + + Ok(self.indexed_symbols.get(&idx)) + } + + /// Parse a new symbol's data + fn handle_symbol( + &mut self, + index: SymbolIndex, + data: &SymbolData, + ) -> Result> { + match data { + SymbolData::ScopeEnd => self.handle_scope_end_symbol(index), + SymbolData::ObjName(data) => self.handle_obj_name_symbol(index, &data), + SymbolData::RegisterVariable(data) => { + self.handle_register_variable_symbol(index, &data) + } + SymbolData::Constant(data) => self.handle_constant_symbol(index, &data), + SymbolData::UserDefinedType(data) => self.handle_user_defined_type_symbol(index, &data), + SymbolData::MultiRegisterVariable(data) => { + self.handle_multi_register_variable_symbol(index, &data) + } + SymbolData::Data(data) => self.handle_data_symbol(index, &data), + SymbolData::Public(data) => self.handle_public_symbol(index, &data), + SymbolData::Procedure(data) => self.handle_procedure_symbol(index, &data), + SymbolData::ThreadStorage(data) => self.handle_thread_storage_symbol(index, &data), + SymbolData::CompileFlags(data) => self.handle_compile_flags_symbol(index, &data), + SymbolData::UsingNamespace(data) => self.handle_using_namespace_symbol(index, &data), + SymbolData::ProcedureReference(data) => { + self.handle_procedure_reference_symbol(index, &data) + } + SymbolData::DataReference(data) => self.handle_data_reference_symbol(index, &data), + SymbolData::AnnotationReference(data) => { + self.handle_annotation_reference_symbol(index, &data) + } + SymbolData::Trampoline(data) => self.handle_trampoline_symbol(index, &data), + 
SymbolData::Export(data) => self.handle_export_symbol(index, &data), + SymbolData::Local(data) => self.handle_local_symbol(index, &data), + SymbolData::BuildInfo(data) => self.handle_build_info_symbol(index, &data), + SymbolData::InlineSite(data) => self.handle_inline_site_symbol(index, &data), + SymbolData::InlineSiteEnd => self.handle_inline_site_end_symbol(index), + SymbolData::ProcedureEnd => self.handle_procedure_end_symbol(index), + SymbolData::Label(data) => self.handle_label_symbol(index, &data), + SymbolData::Block(data) => self.handle_block_symbol(index, &data), + SymbolData::RegisterRelative(data) => { + self.handle_register_relative_symbol(index, &data) + } + SymbolData::Thunk(data) => self.handle_thunk_symbol(index, &data), + SymbolData::SeparatedCode(data) => self.handle_separated_code_symbol(index, &data), + SymbolData::DefRange(data) => self.handle_def_range(index, &data), + SymbolData::DefRangeSubField(data) => self.handle_def_range_sub_field(index, &data), + SymbolData::DefRangeRegister(data) => self.handle_def_range_register(index, &data), + SymbolData::DefRangeFramePointerRelative(data) => { + self.handle_def_range_frame_pointer_relative_symbol(index, &data) + } + SymbolData::DefRangeFramePointerRelativeFullScope(data) => { + self.handle_def_range_frame_pointer_relative_full_scope_symbol(index, &data) + } + SymbolData::DefRangeSubFieldRegister(data) => { + self.handle_def_range_sub_field_register_symbol(index, &data) + } + SymbolData::DefRangeRegisterRelative(data) => { + self.handle_def_range_register_relative_symbol(index, &data) + } + SymbolData::BasePointerRelative(data) => { + self.handle_base_pointer_relative_symbol(index, &data) + } + SymbolData::FrameProcedure(data) => self.handle_frame_procedure_symbol(index, &data), + SymbolData::CallSiteInfo(data) => self.handle_call_site_info(index, &data), + e => Err(anyhow!("Unhandled symbol type {:?}", e)), + } + } + + fn handle_scope_end_symbol(&mut self, _index: SymbolIndex) -> Result> { + 
self.log(|| format!("Got ScopeEnd symbol")); + Ok(None) + } + + fn handle_obj_name_symbol( + &mut self, + _index: SymbolIndex, + data: &ObjNameSymbol, + ) -> Result> { + self.log(|| format!("Got ObjName symbol: {:?}", data)); + Ok(None) + } + + fn handle_register_variable_symbol( + &mut self, + _index: SymbolIndex, + data: &RegisterVariableSymbol, + ) -> Result> { + self.log(|| format!("Got RegisterVariable symbol: {:?}", data)); + + let storage = if let Some(reg) = self.convert_register(data.register) { + vec![ParsedLocation { + location: Variable { + t: BNVariableSourceType::RegisterVariableSourceType, + index: 0, + storage: reg, + }, + base_relative: false, + stack_relative: false, + }] + } else { + // TODO: What do we do here? + vec![] + }; + + Ok(Some(ParsedSymbol::LocalVariable(ParsedVariable { + name: data.name.to_string().to_string(), + type_: self.lookup_type_conf(&data.type_index, false)?, + storage, + is_param: data.slot.map_or(true, |slot| slot > 0), + }))) + } + + fn handle_constant_symbol( + &mut self, + _index: SymbolIndex, + data: &ConstantSymbol, + ) -> Result> { + self.log(|| format!("Got Constant symbol: {:?}", data)); + Ok(None) + } + + fn handle_user_defined_type_symbol( + &mut self, + _index: SymbolIndex, + data: &UserDefinedTypeSymbol, + ) -> Result> { + self.log(|| format!("Got UserDefinedType symbol: {:?}", data)); + Ok(None) + } + + fn handle_multi_register_variable_symbol( + &mut self, + _index: SymbolIndex, + data: &MultiRegisterVariableSymbol, + ) -> Result> { + self.log(|| format!("Got MultiRegisterVariable symbol: {:?}", data)); + Ok(None) + } + + fn handle_data_symbol( + &mut self, + _index: SymbolIndex, + data: &DataSymbol, + ) -> Result> { + self.log(|| format!("Got Data symbol: {:?}", data)); + + let rva = data.offset.to_rva(&self.address_map).unwrap_or_default(); + let raw_name = data.name.to_string().to_string(); + let (t, name) = self.demangle_to_type(&raw_name, rva)?; + let name = name.map(|n| n.string()); + + // Sometimes the 
demangler REALLY knows what type this is supposed to be, and the + // data symbol is actually wrong. So in those cases, let the demangler take precedence + // Otherwise-- the demangler is usually wrong and clueless + let data_type = t.merge(self.lookup_type_conf(&data.type_index, false)?); + + // Ignore symbols with no name and no type + if !self + .settings + .get_bool("pdb.features.allowUnnamedVoidSymbols", Some(self.bv), None) + && name.is_none() + { + if let Some(ty) = &data_type { + if ty.contents.type_class() == TypeClass::VoidTypeClass { + return Ok(None); + } + } else { + return Ok(None); + } + } + + let name = SymbolNames { + raw_name, + short_name: name.clone(), + full_name: name, + }; + + self.log(|| { + format!( + "DATA: 0x{:x}: {:?} {:?}", + self.bv.start() + rva.0 as u64, + &name, + &data_type + ) + }); + + Ok(Some(ParsedSymbol::Data(ParsedDataSymbol { + is_public: false, + address: self.bv.start() + rva.0 as u64, + name, + type_: data_type, + }))) + } + + fn handle_public_symbol( + &mut self, + _index: SymbolIndex, + data: &PublicSymbol, + ) -> Result> { + self.log(|| format!("Got Public symbol: {:?}", data)); + let rva = data.offset.to_rva(&self.address_map).unwrap_or_default(); + let raw_name = data.name.to_string().to_string(); + let (t, name) = self.demangle_to_type(&raw_name, rva)?; + let name = name.map(|n| n.string()); + + let name = SymbolNames { + raw_name, + short_name: name.clone(), + full_name: name, + }; + + // These are generally low confidence because we only have the demangler to inform us of type + + if data.function { + self.log(|| { + format!( + "PUBLIC FUNCTION: 0x{:x}: {:?} {:?}", + self.bv.start() + rva.0 as u64, + &name, + t + ) + }); + + Ok(Some(ParsedSymbol::Procedure(ParsedProcedure { + is_public: true, + address: self.bv.start() + rva.0 as u64, + name, + type_: t, + locals: vec![], + }))) + } else { + self.log(|| { + format!( + "PUBLIC DATA: 0x{:x}: {:?} {:?}", + self.bv.start() + rva.0 as u64, + &name, + t + ) + }); + + 
Ok(Some(ParsedSymbol::Data(ParsedDataSymbol { + is_public: true, + address: self.bv.start() + rva.0 as u64, + name, + type_: t, + }))) + } + } + + /// Given a proc symbol index and guessed type (from demangler or tpi), find all the local variables + /// and parameters related to that symbol. + /// Returns Ok(Some((resolved params, locals)))) + fn lookup_locals( + &self, + index: SymbolIndex, + type_index: TypeIndex, + demangled_type: Option>>, + ) -> Result<(Option>>, Vec)> { + // So generally speaking, here's the information we have: + // - The function type is usually accurate wrt the parameter locations + // - The parameter symbols have the names we want for the params + // - The parameter symbols are a big ugly mess + // We basically want to take the function type from the type, and just fill in the + // names of all the parameters. Non-param locals don't really matter since binja + // can't handle them anyway. + + // Type parameters order needs to be like this: + // 1. `this` pointer (if exists) + // 2. Various stack params + // 3. Various register params + // We assume that if a parameter is found in a register, that is where it is passed. 
+ // Otherwise they are in the default order as per the CC + + // Get child objects and search for local variable names + let mut locals = vec![]; + let mut params = vec![]; + let mut known_frame = false; + for child in self.symbol_children(index) { + match self.lookup_symbol(&child) { + Some(ParsedSymbol::ProcedureInfo(info)) => { + params = info.params.clone(); + locals = info.locals.clone(); + known_frame = true; + } + _ => {} + } + } + + let raw_type = self.lookup_type_conf(&type_index, false)?; + let fancy_type = self.lookup_type_conf(&type_index, true)?; + + // Best guess so far in case of error handling + let fancier_type = fancy_type + .clone() + .merge(raw_type.clone()) + .merge(demangled_type.clone()); + + if !known_frame { + return Ok((fancier_type, vec![])); + } + + // We need both of these to exist (not sure why they wouldn't) + let (raw_type, fancy_type) = match (raw_type, fancy_type) { + (Some(raw), Some(fancy)) => (raw, fancy), + _ => return Ok((fancier_type, vec![])), + }; + + let raw_params = raw_type + .contents + .parameters() + .map_err(|_| anyhow!("no params"))?; + let mut fancy_params = fancy_type + .contents + .parameters() + .map_err(|_| anyhow!("no params"))?; + + // Collect all the parameters we are expecting from the symbols + let mut parsed_params = vec![]; + for p in ¶ms { + let param = FunctionParameter::new( + p.type_.clone().merge(Conf::new( + Type::int(self.arch.address_size(), false), + min_confidence(), + )), + p.name.clone(), + p.storage.get(0).map(|loc| loc.location.clone()), + ); + // Ignore thisptr because it's not technically part of the raw type signature + if p.name != "this" { + parsed_params.push(param); + } + } + let mut parsed_locals = vec![]; + for p in &locals { + let param = FunctionParameter::new( + p.type_.clone().merge(Conf::new( + Type::int(self.arch.address_size(), false), + min_confidence(), + )), + p.name.clone(), + p.storage.get(0).map(|loc| loc.location.clone()), + ); + // Ignore thisptr because it's not 
technically part of the raw type signature + if p.name != "this" { + parsed_locals.push(param); + } + } + + self.log(|| format!("Raw params: {:#x?}", raw_params)); + self.log(|| format!("Fancy params: {:#x?}", fancy_params)); + self.log(|| format!("Parsed params: {:#x?}", parsed_params)); + + // We expect one parameter for each unnamed parameter in the marked up type + let expected_param_count = fancy_params + .iter() + .filter(|p| p.name.as_str().is_empty()) + .count(); + // Sanity + if expected_param_count != raw_params.len() { + return Err(anyhow!( + "Mismatched number of formal parameters and interpreted parameters" + )); + } + + // If we don't have enough parameters to fill the slots, there's a problem here + // So just fallback to the unnamed params + if expected_param_count > parsed_params.len() { + // As per reversing of msdia140.dll (and nowhere else): if a function doesn't have + // enough parameter variables declared as parameters, the remaining parameters are + // the first however many locals. If you don't have enough of those, idk?? 
+ if expected_param_count > (parsed_params.len() + parsed_locals.len()) { + return Ok((fancier_type, vec![])); + } + parsed_params.extend(parsed_locals.into_iter()); + } + let expected_parsed_params = parsed_params + .drain(0..expected_param_count) + .collect::>(); + + // For all formal parameters, apply names to them in fancy_params + // These should be all types in fancy_params that are unnamed (named ones we inserted) + + let mut i = 0; + for p in fancy_params.iter_mut() { + if p.name.as_str().is_empty() { + if p.t.contents != expected_parsed_params[i].t.contents { + self.log(|| { + format!( + "Suspicious parameter {}: {:?} vs {:?}", + i, p, expected_parsed_params[i] + ) + }); + } + if expected_parsed_params[i].name.as_str() == "__formal" { + p.name = format!("__formal{}", i); + } else { + p.name = expected_parsed_params[i].name.clone(); + } + i += 1; + } + } + + // Now apply the default location for the params from the cc + let cc = fancy_type + .contents + .calling_convention() + .map_or_else(|_| Conf::new(self.default_cc.clone(), 0), |cc| cc); + + self.log(|| { + format!( + "Type calling convention: {:?}", + fancy_type.contents.calling_convention() + ) + }); + self.log(|| format!("Default calling convention: {:?}", self.default_cc)); + self.log(|| format!("Result calling convention: {:?}", cc)); + + let locations = cc.contents.variables_for_parameters(&fancy_params, None); + for (p, new_location) in fancy_params.iter_mut().zip(locations.into_iter()) { + p.location = Some(new_location); + } + + self.log(|| format!("Final params: {:#x?}", fancy_params)); + + // Use the new locals we've parsed to make the Real Definitely True function type + let fancy_type = Conf::new( + Type::function_with_options( + &fancy_type + .contents + .return_value() + .map_err(|_| anyhow!("no ret"))?, + fancy_params.as_slice(), + fancy_type.contents.has_variable_arguments().contents, + &cc, + fancy_type.contents.stack_adjustment(), + ), + max_confidence(), + ); + + let fancier_type = 
fancy_type + .clone() + .merge(raw_type.clone()) + .merge(demangled_type.clone()); + + self.log(|| format!("Raw type: {:#x?}", raw_type)); + self.log(|| format!("Demangled type: {:#x?}", demangled_type)); + self.log(|| format!("Fancy type: {:#x?}", fancy_type)); + self.log(|| format!("Result type: {:#x?}", fancier_type)); + + Ok((Some(fancier_type), vec![])) + } + + fn handle_procedure_symbol( + &mut self, + index: SymbolIndex, + data: &ProcedureSymbol, + ) -> Result> { + self.log(|| format!("Got Procedure symbol: {:?}", data)); + + let rva = data.offset.to_rva(&self.address_map).unwrap_or_default(); + let address = self.bv.start() + rva.0 as u64; + + let mut raw_name = data.name.to_string().to_string(); + + // Generally proc symbols have real types, but use the demangler just in case the microsoft + // public pdbs have the function type as `void` + let (t, name) = self.demangle_to_type(&raw_name, rva)?; + let mut name = name.map(|n| n.string()); + + // Some proc symbols don't have a mangled name, so try and look up their name + if name.is_none() || name.as_ref().expect("just failed none") == &raw_name { + // Lookup public symbol with the same name + if let Some(others) = self.addressed_symbols.get(&address) { + for o in others { + match o { + ParsedSymbol::Procedure(ParsedProcedure { + name: proc_name, .. 
+ }) => { + if proc_name.full_name.as_ref().unwrap_or(&proc_name.raw_name) + == &raw_name + { + name = Some(raw_name); + raw_name = proc_name.raw_name.clone(); + break; + } + } + _ => {} + } + } + } + } + + let (fn_type, locals) = self.lookup_locals(index, data.type_index, t)?; + + let name = SymbolNames { + raw_name, + short_name: name.clone(), + full_name: name, + }; + + self.log(|| format!("PROC: 0x{:x}: {:?} {:?}", address, &name, &fn_type)); + + Ok(Some(ParsedSymbol::Procedure(ParsedProcedure { + is_public: false, + address, + name, + type_: fn_type, + locals, + }))) + } + + fn handle_thread_storage_symbol( + &mut self, + _index: SymbolIndex, + data: &ThreadStorageSymbol, + ) -> Result> { + self.log(|| format!("Got ThreadStorage symbol: {:?}", data)); + Ok(None) + } + + fn handle_compile_flags_symbol( + &mut self, + _index: SymbolIndex, + data: &CompileFlagsSymbol, + ) -> Result> { + self.log(|| format!("Got CompileFlags symbol: {:?}", data)); + self.module_cpu_type = Some(data.cpu_type); + Ok(None) + } + + fn handle_using_namespace_symbol( + &mut self, + _index: SymbolIndex, + data: &UsingNamespaceSymbol, + ) -> Result> { + self.log(|| format!("Got UsingNamespace symbol: {:?}", data)); + Ok(None) + } + + fn handle_procedure_reference_symbol( + &mut self, + _index: SymbolIndex, + data: &ProcedureReferenceSymbol, + ) -> Result> { + self.log(|| format!("Got ProcedureReference symbol: {:?}", data)); + Ok(None) + } + + fn handle_data_reference_symbol( + &mut self, + _index: SymbolIndex, + data: &DataReferenceSymbol, + ) -> Result> { + self.log(|| format!("Got DataReference symbol: {:?}", data)); + Ok(None) + } + + fn handle_annotation_reference_symbol( + &mut self, + _index: SymbolIndex, + data: &AnnotationReferenceSymbol, + ) -> Result> { + self.log(|| format!("Got AnnotationReference symbol: {:?}", data)); + Ok(None) + } + + fn handle_trampoline_symbol( + &mut self, + _index: SymbolIndex, + data: &TrampolineSymbol, + ) -> Result> { + self.log(|| format!("Got 
Trampoline symbol: {:?}", data)); + let rva = data.thunk.to_rva(&self.address_map).unwrap_or_default(); + let target_rva = data.target.to_rva(&self.address_map).unwrap_or_default(); + + let address = self.bv.start() + rva.0 as u64; + let target_address = self.bv.start() + target_rva.0 as u64; + + let mut target_name = None; + let mut thunk_name = None; + + let mut fn_type: Option>> = None; + + // These have the same name as their target, so look that up + if let Some(syms) = self.addressed_symbols.get(&target_address) { + // Take name from the public symbol + for sym in syms { + match sym { + ParsedSymbol::Procedure(proc) if proc.is_public => { + fn_type = proc.type_.clone().merge(fn_type); + target_name = Some(proc.name.clone()); + } + _ => {} + } + } + // Take type from the non-public symbol if we have one + for sym in syms { + match sym { + ParsedSymbol::Procedure(proc) if !proc.is_public => { + fn_type = proc.type_.clone().merge(fn_type); + if target_name.is_none() { + target_name = Some(proc.name.clone()); + } + } + _ => {} + } + } + } + + // And handle the fact that pdb public symbols for trampolines have the name of their target + // ugh + if let Some(syms) = self.addressed_symbols.get_mut(&address) { + if let [ParsedSymbol::Procedure(proc)] = syms.as_mut_slice() { + if let Some(tn) = &target_name { + if proc.name.raw_name == tn.raw_name + || proc.name.full_name.as_ref().unwrap_or(&proc.name.raw_name) + == tn.full_name.as_ref().unwrap_or(&tn.raw_name) + { + // Yeah it's one of these symbols + let old_name = proc.name.clone(); + let new_name = SymbolNames { + raw_name: "j_".to_string() + &old_name.raw_name, + short_name: old_name.short_name.as_ref().map(|n| "j_".to_string() + n), + full_name: old_name.full_name.as_ref().map(|n| "j_".to_string() + n), + }; + + // I'm so sorry about this + // XXX: Update the parsed public symbol's name to use j_ syntax + if let Some(idx) = self.named_symbols.remove(&old_name.raw_name) { + 
self.named_symbols.insert(new_name.raw_name.clone(), idx); + } + if let Some(idx) = self.parsed_symbols_by_name.remove(&old_name.raw_name) { + self.parsed_symbols_by_name + .insert(new_name.raw_name.clone(), idx); + match &mut self.parsed_symbols[idx] { + ParsedSymbol::Data(ParsedDataSymbol { + name: parsed_name, .. + }) + | ParsedSymbol::Procedure(ParsedProcedure { + name: parsed_name, + .. + }) => { + parsed_name.raw_name = new_name.raw_name.clone(); + parsed_name.short_name = new_name.short_name.clone(); + parsed_name.full_name = new_name.full_name.clone(); + } + _ => {} + } + } + proc.name = new_name.clone(); + thunk_name = Some(new_name); + } + } + } + } + + if thunk_name.is_none() { + if let Some(tn) = target_name { + thunk_name = Some(SymbolNames { + raw_name: "j_".to_string() + &tn.raw_name, + short_name: tn.short_name.as_ref().map(|n| "j_".to_string() + n), + full_name: tn.full_name.as_ref().map(|n| "j_".to_string() + n), + }); + } + } + + let name = thunk_name.unwrap_or(SymbolNames { + raw_name: format!("j_sub_{:x}", target_address), + short_name: None, + full_name: None, + }); + + self.log(|| format!("TRAMPOLINE: 0x{:x}: {:?} {:?}", address, &name, &fn_type)); + + Ok(Some(ParsedSymbol::Procedure(ParsedProcedure { + is_public: false, + address, + name, + type_: fn_type, + locals: vec![], + }))) + } + + fn handle_export_symbol( + &mut self, + _index: SymbolIndex, + data: &ExportSymbol, + ) -> Result> { + self.log(|| format!("Got Export symbol: {:?}", data)); + Ok(None) + } + + fn handle_local_symbol( + &mut self, + index: SymbolIndex, + data: &LocalSymbol, + ) -> Result> { + self.log(|| format!("Got Local symbol: {:?}", data)); + // Look for definition ranges for this symbol + let mut locations = vec![]; + for child in self.symbol_children(index) { + match self.lookup_symbol(&child) { + Some(ParsedSymbol::Location(loc)) => { + locations.push(loc.clone()); + } + _ => {} + } + } + + Ok(Some(ParsedSymbol::LocalVariable(ParsedVariable { + name: 
data.name.to_string().to_string(), + type_: self.lookup_type_conf(&data.type_index, false)?, + storage: locations, + is_param: data.flags.isparam, + }))) + } + + fn handle_build_info_symbol( + &mut self, + _index: SymbolIndex, + data: &BuildInfoSymbol, + ) -> Result> { + self.log(|| format!("Got BuildInfo symbol: {:?}", data)); + Ok(None) + } + + fn handle_inline_site_symbol( + &mut self, + _index: SymbolIndex, + data: &InlineSiteSymbol, + ) -> Result> { + self.log(|| format!("Got InlineSite symbol: {:?}", data)); + Ok(None) + } + + fn handle_inline_site_end_symbol( + &mut self, + _index: SymbolIndex, + ) -> Result> { + self.log(|| format!("Got InlineSiteEnd symbol")); + Ok(None) + } + + fn handle_procedure_end_symbol(&mut self, _index: SymbolIndex) -> Result> { + self.log(|| format!("Got ProcedureEnd symbol")); + Ok(None) + } + + fn handle_label_symbol( + &mut self, + _index: SymbolIndex, + data: &LabelSymbol, + ) -> Result> { + self.log(|| format!("Got Label symbol: {:?}", data)); + Ok(None) + } + + fn handle_block_symbol( + &mut self, + _index: SymbolIndex, + data: &BlockSymbol, + ) -> Result> { + self.log(|| format!("Got Block symbol: {:?}", data)); + Ok(None) + } + + fn handle_register_relative_symbol( + &mut self, + _index: SymbolIndex, + data: &RegisterRelativeSymbol, + ) -> Result> { + self.log(|| format!("Got RegisterRelative symbol: {:?}", data)); + match self.lookup_register(data.register) { + Some(X86(X86Register::EBP)) | Some(AMD64(AMD64Register::RBP)) => { + // Local is relative to base pointer + // This is just another way of writing BasePointerRelativeSymbol + Ok(Some(ParsedSymbol::LocalVariable(ParsedVariable { + name: data.name.to_string().to_string(), + type_: self.lookup_type_conf(&data.type_index, false)?, + storage: vec![ParsedLocation { + location: Variable { + t: BNVariableSourceType::StackVariableSourceType, + index: 0, + storage: data.offset as i64, + }, + base_relative: true, // !! + stack_relative: false, // !! 
+ }], + is_param: data.slot.map_or(false, |slot| slot > 0), + }))) + } + Some(X86(X86Register::ESP)) | Some(AMD64(AMD64Register::RSP)) => { + // Local is relative to stack pointer + // This is the same as base pointer case except not base relative (ofc) + Ok(Some(ParsedSymbol::LocalVariable(ParsedVariable { + name: data.name.to_string().to_string(), + type_: self.lookup_type_conf(&data.type_index, false)?, + storage: vec![ParsedLocation { + location: Variable { + t: BNVariableSourceType::StackVariableSourceType, + index: 0, + storage: data.offset as i64, + }, + base_relative: false, // !! + stack_relative: true, // !! + }], + is_param: data.slot.map_or(false, |slot| slot > 0), + }))) + } + _ => { + // Local is relative to some non-bp register. + // This is, of course, totally possible and normal + // Binja just can't handle it in the slightest. + // Soooooooo ???? + // TODO + Ok(None) + } + } + } + + fn handle_thunk_symbol( + &mut self, + _index: SymbolIndex, + data: &ThunkSymbol, + ) -> Result> { + self.log(|| format!("Got Thunk symbol: {:?}", data)); + let rva = data.offset.to_rva(&self.address_map).unwrap_or_default(); + let raw_name = data.name.to_string().to_string(); + let address = self.bv.start() + rva.0 as u64; + + let (t, name) = self.demangle_to_type(&raw_name, rva)?; + let name = name.map(|n| n.string()); + let mut fn_type = t; + + // These have the same name as their target, so look that up + if let Some(&idx) = self.named_symbols.get(&raw_name) { + if let Some(ParsedSymbol::Procedure(proc)) = self.indexed_symbols.get(&idx) { + fn_type = proc.type_.clone().merge(fn_type); + } + } + + let mut thunk_name = None; + + // And handle the fact that pdb public symbols for thunks have the name of their target + // ugh + if let Some(syms) = self.addressed_symbols.get_mut(&address) { + if let [ParsedSymbol::Procedure(proc)] = syms.as_mut_slice() { + // Yeah it's one of these symbols + // Make sure we don't do this twice (does that even happen?) 
+ if !proc.name.raw_name.starts_with("j_") { + let old_name = proc.name.clone(); + let new_name = SymbolNames { + raw_name: "j_".to_string() + &old_name.raw_name, + short_name: Some( + "j_".to_string() + old_name.short_name.as_ref().unwrap_or(&raw_name), + ), + full_name: Some( + "j_".to_string() + old_name.full_name.as_ref().unwrap_or(&raw_name), + ), + }; + + // I'm so sorry about this + // XXX: Update the parsed public symbol's name to use j_ syntax + if let Some(idx) = self.named_symbols.remove(&old_name.raw_name) { + self.named_symbols.insert(new_name.raw_name.clone(), idx); + } + if let Some(idx) = self.parsed_symbols_by_name.remove(&old_name.raw_name) { + self.parsed_symbols_by_name + .insert(new_name.raw_name.clone(), idx); + match &mut self.parsed_symbols[idx] { + ParsedSymbol::Data(ParsedDataSymbol { + name: parsed_name, .. + }) + | ParsedSymbol::Procedure(ParsedProcedure { + name: parsed_name, .. + }) => { + parsed_name.raw_name = new_name.raw_name.clone(); + parsed_name.short_name = new_name.short_name.clone(); + parsed_name.full_name = new_name.full_name.clone(); + } + _ => {} + } + } + proc.name = new_name.clone(); + thunk_name = Some(new_name); + } + } + } + + let locals = vec![]; + let name = thunk_name.unwrap_or(SymbolNames { + raw_name, + short_name: name.clone(), + full_name: name, + }); + + self.log(|| format!("THUNK: 0x{:x}: {:?} {:?}", address, &name, &fn_type)); + + Ok(Some(ParsedSymbol::Procedure(ParsedProcedure { + is_public: false, + address: address, + name, + type_: fn_type, + locals, + }))) + } + + fn handle_separated_code_symbol( + &mut self, + _index: SymbolIndex, + data: &SeparatedCodeSymbol, + ) -> Result> { + self.log(|| format!("Got SeparatedCode symbol: {:?}", data)); + Ok(None) + } + + fn handle_def_range( + &mut self, + _index: SymbolIndex, + data: &DefRangeSymbol, + ) -> Result> { + self.log(|| format!("Got DefRange symbol: {:?}", data)); + Ok(None) + } + + fn handle_def_range_sub_field( + &mut self, + _index: SymbolIndex, + 
data: &DefRangeSubFieldSymbol, + ) -> Result> { + self.log(|| format!("Got DefRangeSubField symbol: {:?}", data)); + Ok(None) + } + + fn handle_def_range_register( + &mut self, + _index: SymbolIndex, + data: &DefRangeRegisterSymbol, + ) -> Result> { + self.log(|| format!("Got DefRangeRegister symbol: {:?}", data)); + if let Some(reg) = self.convert_register(data.register) { + Ok(Some(ParsedSymbol::Location(ParsedLocation { + location: Variable { + t: BNVariableSourceType::RegisterVariableSourceType, + index: 0, + storage: reg, + }, + base_relative: false, + stack_relative: false, + }))) + } else { + Ok(None) + } + } + + fn handle_def_range_frame_pointer_relative_symbol( + &mut self, + _index: SymbolIndex, + data: &DefRangeFramePointerRelativeSymbol, + ) -> Result> { + self.log(|| format!("Got DefRangeFramePointerRelative symbol: {:?}", data)); + Ok(None) + } + + fn handle_def_range_frame_pointer_relative_full_scope_symbol( + &mut self, + _index: SymbolIndex, + data: &DefRangeFramePointerRelativeFullScopeSymbol, + ) -> Result> { + self.log(|| { + format!( + "Got DefRangeFramePointerRelativeFullScope symbol: {:?}", + data + ) + }); + Ok(None) + } + + fn handle_def_range_sub_field_register_symbol( + &mut self, + _index: SymbolIndex, + data: &DefRangeSubFieldRegisterSymbol, + ) -> Result> { + self.log(|| format!("Got DefRangeSubFieldRegister symbol: {:?}", data)); + Ok(None) + } + + fn handle_def_range_register_relative_symbol( + &mut self, + _index: SymbolIndex, + data: &DefRangeRegisterRelativeSymbol, + ) -> Result> { + self.log(|| format!("Got DefRangeRegisterRelative symbol: {:?}", data)); + Ok(None) + } + + fn handle_base_pointer_relative_symbol( + &mut self, + _index: SymbolIndex, + data: &BasePointerRelativeSymbol, + ) -> Result> { + self.log(|| format!("Got BasePointerRelative symbol: {:?}", data)); + + // These are usually parameters if offset > 0 + + Ok(Some(ParsedSymbol::LocalVariable(ParsedVariable { + name: data.name.to_string().to_string(), + type_: 
self.lookup_type_conf(&data.type_index, false)?, + storage: vec![ParsedLocation { + location: Variable { + t: BNVariableSourceType::StackVariableSourceType, + index: 0, + storage: data.offset as i64, + }, + base_relative: true, + stack_relative: false, + }], + is_param: data.offset as i64 > 0 || data.slot.map_or(false, |slot| slot > 0), + }))) + } + + fn handle_frame_procedure_symbol( + &mut self, + index: SymbolIndex, + data: &FrameProcedureSymbol, + ) -> Result> { + self.log(|| format!("Got FrameProcedure symbol: {:?}", data)); + + // This symbol generally comes before a proc and all various parameters + // It has a lot of information we don't care about, and some information we maybe do? + // This function also tries to find all the locals and parameters of the procedure + + let mut params = vec![]; + let mut locals = vec![]; + let mut seen_offsets = HashSet::new(); + + for child in self.symbol_children(index) { + match self.lookup_symbol(&child) { + Some(ParsedSymbol::LocalVariable(ParsedVariable { + name, + type_, + storage, + is_param, + .. + })) => { + let new_storage = storage.iter().map(|&var| var.location).collect::>(); + + // See if the parameter really is a parameter. Sometimes they don't say they are + let mut really_is_param = *is_param; + for loc in &new_storage { + match loc { + Variable { + t: BNVariableSourceType::RegisterVariableSourceType, + .. + } => { + // Assume register vars are always parameters + really_is_param = true; + } + Variable { + t: BNVariableSourceType::StackVariableSourceType, + storage, + .. + } if *storage >= 0 => { + // Sometimes you can get two locals at the same offset, both rbp+(x > 0) + // I'm guessing from looking at dumps from dia2dump that only the first + // one is considered a parameter, although there are times that I see + // two params at the same offset and both are considered parameters... + // This doesn't seem possible (or correct) because they would overlap + // and only one would be useful anyway. 
+ // Regardless of the mess, Binja can only handle one parameter per slot + // so we're just going to use the first one. + really_is_param = seen_offsets.insert(*storage); + } + _ => {} + } + } + + if really_is_param { + params.push(ParsedVariable { + name: name.clone(), + type_: type_.clone(), + storage: new_storage + .into_iter() + .map(|loc| ParsedLocation { + location: loc, + // This has been handled now + base_relative: false, + stack_relative: false, + }) + .collect(), + is_param: really_is_param, + }); + } else { + locals.push(ParsedVariable { + name: name.clone(), + type_: type_.clone(), + storage: new_storage + .into_iter() + .map(|loc| ParsedLocation { + location: loc, + // This has been handled now + base_relative: false, + stack_relative: false, + }) + .collect(), + is_param: really_is_param, + }); + } + } + Some(ParsedSymbol::Data(_)) => { + // Apparently you can have static data symbols as parameters + // Because of course you can + } + None => {} + e => self.log(|| format!("Unexpected symbol type in frame: {:?}", e)), + } + } + + Ok(Some(ParsedSymbol::ProcedureInfo(ParsedProcedureInfo { + params, + locals, + }))) + } + + fn handle_call_site_info( + &mut self, + _index: SymbolIndex, + data: &CallSiteInfoSymbol, + ) -> Result> { + self.log(|| format!("Got CallSiteInfo symbol: {:?}", data)); + Ok(None) + } + + /// Demangle a name and get a type out + /// Also fixes void(void) and __s_RTTI_Nonsense + fn demangle_to_type( + &self, + raw_name: &String, + rva: Rva, + ) -> Result<(Option>>, Option)> { + let (mut t, mut name) = match demangle_ms(&self.arch, raw_name.clone(), true) { + Ok((Some(t), name)) => (Some(Conf::new(t, DEMANGLE_CONFIDENCE)), name), + Ok((_, name)) => (None, name), + _ => (None, vec![raw_name.clone()]), + }; + + if let Some(ty) = t.as_ref() { + if ty.contents.type_class() == TypeClass::FunctionTypeClass { + // demangler makes (void) into (void arg1) which is wrong + let parameters = ty + .contents + .parameters() + .map_err(|_| 
anyhow!("no parameters"))?; + if let [p] = parameters.as_slice() { + if p.t.contents.type_class() == TypeClass::VoidTypeClass { + t = Some(Conf::new( + Type::function::<_>( + &ty.contents + .return_value() + .map_err(|_| anyhow!("no return value"))?, + &[], + ty.contents.has_variable_arguments().contents, + ), + ty.confidence, + )) + } + } + } + } + + // These have types but they aren't actually set anywhere. So it's the demangler's + // job to take care of them, apparently? + let name_to_type: HashMap> = HashMap::from_iter([ + ( + "`RTTI Complete Object Locator'".to_string(), + vec![ + "_s_RTTICompleteObjectLocator".to_string(), + "_s__RTTICompleteObjectLocator".to_string(), + "_s__RTTICompleteObjectLocator2".to_string(), + ], + ), + ( + "`RTTI Class Hierarchy Descriptor'".to_string(), + vec![ + "_s_RTTIClassHierarchyDescriptor".to_string(), + "_s__RTTIClassHierarchyDescriptor".to_string(), + "_s__RTTIClassHierarchyDescriptor2".to_string(), + ], + ), + ( + // TODO: This type is dynamic + "`RTTI Base Class Array'".to_string(), + vec![ + "_s_RTTIBaseClassArray".to_string(), + "_s__RTTIBaseClassArray".to_string(), + "_s__RTTIBaseClassArray2".to_string(), + ], + ), + ( + "`RTTI Base Class Descriptor at (".to_string(), + vec![ + "_s_RTTIBaseClassDescriptor".to_string(), + "_s__RTTIBaseClassDescriptor".to_string(), + "_s__RTTICBaseClassDescriptor2".to_string(), + ], + ), + ( + "`RTTI Type Descriptor'".to_string(), + vec!["_TypeDescriptor".to_string()], + ), + ]); + + if let Some(last_name) = name.last() { + for (search_name, search_types) in &name_to_type { + if last_name.contains(search_name) { + for search_type in search_types { + if let Some(ty) = self.named_types.get(search_type) { + // Fallback in case we don't find a specific one + t = Some(Conf::new( + Type::named_type_from_type(search_type, ty.as_ref()), + max_confidence(), + )); + + if self.settings.get_bool( + "pdb.features.expandRTTIStructures", + Some(self.bv), + None, + ) { + if let Some((lengthy_type, 
length)) = + self.make_lengthy_type(ty, self.bv.start() + rva.0 as u64)? + { + // See if we have a type with this length + let lengthy_name = + format!("${}$_extraBytes_{}", search_type, length); + + if let Some(ty) = self.named_types.get(&lengthy_name) { + // Wow! + t = Some(Conf::new( + Type::named_type_from_type(lengthy_name, ty.as_ref()), + max_confidence(), + )); + } else { + t = Some(Conf::new(lengthy_type, max_confidence())); + } + } + } + } + } + } + } + } + + // VTables have types on their data symbols, + if let Some((class_name, last)) = name.join("::").rsplit_once("::") { + if last.contains("`vftable'") { + let mut vt_name = class_name.to_string() + "::" + "VTable"; + if last.contains("{for") { + // DerivedClass::`vftable'{for `BaseClass'} + let mut base_name = last.to_owned(); + base_name.drain(0..("`vftable'{for `".len())); + base_name.drain((base_name.len() - "'}".len())..(base_name.len())); + // Multiply inherited classes have multiple vtable types + // TODO: Do that + vt_name = base_name + "::" + "VTable"; + } + + vt_name = vt_name + .replace("class ", "") + .replace("struct ", "") + .replace("enum ", ""); + + if let Some(ty) = self.named_types.get(&vt_name) { + t = Some(Conf::new( + Type::named_type_from_type(&vt_name, ty.as_ref()), + max_confidence(), + )); + } else { + // Sometimes the demangler has trouble with `class Foo` in templates + vt_name = vt_name + .replace("class ", "") + .replace("struct ", "") + .replace("enum ", ""); + + if let Some(ty) = self.named_types.get(&vt_name) { + t = Some(Conf::new( + Type::named_type_from_type(&vt_name, ty.as_ref()), + max_confidence(), + )); + } else { + t = Some(Conf::new( + Type::named_type_from_type( + &vt_name, + Type::structure(StructureBuilder::new().finalize().as_ref()) + .as_ref(), + ), + DEMANGLE_CONFIDENCE, + )); + } + } + } + } + + if let Some(last_name) = name.last_mut() { + if last_name.starts_with("__imp_") { + last_name.drain(0..("__imp_".len())); + } + } + + let name = if name.len() == 1 
&& &name[0] == raw_name && raw_name.starts_with('?') { + None + } else if name.len() == 1 && name[0] == "" { + None + } else if name.len() > 0 && name[0].starts_with("\x7f") { + // Not sure why these exist but they do Weird Stuff + name[0].drain(0..1); + Some(QualifiedName::from(name)) + } else { + Some(QualifiedName::from(name)) + }; + + Ok((t, name)) + } + + fn make_lengthy_type( + &self, + base_type: &Ref, + base_address: u64, + ) -> Result, usize)>> { + if base_type.type_class() != TypeClass::StructureTypeClass { + return Ok(None); + } + let structure = base_type + .get_structure() + .map_err(|_| anyhow!("Expected structure"))?; + let mut members = structure + .members() + .map_err(|_| anyhow!("Expected structure to have members"))?; + let last_member = members + .last_mut() + .ok_or_else(|| anyhow!("Not enough members"))?; + + if last_member.ty.contents.type_class() != TypeClass::ArrayTypeClass { + return Ok(None); + } + if last_member.ty.contents.count() != 0 { + return Ok(None); + } + + let member_element = last_member + .ty + .contents + .element_type() + .map_err(|_| anyhow!("Last member has no type"))? 
+ .contents; + let member_width = member_element.width(); + + // Read member_width bytes from bv starting at that member, until we read all zeroes + let member_address = base_address + last_member.offset; + + let mut bytes = Vec::::new(); + bytes.resize(member_width as usize, 0); + + let mut element_count = 0; + while self.bv.read( + bytes.as_mut_slice(), + member_address + member_width * element_count, + ) == member_width as usize + { + if bytes.iter().all(|&b| b == 0) { + break; + } + element_count += 1; + } + + // Make a new copy of the type with the correct element count + last_member.ty.contents = Type::array(member_element.as_ref(), element_count); + + Ok(Some(( + Type::structure(StructureBuilder::from(members).finalize().as_ref()), + element_count as usize, + ))) + } + + /// Sorry about the type names + /// Given a pdb::Register (u32), get a pdb::register::Register (big enum with names) + fn lookup_register(&self, reg: pdb::Register) -> Option { + if let Some(cpu) = self.module_cpu_type { + pdb::register::Register::new(reg, cpu).ok() + } else { + None + } + } + + /// Convert a pdb::Register (u32) to a binja register index for the current arch + fn convert_register(&self, reg: pdb::Register) -> Option { + match self.lookup_register(reg) { + Some(X86(xreg)) => { + self.log(|| format!("Register {:?} ==> {:?}", reg, xreg)); + self.arch + .register_by_name(xreg.to_string().to_lowercase()) + .map(|reg| reg.id() as i64) + } + Some(AMD64(areg)) => { + self.log(|| format!("Register {:?} ==> {:?}", reg, areg)); + self.arch + .register_by_name(areg.to_string().to_lowercase()) + .map(|reg| reg.id() as i64) + } + // TODO: Other arches + _ => None, + } + } +} diff --git a/rust/examples/pdb-ng/src/type_parser.rs b/rust/examples/pdb-ng/src/type_parser.rs new file mode 100644 index 0000000000..dc23f2ff15 --- /dev/null +++ b/rust/examples/pdb-ng/src/type_parser.rs @@ -0,0 +1,2445 @@ +// Copyright 2022-2024 Vector 35 Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use anyhow::{anyhow, Result}; +use binaryninja::architecture::{Architecture, CoreArchitecture}; +use binaryninja::binaryview::BinaryViewExt; +use binaryninja::callingconvention::CallingConvention; +use binaryninja::platform::Platform; +use binaryninja::rc::Ref; +use binaryninja::types::{ + max_confidence, BaseStructure, Conf, EnumerationBuilder, EnumerationMember, FunctionParameter, + MemberAccess, MemberScope, NamedTypeReference, NamedTypeReferenceClass, QualifiedName, + StructureBuilder, StructureMember, StructureType, Type, TypeBuilder, TypeClass, +}; +use log::warn; +use pdb::Error::UnimplementedTypeKind; +use pdb::{ + ArgumentList, ArrayType, BaseClassType, BitfieldType, ClassKind, ClassType, EnumerateType, + EnumerationType, FallibleIterator, FieldAttributes, FieldList, FunctionAttributes, Indirection, + ItemFinder, MemberFunctionType, MemberType, MethodList, MethodType, ModifierType, NestedType, + OverloadedMethodType, PointerMode, PointerType, PrimitiveKind, PrimitiveType, ProcedureType, + Source, StaticMemberType, TypeData, TypeIndex, UnionType, Variant, VirtualBaseClassType, + VirtualFunctionTablePointerType, VirtualFunctionTableType, VirtualTableShapeType, +}; +use regex::Regex; + +use crate::struct_grouper::group_structure; +use crate::PDBParserInstance; + +static BUILTIN_NAMES: &[&'static str] = &[ + "size_t", + "ssize_t", + "ptrdiff_t", + "wchar_t", + 
"wchar16", + "wchar32", + "bool", +]; +// const VOID_RETURN_CONFIDENCE: u8 = 16; + +/// Function types +#[derive(Debug, Clone)] +pub struct ParsedProcedureType { + /// Interpreted type of the method, with thisptr, __return, etc + pub method_type: Ref, + /// Base method type right outta the pdb with no frills + pub raw_method_type: Ref, +} + +/// Bitfield member type, if we ever get around to implementing these +#[derive(Debug, Clone)] +pub struct ParsedBitfieldType { + /// Size in bits + pub size: u64, + /// Bit offset in the current bitfield set + pub position: u64, + /// Underlying type of the whole bitfield set + pub ty: Ref, +} + +/// Parsed member of a class/structure, basically just binaryninja::StructureMember but with bitfields :( +#[derive(Debug, Clone)] +pub struct ParsedMember { + /// Member type + pub ty: Conf>, + /// Member name + pub name: String, + /// Offset in structure + pub offset: u64, + /// Access flags + pub access: MemberAccess, + /// Scope doesn't really mean anything in binja + pub scope: MemberScope, + /// Bitfield size, if this is in a bitfield. Mainly you should just be checking for Some() + pub bitfield_size: Option, + /// Bit offset, if this is in a bitfield. Mainly you should just be checking for Some() + pub bitfield_position: Option, +} + +/// Parsed named method of a class +#[derive(Debug, Clone)] +pub struct ParsedMethod { + /// Attributes from pdb-rs + pub attributes: FieldAttributes, + /// Name of method + pub name: String, + /// Type of the method + class info + pub method_type: ParsedMemberFunction, + /// Offset in class's virtual table, if virtual + pub vtable_offset: Option, +} + +/// One entry in a list of parsed methods? 
This is just here so overloaded methods have a struct to use +#[derive(Debug, Clone)] +pub struct ParsedMethodListEntry { + /// Attributes from pdb-rs + pub attributes: FieldAttributes, + /// Type of the method + class info + pub method_type: ParsedMemberFunction, + /// Offset in class's virtual table, if virtual + pub vtable_offset: Option, +} + +/// Parsed member function type info +#[derive(Debug, Clone)] +pub struct ParsedMemberFunction { + /// Attributes from pdb-rs + pub attributes: FunctionAttributes, + /// Parent class's name + pub class_name: String, + /// Interpreted type of the method, with thisptr, __return, etc + pub method_type: Ref, + /// Base method type right outta the pdb with no frills + pub raw_method_type: Ref, + /// Type of thisptr object, if relevant + pub this_pointer_type: Option>, + /// Adjust to thisptr at start, for virtual bases or something + pub this_adjustment: usize, +} + +/// Virtual base class, c++ nightmare fuel +#[derive(Debug, Clone)] +pub struct VirtualBaseClass { + /// Base class name + pub base_name: String, + /// Base class type + pub base_type: Ref, + /// Offset in this class where the base's fields are located + pub base_offset: u64, + /// Type of vbtable, probably + pub base_table_type: Ref, + /// Offset of this base in the vbtable + pub base_table_offset: u64, +} + +/// Mega enum of all the different types of types we can parse +#[derive(Debug, Clone)] +pub enum ParsedType { + /// No info other than type data + Bare(Ref), + /// Named fully parsed class/enum/union/etc type + Named(String, Ref), + /// Function procedure + Procedure(ParsedProcedureType), + /// Bitfield entries + BitfieldType(ParsedBitfieldType), + /// A list of members for a structure / union + FieldList(Vec), + /// One member in a structure/union + Member(ParsedMember), + /// Base class name and offset details + BaseClass(String, StructureMember), + /// One member in an enumeration + Enumerate(EnumerationMember), + /// List of arguments to a function + 
ArgumentList(Vec), + /// Parsed member function type info + MemberFunction(ParsedMemberFunction), + /// Parsed named method of a class + Method(ParsedMethod), + /// List of all the methods in a class + MethodList(Vec), + /// (Name, Overloads) equivalent to ParsedMethod + OverloadedMethod(String, Vec), + /// Virtual table shape straight outta pdb-rs + VTableShape(Vec), + /// Also virtual table shape, but you want a pointer this time + VTablePointer(Vec), + /// Virtual base class, c++ nightmare fuel + VBaseClass(VirtualBaseClass), +} + +#[allow(non_camel_case_types)] +#[derive(Debug)] +pub enum CV_call_t { + NEAR_C = 1, + FAR_C = 2, + NEAR_PASCAL = 3, + FAR_PASCAL = 4, + NEAR_FAST = 5, + FAR_FAST = 6, + SKIPPED = 7, + NEAR_STD = 8, + FAR_STD = 9, + NEAR_SYS = 10, + FAR_SYS = 11, + THISCALL = 12, + MIPSCALL = 13, + GENERIC = 14, + ALPHACALL = 15, + PPCCALL = 16, + SHCALL = 17, + ARMCALL = 18, + AM33CALL = 19, + TRICALL = 20, + SH5CALL = 21, + M32RCALL = 22, + ALWAYS_INLINED = 23, + NEAR_VECTOR = 24, + RESERVED = 25, +} + +impl TryFrom for CV_call_t { + type Error = anyhow::Error; + + fn try_from(value: u8) -> Result { + match value { + 0 => Err(anyhow!("Empty calling convention")), + 1 => Ok(Self::NEAR_C), + 2 => Ok(Self::FAR_C), + 3 => Ok(Self::NEAR_PASCAL), + 4 => Ok(Self::FAR_PASCAL), + 5 => Ok(Self::NEAR_FAST), + 6 => Ok(Self::FAR_FAST), + 7 => Ok(Self::SKIPPED), + 8 => Ok(Self::NEAR_STD), + 9 => Ok(Self::FAR_STD), + 10 => Ok(Self::NEAR_SYS), + 11 => Ok(Self::FAR_SYS), + 12 => Ok(Self::THISCALL), + 13 => Ok(Self::MIPSCALL), + 14 => Ok(Self::GENERIC), + 15 => Ok(Self::ALPHACALL), + 16 => Ok(Self::PPCCALL), + 17 => Ok(Self::SHCALL), + 18 => Ok(Self::ARMCALL), + 19 => Ok(Self::AM33CALL), + 20 => Ok(Self::TRICALL), + 21 => Ok(Self::SH5CALL), + 22 => Ok(Self::M32RCALL), + 23 => Ok(Self::ALWAYS_INLINED), + 24 => Ok(Self::NEAR_VECTOR), + 25 => Ok(Self::RESERVED), + e => Err(anyhow!("Unknown CV_call_t convention {}", e)), + } + } +} + +/// This is all done in the parser 
instance namespace because the lifetimes are impossible to +/// wrangle otherwise. +impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> { + /// Parse all the types in a pdb + pub fn parse_types( + &mut self, + progress: Box Result<()> + '_>, + ) -> Result<()> { + // Hack: This is needed for primitive types but it's not defined in the pdb itself + self.named_types + .insert("HRESULT".to_string(), Type::int(4, true)); + + let type_information = self.pdb.type_information()?; + let mut finder = type_information.finder(); + + let mut type_count = 0; + + // Do an initial pass on the types to find the full indexes for named types + // In case something like an array needs to reference them before they're fully defined + let mut prepass_types = type_information.iter(); + while let Some(ty) = prepass_types.next()? { + type_count += 1; + finder.update(&prepass_types); + match ty.parse() { + Ok(TypeData::Class(data)) => { + if !data.properties.forward_reference() { + self.full_type_indices.insert( + data.unique_name + .unwrap_or(data.name) + .to_string() + .to_string(), + ty.index(), + ); + } + } + Ok(TypeData::Enumeration(data)) => { + if !data.properties.forward_reference() { + self.full_type_indices.insert( + data.unique_name + .unwrap_or(data.name) + .to_string() + .to_string(), + ty.index(), + ); + } + } + Ok(TypeData::Union(data)) => { + if !data.properties.forward_reference() { + self.full_type_indices.insert( + data.unique_name + .unwrap_or(data.name) + .to_string() + .to_string(), + ty.index(), + ); + } + } + _ => {} + } + } + + self.log(|| format!("Now parsing named types")); + + // Parse the types we care about, so that recursion gives us parent relationships for free + let mut types = type_information.iter(); + let mut i = 0; + while let Some(ty) = types.next()? 
{ + i += 1; + (progress)(i, type_count * 2)?; + + match ty.parse() { + Ok(TypeData::Class(_)) | Ok(TypeData::Enumeration(_)) | Ok(TypeData::Union(_)) => { + self.handle_type_index(ty.index(), &mut finder)?; + } + _ => {} + } + + assert!(self.namespace_stack.is_empty()); + assert!(self.type_stack.is_empty()); + } + + self.log(|| format!("Now parsing unused floating types")); + + // Parse the rest because symbols often use them + let mut postpass_types = type_information.iter(); + while let Some(ty) = postpass_types.next()? { + i += 1; + (progress)(i, type_count * 2)?; + + self.handle_type_index(ty.index(), &mut finder)?; + } + + self.log(|| format!("Now adding all unreferenced named types")); + // Any referenced named types that are only forward-declared will cause missing type references, + // so create empty types for those here. + for (_, parsed) in &self.indexed_types { + match parsed { + ParsedType::Bare(ty) if ty.type_class() == TypeClass::NamedTypeReferenceClass => { + // See if we have this type + let name = ty + .get_named_type_reference() + .map_err(|_| anyhow!("expected ntr"))? + .name() + .to_string(); + if Self::is_name_anonymous(&name) { + continue; + } + if self.named_types.contains_key(&name) { + continue; + } + // If the bv has this type, DebugInfo will just update us to reference it + if let Some(_) = self.bv.get_type_by_name(&name) { + continue; + } + + self.log(|| format!("Got undefined but referenced named type: {}", &name)); + let type_class = ty + .get_named_type_reference() + .map_err(|_| anyhow!("expected ntr"))? 
+ .class(); + + let bare_type = match type_class { + NamedTypeReferenceClass::ClassNamedTypeClass => Type::structure( + StructureBuilder::new() + .set_structure_type(StructureType::ClassStructureType) + .finalize() + .as_ref(), + ), + // Missing typedefs are just going to become structures + NamedTypeReferenceClass::UnknownNamedTypeClass + | NamedTypeReferenceClass::TypedefNamedTypeClass + | NamedTypeReferenceClass::StructNamedTypeClass => { + Type::structure(StructureBuilder::new().finalize().as_ref()) + } + NamedTypeReferenceClass::UnionNamedTypeClass => Type::structure( + StructureBuilder::new() + .set_structure_type(StructureType::UnionStructureType) + .finalize() + .as_ref(), + ), + NamedTypeReferenceClass::EnumNamedTypeClass => Type::enumeration( + EnumerationBuilder::new().finalize().as_ref(), + self.arch.default_integer_size(), + false, + ), + }; + + self.log(|| format!("Bare type created: {} {}", &name, &bare_type)); + self.named_types.insert(name, bare_type); + } + _ => {} + } + } + + // Cleanup a couple builtin names + for &name in BUILTIN_NAMES { + if self.named_types.contains_key(name) { + self.named_types.remove(name); + self.log(|| format!("Remove builtin type {}", name)); + } + } + let mut remove_names = vec![]; + for (name, _) in &self.named_types { + if Regex::new(r"u?int\d+_t")?.is_match(name) { + remove_names.push(name.clone()); + } + if Regex::new(r"float\d+")?.is_match(name) { + remove_names.push(name.clone()); + } + } + for name in remove_names { + self.named_types.remove(&name); + self.log(|| format!("Remove builtin type {}", &name)); + } + + Ok(()) + } + + /// Lookup a type in the parsed types by its index (ie for a procedure) + pub(crate) fn lookup_type( + &self, + index: &TypeIndex, + fancy_procs: bool, + ) -> Result>> { + match self.indexed_types.get(index) { + Some(ParsedType::Bare(ty)) => Ok(Some(ty.clone())), + Some(ParsedType::Named(name, ty)) => Ok(Some(Type::named_type_from_type(name, &ty))), + 
Some(ParsedType::Procedure(ParsedProcedureType { + method_type, + raw_method_type, + })) => { + if fancy_procs { + Ok(Some(method_type.clone())) + } else { + Ok(Some(raw_method_type.clone())) + } + } + Some(ParsedType::MemberFunction(ParsedMemberFunction { + method_type, + raw_method_type, + .. + })) => { + if fancy_procs { + Ok(Some(method_type.clone())) + } else { + Ok(Some(raw_method_type.clone())) + } + } + Some(ParsedType::Member(ParsedMember { ty, .. })) => Ok(Some(ty.contents.clone())), + _ => Ok(None), + } + } + + /// Lookup a type in the parsed types and get a confidence value for it too + pub(crate) fn lookup_type_conf( + &self, + index: &TypeIndex, + fancy_procs: bool, + ) -> Result>>> { + match self.lookup_type(index, fancy_procs)? { + Some(ty) if ty.type_class() == TypeClass::VoidTypeClass => Ok(Some(Conf::new(ty, 0))), + Some(ty) => { + let mut confidence = max_confidence(); + + // Extra check here for void(void) functions, they should get minimum confidence since this + // is the signature PDB uses when it doesn't actually know the signature + if ty.type_class() == TypeClass::FunctionTypeClass { + if let Ok(ret) = ty.return_value() { + if ret.contents.type_class() == TypeClass::VoidTypeClass { + if let Ok(params) = ty.parameters() { + if params.len() == 0 { + confidence = 0; + } + } + } + } + } + + Ok(Some(Conf::new(ty, confidence))) + } + None => Ok(None), + } + } + + /// Parse and return a type by its index, used as lookup-or-parse + fn handle_type_index( + &mut self, + ty: TypeIndex, + finder: &mut ItemFinder, + ) -> Result> { + if let None = self.indexed_types.get(&ty) { + self.log(|| format!("Parsing Type {:x?} ", ty)); + + match finder.find(ty).and_then(|item| item.parse()) { + Ok(data) => { + self.type_stack.push(ty); + let handled = self.handle_type(&data, finder); + self.type_stack.pop(); + + match handled { + Ok(Some(parsed)) => { + self.log(|| format!("Type {} parsed into: {:?}", ty, parsed)); + match &*parsed { + ParsedType::Named(name, 
parsed) => { + // PDB does this thing where anonymous inner types are represented as + // some_type:: + if !Self::is_name_anonymous(name) { + if let Some(_old) = + self.named_types.insert(name.clone(), parsed.clone()) + { + warn!("Found two types both named `{}`, only one will be used.", name); + } + } + } + _ => {} + } + self.indexed_types.insert(ty, *parsed); + } + e => { + self.log(|| format!("Error parsing type {}: {:x?}", ty, e)); + } + } + } + Err(UnimplementedTypeKind(k)) if k != 0 => { + warn!("Not parsing unimplemented type {}: kind {:x?}", ty, k); + } + Err(e) => { + self.log(|| format!("Could not parse type: {}: {}", ty, e)); + } + }; + } + + Ok(self.indexed_types.get(&ty)) + } + + /// Parse a new type's data + fn handle_type( + &mut self, + data: &TypeData, + finder: &mut ItemFinder, + ) -> Result>> { + match data { + TypeData::Primitive(data) => Ok(self.handle_primitive_type(&data, finder)?), + TypeData::Class(data) => Ok(self.handle_class_type(&data, finder)?), + TypeData::Member(data) => Ok(self.handle_member_type(&data, finder)?), + TypeData::MemberFunction(data) => Ok(self.handle_member_function_type(&data, finder)?), + TypeData::OverloadedMethod(data) => { + Ok(self.handle_overloaded_method_type(&data, finder)?) + } + TypeData::Method(data) => Ok(self.handle_method_type(&data, finder)?), + TypeData::StaticMember(data) => Ok(self.handle_static_member_type(&data, finder)?), + TypeData::Nested(data) => Ok(self.handle_nested_type(&data, finder)?), + TypeData::BaseClass(data) => Ok(self.handle_base_class_type(&data, finder)?), + TypeData::VirtualBaseClass(data) => { + Ok(self.handle_virtual_base_class_type(&data, finder)?) + } + TypeData::VirtualFunctionTable(data) => { + Ok(self.handle_virtual_function_table_type(&data, finder)?) + } + TypeData::VirtualTableShape(data) => { + Ok(self.handle_virtual_table_shape_type(&data, finder)?) 
+ } + TypeData::VirtualFunctionTablePointer(data) => { + Ok(self.handle_virtual_function_table_pointer_type(&data, finder)?) + } + TypeData::Procedure(data) => Ok(self.handle_procedure_type(&data, finder)?), + TypeData::Pointer(data) => Ok(self.handle_pointer_type(&data, finder)?), + TypeData::Modifier(data) => Ok(self.handle_modifier_type(&data, finder)?), + TypeData::Enumeration(data) => Ok(self.handle_enumeration_type(&data, finder)?), + TypeData::Enumerate(data) => Ok(self.handle_enumerate_type(&data, finder)?), + TypeData::Array(data) => Ok(self.handle_array_type(&data, finder)?), + TypeData::Union(data) => Ok(self.handle_union_type(&data, finder)?), + TypeData::Bitfield(data) => Ok(self.handle_bitfield_type(&data, finder)?), + TypeData::FieldList(data) => Ok(self.handle_field_list_type(&data, finder)?), + TypeData::ArgumentList(data) => Ok(self.handle_argument_list_type(&data, finder)?), + TypeData::MethodList(data) => Ok(self.handle_method_list_type(&data, finder)?), + _ => Err(anyhow!("Unknown typedata")), + } + } + + /// Get the raw (mangled) name out of a type, if possible + fn type_data_to_raw_name(data: &TypeData) -> Option { + match data { + TypeData::Class(data) => Some( + data.unique_name + .unwrap_or(data.name) + .to_string() + .to_string(), + ), + TypeData::Enumeration(data) => Some( + data.unique_name + .unwrap_or(data.name) + .to_string() + .to_string(), + ), + TypeData::Union(data) => Some( + data.unique_name + .unwrap_or(data.name) + .to_string() + .to_string(), + ), + _ => None, + } + } + + fn handle_primitive_type( + &mut self, + data: &PrimitiveType, + _finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Primitive type: {:x?}", data)); + let base = match data.kind { + PrimitiveKind::NoType => Ok(Type::void()), + PrimitiveKind::Void => Ok(Type::void()), + PrimitiveKind::Char => Ok(Type::int(1, true)), + PrimitiveKind::UChar => Ok(Type::int(1, false)), + PrimitiveKind::RChar => Ok(Type::int(1, true)), + PrimitiveKind::WChar 
=> Ok(Type::wide_char(2)), + PrimitiveKind::RChar16 => Ok(Type::wide_char(2)), + PrimitiveKind::RChar32 => Ok(Type::wide_char(4)), + PrimitiveKind::I8 => Ok(Type::int(1, true)), + PrimitiveKind::U8 => Ok(Type::int(1, false)), + PrimitiveKind::Short => Ok(Type::int(2, true)), + PrimitiveKind::UShort => Ok(Type::int(2, false)), + PrimitiveKind::I16 => Ok(Type::int(2, true)), + PrimitiveKind::U16 => Ok(Type::int(2, false)), + PrimitiveKind::Long => Ok(Type::int(4, true)), + PrimitiveKind::ULong => Ok(Type::int(4, false)), + PrimitiveKind::I32 => Ok(Type::int(4, true)), + PrimitiveKind::U32 => Ok(Type::int(4, false)), + PrimitiveKind::Quad => Ok(Type::int(8, true)), + PrimitiveKind::UQuad => Ok(Type::int(8, false)), + PrimitiveKind::I64 => Ok(Type::int(8, true)), + PrimitiveKind::U64 => Ok(Type::int(8, false)), + PrimitiveKind::Octa => Ok(Type::int(16, true)), + PrimitiveKind::UOcta => Ok(Type::int(16, false)), + PrimitiveKind::I128 => Ok(Type::int(16, true)), + PrimitiveKind::U128 => Ok(Type::int(16, false)), + PrimitiveKind::F16 => Ok(Type::float(2)), + PrimitiveKind::F32 => Ok(Type::float(4)), + PrimitiveKind::F32PP => Ok(Type::float(4)), + PrimitiveKind::F48 => Ok(Type::float(6)), + PrimitiveKind::F64 => Ok(Type::float(8)), + PrimitiveKind::F80 => Ok(Type::float(10)), + PrimitiveKind::F128 => Ok(Type::float(16)), + PrimitiveKind::Complex32 => Err(anyhow!("Complex32 unimplmented")), + PrimitiveKind::Complex64 => Err(anyhow!("Complex64 unimplmented")), + PrimitiveKind::Complex80 => Err(anyhow!("Complex80 unimplmented")), + PrimitiveKind::Complex128 => Err(anyhow!("Complex128 unimplmented")), + PrimitiveKind::Bool8 => Ok(Type::int(1, false)), + PrimitiveKind::Bool16 => Ok(Type::int(2, false)), + PrimitiveKind::Bool32 => Ok(Type::int(4, false)), + PrimitiveKind::Bool64 => Ok(Type::int(8, false)), + // Hack: this isn't always defined + PrimitiveKind::HRESULT => Ok(Type::named_type_from_type( + "HRESULT", + Type::int(4, true).as_ref(), + )), + _ => Err(anyhow!("Unknown 
type unimplmented")), + }?; + + // TODO: Pointer suffix is not exposed + match data.indirection { + Some(Indirection::Near16) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + Some(Indirection::Far16) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + Some(Indirection::Huge16) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + Some(Indirection::Near32) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + Some(Indirection::Far32) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + Some(Indirection::Near64) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + Some(Indirection::Near128) => Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))), + None => Ok(Some(Box::new(ParsedType::Bare(base)))), + } + } + + fn handle_class_type( + &mut self, + data: &ClassType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Class type: {:x?}", data)); + + let raw_class_name = &data.name.to_string(); + let class_name = raw_class_name.to_string(); + + self.log(|| format!("Named: {}", class_name)); + + if data.properties.forward_reference() { + // Try and find it first + if let Some(existing) = self.named_types.get(&class_name) { + return Ok(Some(Box::new(ParsedType::Bare( + Type::named_type_from_type(&class_name, existing), + )))); + } + + let ntr_class = match data.kind { + ClassKind::Class => NamedTypeReferenceClass::ClassNamedTypeClass, + ClassKind::Struct => NamedTypeReferenceClass::StructNamedTypeClass, + ClassKind::Interface => NamedTypeReferenceClass::StructNamedTypeClass, + }; + return Ok(Some(Box::new(ParsedType::Bare(Type::named_type( + &*NamedTypeReference::new(ntr_class, QualifiedName::from(class_name)), + ))))); + } + + let struct_kind = match &data.kind { + 
ClassKind::Class => StructureType::ClassStructureType, + ClassKind::Struct => StructureType::StructStructureType, + ClassKind::Interface => StructureType::StructStructureType, + }; + + let mut structure = StructureBuilder::new(); + structure.set_structure_type(struct_kind); + structure.set_width(data.size); + structure.set_packed(data.properties.packed()); + + if let Some(fields) = data.fields { + self.namespace_stack.push(class_name.to_string()); + let success = self.parse_structure_fields(&mut structure, fields, finder); + self.namespace_stack.pop(); + let _ = success?; + } + + let new_type = Type::structure(structure.finalize().as_ref()); + Ok(Some(Box::new(ParsedType::Named(class_name, new_type)))) + } + + /// Handle all the structure field parsing for a given field list, putting the fields into a struct + fn parse_structure_fields( + &mut self, + structure: &mut StructureBuilder, + fields: TypeIndex, + finder: &mut ItemFinder, + ) -> Result<()> { + let mut base_classes = vec![]; + let mut virt_methods = HashMap::new(); + let mut non_virt_methods = Vec::new(); + + let mut members = vec![]; + + match self.handle_type_index(fields, finder)? { + Some(ParsedType::FieldList(fields)) => { + for field in fields { + match field { + ParsedType::Member(member) => { + members.push(member.clone()); + } + b @ ParsedType::BaseClass(..) => { + base_classes.push(b.clone()); + } + b @ ParsedType::VBaseClass(..) => { + base_classes.push(b.clone()); + } + ParsedType::Named(..) 
=> {} + ParsedType::VTablePointer(_vt) => {} + ParsedType::Method(method) => { + if let Some(offset) = method.vtable_offset { + virt_methods.insert( + offset, + (method.name.clone(), method.method_type.clone()), + ); + } else { + non_virt_methods + .push((method.name.clone(), method.method_type.clone())); + } + } + ParsedType::OverloadedMethod(name, methods) => { + for method in methods { + if let Some(offset) = method.vtable_offset { + virt_methods + .insert(offset, (name.clone(), method.method_type.clone())); + } + } + } + f => { + return Err(anyhow!("Unexpected field type {:?}", f)); + } + } + } + } + Some(_) => { + return Err(anyhow!( + "Structure fields list did not parse into member list?" + )); + } + // No fields? + None => {} + } + + // Combine bitfields into structures + let mut combined_bitfield_members = vec![]; + let mut last_bitfield_offset = u64::MAX; + let mut last_bitfield_pos = u64::MAX; + let mut last_bitfield_idx = 0; + let mut bitfield_builder: Option = None; + + fn bitfield_name(offset: u64, idx: u64) -> String { + if idx > 0 { + format!("__bitfield{:x}_{}", offset, idx) + } else { + format!("__bitfield{:x}", offset) + } + } + + for m in members { + match (m.bitfield_position, m.bitfield_size) { + (Some(pos), Some(_size)) => { + if last_bitfield_offset != m.offset || last_bitfield_pos >= pos { + if let Some(builder) = bitfield_builder.take() { + combined_bitfield_members.push(ParsedMember { + ty: Conf::new( + Type::structure(builder.finalize().as_ref()), + max_confidence(), + ), + name: bitfield_name( + last_bitfield_offset, + last_bitfield_idx, + ), + offset: last_bitfield_offset, + access: MemberAccess::PublicAccess, + scope: MemberScope::NoScope, + bitfield_size: None, + bitfield_position: None, + }); + } + let new_builder = StructureBuilder::new(); + new_builder.set_structure_type(StructureType::UnionStructureType); + new_builder.set_width(m.ty.contents.width()); + bitfield_builder = Some(new_builder); + + if last_bitfield_offset != 
m.offset { + last_bitfield_idx = 0; + } else { + last_bitfield_idx += 1; + } + } + + last_bitfield_pos = pos; + last_bitfield_offset = m.offset; + bitfield_builder + .as_mut() + .expect("Invariant") + .insert(&m.ty, m.name, 0, false, m.access, m.scope); + } + (None, None) => { + if let Some(builder) = bitfield_builder.take() { + combined_bitfield_members.push(ParsedMember { + ty: Conf::new( + Type::structure(builder.finalize().as_ref()), + max_confidence(), + ), + name: bitfield_name( + last_bitfield_offset, + last_bitfield_idx, + ), + offset: last_bitfield_offset, + access: MemberAccess::PublicAccess, + scope: MemberScope::NoScope, + bitfield_size: None, + bitfield_position: None, + }); + } + last_bitfield_offset = u64::MAX; + last_bitfield_pos = u64::MAX; + combined_bitfield_members.push(m); + } + e => return Err(anyhow!("Unexpected bitfield parameters {:?}", e)), + } + } + if let Some(builder) = bitfield_builder.take() { + combined_bitfield_members.push(ParsedMember { + ty: Conf::new( + Type::structure(builder.finalize().as_ref()), + max_confidence(), + ), + name: bitfield_name(last_bitfield_offset, last_bitfield_idx), + offset: last_bitfield_offset, + access: MemberAccess::PublicAccess, + scope: MemberScope::NoScope, + bitfield_size: None, + bitfield_position: None, + }); + } + members = combined_bitfield_members; + group_structure( + &format!( + "`{}`", + self.namespace_stack + .last() + .ok_or_else(|| anyhow!("Expected class in ns stack"))? 
+ ), + &members, + structure, + )?; + + let mut bases = vec![]; + + for base_class in &base_classes { + match base_class { + ParsedType::BaseClass(name, base) => { + let ntr_class = match self.named_types.get(name) { + Some(ty) if ty.type_class() == TypeClass::StructureTypeClass => { + match ty.get_structure() { + Ok(str) + if str.structure_type() + == StructureType::StructStructureType => + { + NamedTypeReferenceClass::StructNamedTypeClass + } + Ok(str) + if str.structure_type() + == StructureType::ClassStructureType => + { + NamedTypeReferenceClass::ClassNamedTypeClass + } + _ => NamedTypeReferenceClass::StructNamedTypeClass, + } + } + _ => NamedTypeReferenceClass::StructNamedTypeClass, + }; + bases.push(BaseStructure::new( + NamedTypeReference::new(ntr_class, name.into()), + base.offset, + base.ty.contents.width(), + )); + } + ParsedType::VBaseClass(VirtualBaseClass { + base_name, + base_type, + base_offset, + .. + }) => { + let ntr_class = match self.named_types.get(base_name) { + Some(ty) if ty.type_class() == TypeClass::StructureTypeClass => { + match ty.get_structure() { + Ok(str) + if str.structure_type() + == StructureType::StructStructureType => + { + NamedTypeReferenceClass::StructNamedTypeClass + } + Ok(str) + if str.structure_type() + == StructureType::ClassStructureType => + { + NamedTypeReferenceClass::ClassNamedTypeClass + } + _ => NamedTypeReferenceClass::StructNamedTypeClass, + } + } + _ => NamedTypeReferenceClass::StructNamedTypeClass, + }; + bases.push(BaseStructure::new( + NamedTypeReference::new(ntr_class, base_name.into()), + *base_offset, + base_type.width(), + )); + warn!( + "Class `{}` uses virtual inheritance. Type information may be inaccurate.", + self.namespace_stack + .last() + .ok_or_else(|| anyhow!("Expected class in ns stack"))? + ); + } + e => return Err(anyhow!("Unexpected base class type: {:x?}", e)), + } + } + + if bases.len() > 1 { + warn!( + "Class `{}` has multiple base classes. 
Type information may be inaccurate.", + self.namespace_stack + .last() + .ok_or_else(|| anyhow!("Expected class in ns stack"))? + ); + } + structure.set_base_structures(bases); + + if self + .settings + .get_bool("pdb.features.generateVTables", Some(self.bv), None) + && !virt_methods.is_empty() + { + let vt = StructureBuilder::new(); + + let mut vt_bases = vec![]; + + for base_class in &base_classes { + match base_class { + ParsedType::BaseClass(base_name, _base_type) => { + let mut vt_base_name = base_name + .split("::") + .into_iter() + .map(|s| s.to_string()) + .collect::>(); + vt_base_name.push("VTable".to_string()); + let vt_base_name = vt_base_name.join("::"); + + match self.named_types.get(&vt_base_name) { + Some(vt_base_type) + if vt_base_type.type_class() == TypeClass::StructureTypeClass => + { + let ntr_class = + if vt_base_type.type_class() == TypeClass::StructureTypeClass { + match vt_base_type.get_structure() { + Ok(str) + if str.structure_type() + == StructureType::StructStructureType => + { + NamedTypeReferenceClass::StructNamedTypeClass + } + Ok(str) + if str.structure_type() + == StructureType::ClassStructureType => + { + NamedTypeReferenceClass::ClassNamedTypeClass + } + _ => NamedTypeReferenceClass::StructNamedTypeClass, + } + } else { + NamedTypeReferenceClass::StructNamedTypeClass + }; + vt_bases.push(BaseStructure::new( + NamedTypeReference::new(ntr_class, vt_base_name.into()), + 0, + vt_base_type.width(), + )); + } + e @ Some(_) => { + return Err(anyhow!("Unexpected vtable base class: {:?}", e)) + } + None => { + // Parent might just not have a vtable + } + } + } + ParsedType::VBaseClass(_vbase) => {} + e => return Err(anyhow!("Unexpected base class type: {:x?}", e)), + } + } + + let mut min_width = 0; + for base in &vt_bases { + min_width = min_width.max(base.width); + } + + vt.set_base_structures(vt_bases); + vt.set_propagates_data_var_refs(true); + + for (offset, (name, method)) in virt_methods { + vt.insert( + &Conf::new( + 
Type::pointer(&self.arch, &Conf::new(method.method_type, max_confidence())), + max_confidence(), + ), + &name, + offset as u64, + true, + MemberAccess::PublicAccess, + MemberScope::NoScope, + ); + min_width = min_width.max((offset + self.arch.address_size()) as u64); + } + + vt.set_width(min_width); + + let vt_type = Type::structure(vt.finalize().as_ref()); + // Need to insert a new named type for the vtable + let mut vt_name = self + .namespace_stack + .last() + .ok_or_else(|| anyhow!("Expected class in ns stack"))? + .clone(); + vt_name += "::VTable"; + self.named_types.insert(vt_name.clone(), vt_type.clone()); + + let vt_pointer = Type::pointer( + &self.arch, + &Conf::new( + Type::named_type_from_type(&QualifiedName::from(vt_name), vt_type.as_ref()), + max_confidence(), + ), + ); + + structure.insert( + &Conf::new(vt_pointer, max_confidence()), + "vtable", + 0, + true, + MemberAccess::PublicAccess, + MemberScope::NoScope, + ); + } + + Ok(()) + } + + fn handle_member_type( + &mut self, + data: &MemberType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Member type: {:x?}", data)); + + let member_name = data.name.to_string(); + let member_offset = data.offset; + let member_attrs = data.attributes; + + let access = match member_attrs.access() { + 1 /* CV_private */ => MemberAccess::PrivateAccess, + 2 /* CV_protected */ => MemberAccess::ProtectedAccess, + 3 /* CV_public */ => MemberAccess::PublicAccess, + _ => return Err(anyhow!("Unknown access")) + }; + + let scope = MemberScope::NoScope; + + match self.try_type_index_to_bare(data.field_type, finder, true)? { + Some(ty) => Ok(Some(Box::new(ParsedType::Member(ParsedMember { + ty: Conf::new(ty, max_confidence()), + name: member_name.into_owned(), + offset: member_offset, + access, + scope, + bitfield_position: None, + bitfield_size: None, + })))), + None => match self.handle_type_index(data.field_type, finder)? 
{ + Some(ParsedType::BitfieldType(bitfield)) => { + Ok(Some(Box::new(ParsedType::Member(ParsedMember { + ty: Conf::new(bitfield.ty.clone(), max_confidence()), + name: member_name.into_owned(), + offset: member_offset, + access, + scope, + bitfield_position: Some(bitfield.position), + bitfield_size: Some(bitfield.size), + })))) + } + e => Err(anyhow!("Unexpected member type: {:x?}", e)), + }, + } + } + + fn handle_member_function_type( + &mut self, + data: &MemberFunctionType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got MemberFunction type: {:x?}", data)); + let return_type = self.type_index_to_bare(data.return_type, finder, false)?; + + let class_name = match self.handle_type_index(data.class_type, finder)? { + Some(ParsedType::Bare(ty)) if ty.type_class() == TypeClass::NamedTypeReferenceClass => { + ty.get_named_type_reference() + .map_err(|_| anyhow!("Expected NTR to have NTR"))? + .name() + .to_string() + } + e => return Err(anyhow!("Unexpected class type: {:x?}", e)), + }; + + let this_pointer_type = if let Some(this_pointer_type) = data.this_pointer_type { + match self.handle_type_index(this_pointer_type, finder)? { + Some(ParsedType::Bare(ty)) => Some(ty.clone()), + e => return Err(anyhow!("Unexpected this pointer type: {:x?}", e)), + } + } else { + None + }; + + let mut arguments = match self.handle_type_index(data.argument_list, finder)? 
{ + Some(ParsedType::ArgumentList(args)) => args.clone(), + e => return Err(anyhow!("Unexpected argument list type: {:x?}", e)), + }; + + // It looks like pdb stores varargs by having the final argument be void + let mut is_varargs = false; + if let Some(last) = arguments.pop() { + if last.t.contents.as_ref().type_class() == TypeClass::VoidTypeClass { + is_varargs = true; + } else { + arguments.push(last); + } + } + + let mut fancy_return_type = return_type.clone(); + let mut fancy_arguments = arguments.clone(); + + if data.attributes.cxx_return_udt() + || !self.can_fit_in_register(data.return_type, finder, true) + { + // Return UDT?? + // This probably means the return value got pushed to the stack + fancy_return_type = Type::pointer( + &self.arch, + &Conf::new(return_type.clone(), max_confidence()), + ); + fancy_arguments.insert( + 0, + FunctionParameter::new( + Conf::new(fancy_return_type.clone(), max_confidence()), + "__return".to_string(), + None, + ), + ); + } + + if let Some(this_ptr) = &this_pointer_type { + self.insert_this_pointer(&mut fancy_arguments, this_ptr.clone())?; + } + + let convention = self + .cv_call_t_to_calling_convention(data.attributes.calling_convention()) + .map(|cc| Conf::new(cc, max_confidence())) + .unwrap_or({ + if is_varargs { + Conf::new(self.cdecl_cc.clone(), max_confidence()) + } else if this_pointer_type.is_some() { + Conf::new(self.thiscall_cc.clone(), max_confidence()) + } else { + Conf::new(self.default_cc.clone(), 16) + } + }); + + let func = Type::function_with_options( + &Conf::new(return_type, max_confidence()), + arguments.as_slice(), + is_varargs, + &convention, + Conf::new(0, 0), + ); + + let fancy_func = Type::function_with_options( + &Conf::new(fancy_return_type, max_confidence()), + fancy_arguments.as_slice(), + is_varargs, + &convention, + Conf::new(0, 0), + ); + + Ok(Some(Box::new(ParsedType::MemberFunction( + ParsedMemberFunction { + attributes: data.attributes, + class_name, + method_type: fancy_func, + 
raw_method_type: func, + this_pointer_type, + this_adjustment: data.this_adjustment as usize, + }, + )))) + } + + fn handle_overloaded_method_type( + &mut self, + data: &OverloadedMethodType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got OverloadedMethod type: {:x?}", data)); + // This is just a MethodList in disguise + let method_list = match self.handle_type_index(data.method_list, finder)? { + Some(ParsedType::MethodList(list)) => list.clone(), + e => return Err(anyhow!("Unexpected method list type: {:x?}", e)), + }; + + Ok(Some(Box::new(ParsedType::OverloadedMethod( + data.name.to_string().to_string(), + method_list, + )))) + } + + fn handle_method_type( + &mut self, + data: &MethodType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Method type: {:x?}", data)); + + let member_function = match self.handle_type_index(data.method_type, finder)? { + Some(ParsedType::MemberFunction(func)) => func.clone(), + e => return Err(anyhow!("Unexpected method type {:?}", e)), + }; + + Ok(Some(Box::new(ParsedType::Method(ParsedMethod { + attributes: data.attributes, + name: data.name.to_string().to_string(), + method_type: member_function, + vtable_offset: data.vtable_offset.map(|o| o as usize), + })))) + } + + fn handle_static_member_type( + &mut self, + data: &StaticMemberType, + _finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got StaticMember type: {:x?}", data)); + // TODO: Not handling these + Ok(None) + } + + fn handle_nested_type( + &mut self, + data: &NestedType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Nested type: {:x?}", data)); + let mut class_name_ns = self.namespace_stack.clone(); + class_name_ns.push(data.name.to_string().to_string()); + let ty = self.type_index_to_bare(data.nested_type, finder, false)?; + Ok(Some(Box::new(ParsedType::Named( + class_name_ns.join("::"), + ty, + )))) + } + + fn handle_base_class_type( + &mut self, + data: &BaseClassType, + finder: 
&mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got BaseClass type: {:x?}", data)); + + let base_offset = data.offset; + let base_attrs = data.attributes; + + let (member_name, t) = match self.handle_type_index(data.base_class, finder)? { + Some(ParsedType::Named(n, t)) => (n.clone(), t.clone()), + Some(ParsedType::Bare(t)) if t.type_class() == TypeClass::NamedTypeReferenceClass => { + let name = t + .get_named_type_reference() + .map_err(|_| anyhow!("Expected NTR to have NTR"))? + .name() + .to_string(); + (name, t.clone()) + } + e => return Err(anyhow!("Unexpected base class type: {:x?}", e)), + }; + + // Try to resolve the full base type + let resolved_type = match self.try_type_index_to_bare(data.base_class, finder, true)? { + Some(ty) => Type::named_type_from_type(&member_name, ty.as_ref()), + None => t.clone(), + }; + + let access = match base_attrs.access() { + 1 /* CV_private */ => MemberAccess::PrivateAccess, + 2 /* CV_protected */ => MemberAccess::ProtectedAccess, + 3 /* CV_public */ => MemberAccess::PublicAccess, + _ => return Err(anyhow!("Unknown access")) + }; + + let scope = MemberScope::NoScope; + Ok(Some(Box::new(ParsedType::BaseClass( + member_name.clone(), + StructureMember::new( + Conf::new(resolved_type, max_confidence()), + member_name, + base_offset as u64, + access, + scope, + ), + )))) + } + + fn handle_virtual_base_class_type( + &mut self, + data: &VirtualBaseClassType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got VirtualBaseClass type: {:x?}", data)); + + let (n, ty) = match self.handle_type_index(data.base_class, finder)? { + Some(ParsedType::Named(n, t)) => (n.clone(), t.clone()), + Some(ParsedType::Bare(t)) if t.type_class() == TypeClass::NamedTypeReferenceClass => { + let name = t + .get_named_type_reference() + .map_err(|_| anyhow!("Expected NTR to have NTR"))? 
+ .name() + .to_string(); + (name, t.clone()) + } + e => return Err(anyhow!("Unexpected base class type: {:x?}", e)), + }; + + // In addition to the base class, we also have a vbtable + let vbptr_type = match self.handle_type_index(data.base_pointer, finder)? { + Some(ParsedType::Bare(t)) => t.clone(), + e => return Err(anyhow!("Unexpected virtual base pointer type: {:x?}", e)), + }; + + Ok(Some(Box::new(ParsedType::VBaseClass(VirtualBaseClass { + base_name: n, + base_type: ty, + base_offset: data.base_pointer_offset as u64, + base_table_type: vbptr_type, + base_table_offset: data.virtual_base_offset as u64, + })))) + } + + fn handle_virtual_function_table_type( + &mut self, + data: &VirtualFunctionTableType, + _finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got VirtualFunctionTableType type: {:x?}", data)); + Err(anyhow!("VirtualFunctionTableType unimplemented")) + } + + fn handle_virtual_table_shape_type( + &mut self, + data: &VirtualTableShapeType, + _finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got VirtualTableShapeType type: {:x?}", data)); + Ok(Some(Box::new(ParsedType::VTableShape( + data.descriptors.clone(), + )))) + } + + fn handle_virtual_function_table_pointer_type( + &mut self, + data: &VirtualFunctionTablePointerType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got VirtualFunctionTablePointer type: {:x?}", data)); + let shape = match self.handle_type_index(data.table, finder)? 
{ + Some(ParsedType::VTablePointer(shape)) => shape.clone(), + e => { + return Err(anyhow!( + "Could not parse virtual function table pointer type: {:x?}", + e + )) + } + }; + + Ok(Some(Box::new(ParsedType::VTablePointer(shape)))) + } + + fn handle_procedure_type( + &mut self, + data: &ProcedureType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Procedure type: {:x?}", data)); + let return_type = if let Some(return_type_index) = data.return_type { + self.try_type_index_to_bare(return_type_index, finder, false)? + } else { + None + } + .map(|r| Conf::new(r, max_confidence())) + .unwrap_or(Conf::new(Type::void(), 0)); + + let mut arguments = match self.handle_type_index(data.argument_list, finder)? { + Some(ParsedType::ArgumentList(args)) => args.clone(), + e => return Err(anyhow!("Unexpected argument list type: {:x?}", e)), + }; + + // It looks like pdb stores varargs by having the final argument be void + let mut is_varargs = false; + if let Some(last) = arguments.pop() { + if last.t.contents.as_ref().type_class() == TypeClass::VoidTypeClass { + is_varargs = true; + } else { + arguments.push(last); + } + } + + let mut fancy_return_type = return_type.clone(); + let mut fancy_arguments = arguments.clone(); + + let mut return_stacky = data.attributes.cxx_return_udt(); + if let Some(return_type_index) = data.return_type { + return_stacky |= !self.can_fit_in_register(return_type_index, finder, true); + } + if return_stacky { + // Stack return via a pointer in the first parameter + fancy_return_type = + Conf::new(Type::pointer(&self.arch, &return_type), max_confidence()); + fancy_arguments.insert( + 0, + FunctionParameter::new(fancy_return_type.clone(), "__return".to_string(), None), + ); + } + + let convention = self + .cv_call_t_to_calling_convention(data.attributes.calling_convention()) + .map(|cc| Conf::new(cc, max_confidence())) + .unwrap_or(Conf::new(self.default_cc.clone(), 0)); + self.log(|| format!("Convention: {:?}", convention)); + + 
let func = Type::function_with_options( + &return_type, + arguments.as_slice(), + is_varargs, + &convention, + Conf::new(0, 0), + ); + + let fancy_func = Type::function_with_options( + &fancy_return_type, + fancy_arguments.as_slice(), + is_varargs, + &convention, + Conf::new(0, 0), + ); + + Ok(Some(Box::new(ParsedType::Procedure(ParsedProcedureType { + method_type: fancy_func, + raw_method_type: func, + })))) + } + + fn handle_pointer_type( + &mut self, + data: &PointerType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Pointer type: {:x?}", data)); + let base = match self.try_type_index_to_bare(data.underlying_type, finder, false)? { + Some(ty) => Some(ty.clone()), + None => match self.handle_type_index(data.underlying_type, finder)? { + Some(ParsedType::VTableShape(descriptors)) => { + return Ok(Some(Box::new(ParsedType::VTablePointer( + descriptors.clone(), + )))); + } + _ => None, + }, + }; + + if let Some(base) = base { + Ok(Some(Box::new(ParsedType::Bare(Type::pointer( + &self.arch, + base.as_ref(), + ))))) + } else { + Ok(None) + } + } + + fn handle_modifier_type( + &mut self, + data: &ModifierType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Modifier type: {:x?}", data)); + let base = self.try_type_index_to_bare(data.underlying_type, finder, false)?; + + if let Some(base) = base { + let builder = TypeBuilder::new(base.as_ref()); + builder.set_const(data.constant); + builder.set_volatile(data.volatile); + Ok(Some(Box::new(ParsedType::Bare(builder.finalize())))) + } else { + Ok(None) + } + } + + fn handle_enumeration_type( + &mut self, + data: &EnumerationType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Enumeration type: {:x?}", data)); + + let raw_enum_name = &data.name.to_string(); + let enum_name = raw_enum_name.to_string(); + self.log(|| format!("Named: {}", enum_name)); + + if data.properties.forward_reference() { + // Try and find it first + if let Some(existing) = 
self.named_types.get(&enum_name) { + return Ok(Some(Box::new(ParsedType::Bare( + Type::named_type_from_type(&enum_name, existing), + )))); + } + + let ntr_class = NamedTypeReferenceClass::EnumNamedTypeClass; + return Ok(Some(Box::new(ParsedType::Bare(Type::named_type( + &*NamedTypeReference::new(ntr_class, QualifiedName::from(enum_name)), + ))))); + } + + let enumeration = EnumerationBuilder::new(); + + match self.handle_type_index(data.fields, finder)? { + Some(ParsedType::FieldList(fields)) => { + for field in fields { + match field { + ParsedType::Enumerate(member) => { + enumeration.insert(member.name.clone(), member.value); + } + e => return Err(anyhow!("Unexpected enumerate member: {:?}", e)), + } + } + } + // No fields? + None => {} + e => return Err(anyhow!("Unexpected enumeration field list: {:?}", e)), + } + + let underlying = match self.handle_type_index(data.underlying_type, finder)? { + Some(ParsedType::Bare(ty)) => ty.clone(), + e => return Err(anyhow!("Making enumeration from unexpected type: {:x?}", e)), + }; + + let new_type = Type::enumeration( + enumeration.finalize().as_ref(), + underlying.width() as usize, + underlying.is_signed().contents, + ); + + Ok(Some(Box::new(ParsedType::Named(enum_name, new_type)))) + } + + fn handle_enumerate_type( + &mut self, + data: &EnumerateType, + _finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Enumerate type: {:x?}", data)); + Ok(Some(Box::new(ParsedType::Enumerate(EnumerationMember { + name: data.name.to_string().to_string(), + value: match data.value { + Variant::U8(v) => v as u64, + Variant::U16(v) => v as u64, + Variant::U32(v) => v as u64, + Variant::U64(v) => v as u64, + Variant::I8(v) => (v as i64) as u64, + Variant::I16(v) => (v as i64) as u64, + Variant::I32(v) => (v as i64) as u64, + Variant::I64(v) => (v as i64) as u64, + }, + is_default: false, + })))) + } + + fn handle_array_type( + &mut self, + data: &ArrayType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| 
format!("Got Array type: {:x?}", data)); + // PDB stores array sizes as TOTAL bytes not element count + // So we need to look up the original type's size to know how many there are + let base = self.try_type_index_to_bare(data.element_type, finder, true)?; + + if let Some(base) = base { + let mut new_type = base; + if new_type.width() == 0 { + if new_type.width() == 0 { + return Err(anyhow!( + "Cannot calculate array of 0-size elements: {}", + new_type + )); + } + } + + let mut counts = data + .dimensions + .iter() + .map(|t| *t as u64) + .collect::>(); + for i in 0..counts.len() { + for j in i..counts.len() { + if counts[j] % new_type.width() != 0 { + return Err(anyhow!( + "Array stride {} is not a multiple of element {} size {}", + counts[j], + new_type, + new_type.width() + )); + } + counts[j] /= new_type.width(); + } + + new_type = Type::array(new_type.as_ref(), counts[i] as u64); + } + + Ok(Some(Box::new(ParsedType::Bare(new_type)))) + } else { + Ok(None) + } + } + + fn handle_union_type( + &mut self, + data: &UnionType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Union type: {:x?}", data)); + + let raw_union_name = &data.name.to_string(); + let union_name = raw_union_name.to_string(); + self.log(|| format!("Named: {}", union_name)); + + if data.properties.forward_reference() { + // Try and find it first + if let Some(existing) = self.named_types.get(&union_name) { + return Ok(Some(Box::new(ParsedType::Bare( + Type::named_type_from_type(&union_name, existing), + )))); + } + + let ntr_class = NamedTypeReferenceClass::UnionNamedTypeClass; + return Ok(Some(Box::new(ParsedType::Bare(Type::named_type( + &*NamedTypeReference::new(ntr_class, QualifiedName::from(union_name)), + ))))); + } + + let mut structure = StructureBuilder::new(); + structure.set_structure_type(StructureType::UnionStructureType); + structure.set_width(data.size); + + self.namespace_stack.push(union_name.to_string()); + let success = self.parse_union_fields(&mut 
structure, data.fields, finder); + self.namespace_stack.pop(); + let _ = success?; + + let new_type = Type::structure(structure.finalize().as_ref()); + Ok(Some(Box::new(ParsedType::Named(union_name, new_type)))) + } + + /// Parse the fields in a union's field list + fn parse_union_fields( + &mut self, + structure: &mut StructureBuilder, + fields: TypeIndex, + finder: &mut ItemFinder, + ) -> Result<()> { + let mut union_groups = vec![]; + let mut last_union_group = u64::MAX; + + match self.handle_type_index(fields, finder) { + Ok(Some(ParsedType::FieldList(fields))) => { + for field in fields { + match field { + ParsedType::Member(member) => { + if member.offset <= last_union_group { + union_groups.push(vec![]); + } + last_union_group = member.offset; + union_groups + .last_mut() + .expect("invariant") + .push(member.clone()); + } + ParsedType::Method(..) => {} + ParsedType::Named(..) => {} + e => return Err(anyhow!("Unexpected union member type {:?}", e)), + } + } + } + e => return Err(anyhow!("Unexpected union field list type {:?}", e)), + } + + for (i, group) in union_groups.into_iter().enumerate() { + if group.len() == 1 { + structure.insert( + &group[0].ty, + group[0].name.clone(), + group[0].offset, + false, + group[0].access, + group[0].scope, + ); + } else { + let inner_struct = StructureBuilder::new(); + for member in group { + inner_struct.insert( + &member.ty, + member.name.clone(), + member.offset, + false, + member.access, + member.scope, + ); + } + structure.insert( + &Conf::new( + Type::structure(inner_struct.finalize().as_ref()), + max_confidence(), + ), + format!("__inner{:x}", i), + 0, + false, + MemberAccess::PublicAccess, + MemberScope::NoScope, + ); + } + } + + Ok(()) + } + + fn handle_bitfield_type( + &mut self, + data: &BitfieldType, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got Bitfield type: {:x?}", data)); + Ok(self + .try_type_index_to_bare(data.underlying_type, finder, true)? 
+ .map(|ty| { + Box::new(ParsedType::BitfieldType(ParsedBitfieldType { + size: data.length as u64, + position: data.position as u64, + ty, + })) + })) + } + + fn handle_field_list_type( + &mut self, + data: &FieldList, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got FieldList type: {:x?}", data)); + + let mut fields = vec![]; + for (i, field) in data.fields.iter().enumerate() { + match self.handle_type(field, finder)? { + Some(f) => { + self.log(|| format!("Inner field {} parsed into {:?}", i, f)); + fields.push(*f); + } + None => { + self.log(|| format!("Inner field {} parsed into None", i)); + } + } + } + + if let Some(cont) = data.continuation { + match self.handle_type_index(cont, finder)? { + Some(ParsedType::FieldList(cont_fields)) => { + fields.extend(cont_fields.clone()); + } + None => {} + f => { + return Err(anyhow!("Unexpected field list continuation {:?}", f)); + } + } + } + Ok(Some(Box::new(ParsedType::FieldList(fields)))) + } + + fn handle_argument_list_type( + &mut self, + data: &ArgumentList, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got ArgumentList type: {:x?}", data)); + let mut args = vec![]; + for &arg in data.arguments.iter() { + match self.try_type_index_to_bare(arg, finder, false)? 
{ + Some(ty) => { + // On x86_32, structures are stored on the stack directly + // On x64, they are put into pointers if they are not a int size + // TODO: Ugly hack + if self.arch.address_size() == 4 || Self::size_can_fit_in_register(ty.width()) { + args.push(FunctionParameter::new( + Conf::new(ty.clone(), max_confidence()), + "".to_string(), + None, + )); + } else { + args.push(FunctionParameter::new( + Conf::new( + Type::pointer(self.arch.as_ref(), ty.as_ref()), + max_confidence(), + ), + "".to_string(), + None, + )); + } + } + e => { + return Err(anyhow!("Unexpected argument type {:?}", e)); + } + } + } + Ok(Some(Box::new(ParsedType::ArgumentList(args)))) + } + + fn handle_method_list_type( + &mut self, + data: &MethodList, + finder: &mut ItemFinder, + ) -> Result>> { + self.log(|| format!("Got MethodList type: {:x?}", data)); + + let mut list = vec![]; + for method in &data.methods { + match self.handle_type_index(method.method_type, finder)? { + Some(ParsedType::MemberFunction(func)) => { + list.push(ParsedMethodListEntry { + attributes: method.attributes, + method_type: func.clone(), + vtable_offset: method.vtable_offset.map(|o| o as usize), + }); + } + e => return Err(anyhow!("Unexpected method list entry: {:?}", e)), + } + } + + Ok(Some(Box::new(ParsedType::MethodList(list)))) + } + + /// Given a type index, get a bare binja type (or fail if not found) + /// Optionally, set fully_resolve to true to parse and get the real type back in the case of NTRs + fn type_index_to_bare( + &mut self, + index: TypeIndex, + finder: &mut ItemFinder, + fully_resolve: bool, + ) -> Result> { + match self.try_type_index_to_bare(index, finder, fully_resolve)? 
{ + Some(ty) => Ok(ty), + None => Err(anyhow!("Unresolved expected type {:?}", index)), + } + } + + /// Given a type index, try to get a bare binja type + /// Optionally, set fully_resolve to true to parse and get the real type back in the case of NTRs + fn try_type_index_to_bare( + &mut self, + index: TypeIndex, + finder: &mut ItemFinder, + fully_resolve: bool, + ) -> Result>> { + let (mut type_, inner) = match self.handle_type_index(index, finder)? { + Some(ParsedType::Bare(ty)) => (ty.clone(), None), + Some(ParsedType::Named(name, ty)) => { + (Type::named_type_from_type(name, &ty), Some(ty.clone())) + } + Some(ParsedType::Procedure(ParsedProcedureType { method_type, .. })) => { + (method_type.clone(), Some(method_type.clone())) + } + Some(ParsedType::MemberFunction(ParsedMemberFunction { method_type, .. })) => { + (method_type.clone(), Some(method_type.clone())) + } + Some(ParsedType::Member(ParsedMember { ty, .. })) => { + (ty.contents.clone(), Some(ty.contents.clone())) + } + _ => return Ok(None), + }; + + if type_.type_class() == TypeClass::NamedTypeReferenceClass { + if type_.width() == 0 { + // Replace empty NTR with fully parsed NTR, if we can + let name = type_ + .get_named_type_reference() + .map_err(|_| anyhow!("expected ntr"))? 
+ .name() + .to_string(); + if let Some(full_ntr) = self.named_types.get(&name) { + type_ = Type::named_type_from_type(&name, full_ntr.as_ref()); + } + } + } + + if !fully_resolve { + return Ok(Some(type_)); + } + + if type_.type_class() == TypeClass::NamedTypeReferenceClass { + if type_.width() == 0 { + // Look up raw name of this type + if let Ok(raw) = finder.find(index) { + if let Ok(parsed) = raw.parse() { + // Have to use raw name here because self.full_type_indices uses raw name + // for some reason + if let Some(raw_name) = Self::type_data_to_raw_name(&parsed) { + if let Some(&full_index) = self.full_type_indices.get(&raw_name) { + if let None = self.type_stack.iter().find(|&&idx| idx == full_index) + { + if full_index != index { + return self.try_type_index_to_bare( + full_index, + finder, + fully_resolve, + ); + } + } + } + } + } + } + } + } + + if type_.type_class() == TypeClass::NamedTypeReferenceClass { + // PDB does this thing where anonymous inner types are represented as + // some_type:: + let name = type_ + .get_named_type_reference() + .map_err(|_| anyhow!("expected ntr"))? 
+ .name() + .to_string(); + if Self::is_name_anonymous(&name) { + if let Some(inner) = inner.as_ref() { + type_ = inner.clone(); + } else { + // Look up raw name of this type + if let Ok(raw) = finder.find(index) { + if let Ok(parsed) = raw.parse() { + // Have to use raw name here because self.full_type_indices uses raw name + // for some reason + if let Some(raw_name) = Self::type_data_to_raw_name(&parsed) { + if let Some(&full_index) = self.full_type_indices.get(&raw_name) { + if let None = + self.type_stack.iter().find(|&&idx| idx == full_index) + { + if full_index != index { + return self.try_type_index_to_bare( + full_index, + finder, + fully_resolve, + ); + } + } + } + } + } + } + } + } + } + Ok(Some(type_)) + } + + /// Is this name one of the stupid microsoft unnamed type names + fn is_name_anonymous(name: &String) -> bool { + let name_string = name.split("::").last().unwrap_or("").to_string(); + return name_string == "" || name_string.starts_with(" Option>> { + platform + .calling_conventions() + .iter() + .find(|c| c.name().as_str() == name) + .map(|g| g.clone()) + } + + /// Convert pdb calling convention enum to binja + fn cv_call_t_to_calling_convention( + &self, + cv: u8, + ) -> Option>> { + match CV_call_t::try_from(cv) { + Ok(CV_call_t::NEAR_FAST) | Ok(CV_call_t::FAR_FAST) => { + self.platform.get_fastcall_calling_convention() + } + Ok(CV_call_t::NEAR_STD) | Ok(CV_call_t::FAR_STD) => { + self.platform.get_stdcall_calling_convention() + } + Ok(CV_call_t::NEAR_C) | Ok(CV_call_t::FAR_C) => { + self.platform.get_cdecl_calling_convention() + } + Ok(CV_call_t::THISCALL) => { + Self::find_calling_convention(self.platform.as_ref(), "thiscall") + } + Ok(CV_call_t::NEAR_PASCAL) | Ok(CV_call_t::FAR_PASCAL) => { + Self::find_calling_convention(self.platform.as_ref(), "pascal") + } + Ok(CV_call_t::NEAR_SYS) | Ok(CV_call_t::FAR_SYS) => { + Self::find_calling_convention(self.platform.as_ref(), "sys") + } + Ok(CV_call_t::MIPSCALL) => { + 
Self::find_calling_convention(self.platform.as_ref(), "mipscall") + } + Ok(CV_call_t::ALPHACALL) => { + Self::find_calling_convention(self.platform.as_ref(), "alphacall") + } + Ok(CV_call_t::PPCCALL) => { + Self::find_calling_convention(self.platform.as_ref(), "ppccall") + } + Ok(CV_call_t::SHCALL) => { + Self::find_calling_convention(self.platform.as_ref(), "shcall") + } + Ok(CV_call_t::ARMCALL) => { + Self::find_calling_convention(self.platform.as_ref(), "armcall") + } + Ok(CV_call_t::AM33CALL) => { + Self::find_calling_convention(self.platform.as_ref(), "am33call") + } + Ok(CV_call_t::TRICALL) => { + Self::find_calling_convention(self.platform.as_ref(), "tricall") + } + Ok(CV_call_t::SH5CALL) => { + Self::find_calling_convention(self.platform.as_ref(), "sh5call") + } + Ok(CV_call_t::M32RCALL) => { + Self::find_calling_convention(self.platform.as_ref(), "m32rcall") + } + Ok(CV_call_t::NEAR_VECTOR) => { + Self::find_calling_convention(self.platform.as_ref(), "vectorcall") + } + _ => None, + } + } + + /// Insert an argument for the thisptr in a function param list + fn insert_this_pointer( + &self, + parameters: &mut Vec, + this_type: Ref, + ) -> Result<()> { + parameters.insert( + 0, + FunctionParameter::new( + Conf::new(this_type, max_confidence()), + "this".to_string(), + None, + ), + ); + + Ok(()) + } + + /// Does this type get returned in rax? Or should we put it on the stack? + pub fn can_fit_in_register( + &mut self, + index: TypeIndex, + finder: &mut ItemFinder, + treat_references_like_pointers: bool, + ) -> bool { + // TLDR "This is impossible so we're making a best-guess" + // GET READY OKAY + + // "A scalar return value that can fit into 64 bits, including the __m64 type, is returned + // through RAX. Non-scalar types including floats, doubles, and vector types such as __m128, + // __m128i, __m128d are returned in XMM0. The state of unused bits in the value returned + // in RAX or XMM0 is undefined. 
+ + // "User-defined types can be returned by value from global functions and static member + // functions. To return a user-defined type by value in RAX, it must have a length of + // 1, 2, 4, 8, 16, 32, or 64 bits. It must also have no user-defined constructor, destructor, + // or copy assignment operator. It can have no private or protected non-static data members, + // and no non-static data members of reference type. It can't have base classes or virtual + // functions. And, it can only have data members that also meet these requirements. + // (This definition is essentially the same as a C++03 POD type. Because the definition has + // changed in the C++11 standard, we don't recommend using std::is_pod for this test.) + // Otherwise, the caller must allocate memory for the return value and pass a pointer to it + // as the first argument. The remaining arguments are then shifted one argument to the right. + // The same pointer must be returned by the callee in RAX." + + // - length of 1, 2, 4, 8, 16, 32, or 64 bits + // - no user-defined constructor + // - no user-defined destructor + // - no user-defined copy assignment operator + // - no private data members + // - no protected data members + // - no reference data members + // - no base classes + // - no virtual functions + + // This one is incorrect, so we're not including it: + // - all members meet these requirements + // https://godbolt.org/z/hsTxrxq9c extremely cool + + // Are we going to implement all of this? + // No? We're just going to do something close and leave it to the users to figure out the rest + // There's no way I'm digging through all nonsense + + // After a quick GitHub discussion (https://github.com/MicrosoftDocs/cpp-docs/issues/4152) + // I've determined this is unknowable. + // Microsoft does it again!!!! 
+ + if let Some(&returnable) = self.type_default_returnable.get(&index) { + returnable + } else { + let returnable = + self.can_fit_in_register_impl(index, finder, treat_references_like_pointers); + self.log(|| format!("Type {} is default returnable: {}", index, returnable)); + self.type_default_returnable.insert(index, returnable); + returnable + } + } + + fn size_can_fit_in_register(size: u64) -> bool { + match size { + 0 | 1 | 2 | 4 | 8 => true, + _ => false, + } + } + + // Memoized... because this has gotta be real slow + fn can_fit_in_register_impl( + &mut self, + index: TypeIndex, + finder: &mut ItemFinder, + treat_references_like_pointers: bool, + ) -> bool { + let ty = match finder.find(index) { + Ok(item) => match item.parse() { + Ok(ty) => ty, + Err(_) => return false, + }, + Err(_) => return false, + }; + + fn get_fields<'a>( + index: TypeIndex, + finder: &mut ItemFinder<'a, TypeIndex>, + ) -> Result>> { + match finder.find(index).and_then(|fields| fields.parse()) { + Ok(TypeData::FieldList(fields)) => { + if let Some(cont) = fields.continuation { + Ok(fields + .fields + .into_iter() + .chain(get_fields(cont, finder)?.into_iter()) + .collect::>()) + } else { + Ok(fields.fields) + } + } + _ => Err(anyhow!("can't lookup fields")), + } + } + + match ty { + TypeData::Primitive(_) => true, + TypeData::Pointer(p) => match p.attributes.pointer_mode() { + PointerMode::Pointer => true, + PointerMode::Member => true, + PointerMode::MemberFunction => true, + // - no reference data members + PointerMode::LValueReference => treat_references_like_pointers, + PointerMode::RValueReference => treat_references_like_pointers, + }, + TypeData::Array(a) => { + Self::size_can_fit_in_register(*a.dimensions.last().unwrap_or(&0) as u64) + && self.can_fit_in_register(a.element_type, finder, false) + } + TypeData::Modifier(m) => { + self.can_fit_in_register(m.underlying_type, finder, treat_references_like_pointers) + } + TypeData::Enumeration(e) => 
self.can_fit_in_register(e.underlying_type, finder, false), + TypeData::Class(c) => { + if c.properties.forward_reference() { + if let Some(raw_name) = c.unique_name { + if let Some(&full) = self + .full_type_indices + .get(&raw_name.to_string().to_string()) + { + return self.can_fit_in_register( + full, + finder, + treat_references_like_pointers, + ); + } + } + // Can't look up, assume not + return false; + } + + // - length of 1, 2, 4, 8, 16, 32, or 64 bits + if !Self::size_can_fit_in_register(c.size) { + return false; + } + + // - no user-defined constructor + // - no user-defined destructor + // - no user-defined copy assignment operator + if c.properties.constructors() || c.properties.overloaded_assignment() { + return false; + } + + // - no base classes + if let Some(_) = c.derived_from { + return false; + } + // - no virtual functions + if let Some(_) = c.vtable_shape { + return false; + } + + let fields = if let Some(fields_idx) = c.fields { + if let Ok(fields) = get_fields(fields_idx, finder) { + fields + } else { + return false; + } + } else { + // No fields? 
+ return true; + }; + + for field in fields { + match field { + TypeData::Member(m) => { + // - no private data members + // - no protected data members + if m.attributes.access() == 1 || m.attributes.access() == 2 { + return false; + } + } + TypeData::OverloadedMethod(m) => { + match finder.find(m.method_list).and_then(|l| l.parse()) { + Ok(TypeData::MethodList(list)) => { + for m in list.methods { + // - no virtual functions + if m.attributes.is_virtual() { + return false; + } + } + } + _ => return false, + } + } + TypeData::Method(m) => { + // - no virtual functions + if m.attributes.is_virtual() { + return false; + } + } + // - no base classes + TypeData::BaseClass(_) => return false, + TypeData::VirtualBaseClass(_) => return false, + TypeData::VirtualFunctionTable(_) => return false, + TypeData::VirtualTableShape(_) => return false, + TypeData::VirtualFunctionTablePointer(_) => return false, + _ => {} + } + } + return true; + } + TypeData::Union(u) => { + if u.properties.forward_reference() { + if let Some(raw_name) = u.unique_name { + if let Some(&full) = self + .full_type_indices + .get(&raw_name.to_string().to_string()) + { + return self.can_fit_in_register( + full, + finder, + treat_references_like_pointers, + ); + } + } + // Can't look up, assume not + return false; + } + + // - length of 1, 2, 4, 8, 16, 32, or 64 bits + if !Self::size_can_fit_in_register(u.size) { + return false; + } + + // - no user-defined constructor + // - no user-defined destructor + // - no user-defined copy assignment operator + if u.properties.constructors() || u.properties.overloaded_assignment() { + return false; + } + + let fields = if let Ok(fields) = get_fields(u.fields, finder) { + fields + } else { + return false; + }; + + for field in fields { + match field { + TypeData::Member(m) => { + // - no private data members + // - no protected data members + if m.attributes.access() == 1 || m.attributes.access() == 2 { + return false; + } + } + TypeData::OverloadedMethod(m) => { 
+ match finder.find(m.method_list).and_then(|l| l.parse()) { + Ok(TypeData::MethodList(list)) => { + for m in list.methods { + // - no virtual functions + if m.attributes.is_virtual() { + return false; + } + } + } + _ => return false, + } + } + TypeData::Method(m) => { + // - no virtual functions + if m.attributes.is_virtual() { + return false; + } + } + // - no base classes + TypeData::BaseClass(_) => return false, + TypeData::VirtualBaseClass(_) => return false, + TypeData::VirtualFunctionTable(_) => return false, + TypeData::VirtualTableShape(_) => return false, + TypeData::VirtualFunctionTablePointer(_) => return false, + _ => {} + } + } + return true; + } + _ => false, + } + } +} diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index aedde6bec7..9aee0b163c 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -23,7 +23,7 @@ use std::{ collections::HashMap, ffi::{c_char, c_int, CStr, CString}, hash::Hash, - mem::zeroed, + mem::{zeroed, MaybeUninit}, ops, ptr, slice, }; @@ -313,7 +313,7 @@ pub trait Intrinsic: Sized + Clone + Copy { fn id(&self) -> u32; /// Reeturns the list of the input names and types for this intrinsic. - fn inputs(&self) -> Vec>; + fn inputs(&self) -> Vec>; /// Returns the list of the output types for this intrinsic. 
fn outputs(&self) -> Vec>>; @@ -650,7 +650,7 @@ impl Intrinsic for UnusedIntrinsic { fn id(&self) -> u32 { unreachable!() } - fn inputs(&self) -> Vec> { + fn inputs(&self) -> Vec> { unreachable!() } fn outputs(&self) -> Vec>> { @@ -992,7 +992,7 @@ impl Intrinsic for crate::architecture::CoreIntrinsic { self.1 } - fn inputs(&self) -> Vec> { + fn inputs(&self) -> Vec> { let mut count: usize = 0; unsafe { @@ -1000,7 +1000,7 @@ impl Intrinsic for crate::architecture::CoreIntrinsic { let ret = slice::from_raw_parts_mut(inputs, count) .iter() - .map(NameAndType::from_raw) + .map(|x| NameAndType::from_raw(x).to_owned()) .collect(); BNFreeNameAndTypeList(inputs, count); @@ -1162,17 +1162,18 @@ impl Architecture for CoreArchitecture { &mut result as *mut _, &mut count as *mut _, ) { - let vec = Vec::::from_raw_parts(result, count, count) + let vec = slice::from_raw_parts(result, count) .iter() - .map(|x| InstructionTextToken::from_raw(x)) + .map(|x| InstructionTextToken::from_raw(x).to_owned()) .collect(); + BNFreeInstructionText(result, count); Some((consumed, vec)) } else { None } } } - + fn instruction_llil( &self, data: &[u8], @@ -1689,8 +1690,8 @@ where A: 'static + Architecture> + Send + Sync, F: FnOnce(CustomArchitectureHandle, CoreArchitecture) -> A, { - arch: A, - func: F, + arch: MaybeUninit, + func: Option, } extern "C" fn cb_init(ctxt: *mut c_void, obj: *mut BNArchitecture) @@ -1704,11 +1705,10 @@ where handle: ctxt as *mut A, }; - let create = ptr::read(&custom_arch.func); - ptr::write( - &mut custom_arch.arch, - create(custom_arch_handle, CoreArchitecture(obj)), - ); + let create = custom_arch.func.take().unwrap(); + custom_arch + .arch + .write(create(custom_arch_handle, CoreArchitecture(obj))); } } @@ -1811,27 +1811,25 @@ where let data = unsafe { slice::from_raw_parts(data, *len) }; let result = unsafe { &mut *result }; - match custom_arch.instruction_text(data, addr) { - Some((res_size, mut res_tokens)) => { - unsafe { - // TODO: Can't use into_raw_parts 
as it's unstable so we do this instead... - let r_ptr = res_tokens.as_mut_ptr(); - let r_count = res_tokens.len(); - mem::forget(res_tokens); - - *result = &mut (*r_ptr).0; - *count = r_count; - *len = res_size; - } - true - } - None => false, + let Some((res_size, res_tokens)) = custom_arch.instruction_text(data, addr) else { + return false; + }; + + let res_tokens: Box<[_]> = res_tokens.into_boxed_slice(); + unsafe { + let res_tokens = Box::leak(res_tokens); + let r_ptr = res_tokens.as_mut_ptr(); + let r_count = res_tokens.len(); + + *result = &mut (*r_ptr).0; + *count = r_count; + *len = res_size; } + true } extern "C" fn cb_free_instruction_text(tokens: *mut BNInstructionTextToken, count: usize) { - let _tokens = - unsafe { Vec::from_raw_parts(tokens as *mut InstructionTextToken, count, count) }; + let _tokens = unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(tokens, count)) }; } extern "C" fn cb_instruction_llil( @@ -1931,15 +1929,7 @@ where if len == 0 { ptr::null_mut() } else { - let mut res = Vec::with_capacity(len + 1); - - res.push(len as u32); - - for i in items { - res.push(i); - } - - assert!(res.len() == len + 1); + let mut res: Box<[_]> = [len as u32].into_iter().chain(items).collect(); let raw = res.as_mut_ptr(); mem::forget(res); @@ -2280,7 +2270,8 @@ where unsafe { let actual_start = regs.offset(-1); let len = *actual_start + 1; - let _regs = Vec::from_raw_parts(actual_start, len as usize, len as usize); + let regs_ptr = ptr::slice_from_raw_parts_mut(actual_start, len.try_into().unwrap()); + let _regs = Box::from_raw(regs_ptr); } } @@ -2420,28 +2411,25 @@ where { let custom_arch = unsafe { &*(ctxt as *mut A) }; - if let Some(intrinsic) = custom_arch.intrinsic_from_id(intrinsic) { - let inputs = intrinsic.inputs(); - let mut res = Vec::with_capacity(inputs.len()); - for input in inputs { - res.push(input.into_raw()); - } - - unsafe { - *count = res.len(); - if res.is_empty() { - ptr::null_mut() - } else { - let raw = res.as_mut_ptr(); - 
mem::forget(res); - raw - } - } - } else { + let Some(intrinsic) = custom_arch.intrinsic_from_id(intrinsic) else { unsafe { *count = 0; } - ptr::null_mut() + return ptr::null_mut(); + }; + + let inputs = intrinsic.inputs(); + let mut res: Box<[_]> = inputs.into_iter().map(|input| unsafe { Ref::into_raw(input) }.0).collect(); + + unsafe { + *count = res.len(); + if res.is_empty() { + ptr::null_mut() + } else { + let raw = res.as_mut_ptr(); + mem::forget(res); + raw + } } } @@ -2453,9 +2441,9 @@ where if !nt.is_null() { unsafe { - let list = Vec::from_raw_parts(nt, count, count); - for nt in list { - BnString::from_raw(nt.name); + let name_and_types = Box::from_raw(ptr::slice_from_raw_parts_mut(nt, count)); + for nt in name_and_types.into_iter() { + Ref::new(NameAndType::from_raw(nt)); } } } @@ -2473,10 +2461,7 @@ where if let Some(intrinsic) = custom_arch.intrinsic_from_id(intrinsic) { let inputs = intrinsic.outputs(); - let mut res = Vec::with_capacity(inputs.len()); - for input in inputs { - res.push(input.into()); - } + let mut res: Box<[_]> = inputs.iter().map(|input| input.as_ref().into()).collect(); unsafe { *count = res.len(); @@ -2505,9 +2490,7 @@ where { let _custom_arch = unsafe { &*(ctxt as *mut A) }; if !tl.is_null() { - unsafe { - let _list = Vec::from_raw_parts(tl, count, count); - } + let _type_list = unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(tl, count)) }; } } @@ -2685,13 +2668,13 @@ where let name = name.into_bytes_with_nul(); let uninit_arch = ArchitectureBuilder { - arch: unsafe { zeroed() }, - func, + arch: MaybeUninit::zeroed(), + func: Some(func), }; let raw = Box::into_raw(Box::new(uninit_arch)); let mut custom_arch = BNCustomArchitecture { - context: raw as *mut _, + context: raw as *mut ArchitectureBuilder<_, _> as *mut _, init: Some(cb_init::), getEndianness: Some(cb_endianness::), getAddressSize: Some(cb_address_size::), @@ -2776,7 +2759,7 @@ where assert!(!res.is_null()); - &(*raw).arch + (*raw).arch.assume_init_mut() } } diff 
--git a/rust/src/backgroundtask.rs b/rust/src/backgroundtask.rs index 1eb090d7c5..e62cfbcb04 100644 --- a/rust/src/backgroundtask.rs +++ b/rust/src/backgroundtask.rs @@ -112,13 +112,13 @@ unsafe impl CoreOwnedArrayProvider for BackgroundTask { } } -unsafe impl<'a> CoreArrayWrapper<'a> for BackgroundTask { - type Wrapped = Guard<'a, BackgroundTask>; +unsafe impl CoreArrayWrapper for BackgroundTask { + type Wrapped<'a> = Guard<'a, BackgroundTask>; - unsafe fn wrap_raw( + unsafe fn wrap_raw<'a>( raw: &'a *mut BNBackgroundTask, context: &'a (), - ) -> Guard<'a, BackgroundTask> { + ) -> Self::Wrapped<'a> { Guard::new(BackgroundTask::from_raw(*raw), context) } } diff --git a/rust/src/basicblock.rs b/rust/src/basicblock.rs index 73ad9362b0..f28e596f1b 100644 --- a/rust/src/basicblock.rs +++ b/rust/src/basicblock.rs @@ -76,10 +76,10 @@ unsafe impl<'a, C: 'a + BlockContext> CoreOwnedArrayProvider for Edge<'a, C> { } } -unsafe impl<'a, C: 'a + BlockContext> CoreArrayWrapper<'a> for Edge<'a, C> { - type Wrapped = Edge<'a, C>; +unsafe impl<'a, C: BlockContext> CoreArrayWrapper for Edge<'a, C> { + type Wrapped<'b> = Edge<'b, C> where 'a: 'b; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Edge<'a, C> { + unsafe fn wrap_raw<'b>(raw: &'b Self::Raw, context: &'b Self::Context) -> Self::Wrapped<'b> { let edge_target = Guard::new( BasicBlock::from_raw(raw.target, context.orig_block.context.clone()), raw, @@ -309,10 +309,10 @@ unsafe impl CoreOwnedArrayProvider for BasicBlock { } } -unsafe impl<'a, C: 'a + BlockContext> CoreArrayWrapper<'a> for BasicBlock { - type Wrapped = Guard<'a, BasicBlock>; +unsafe impl CoreArrayWrapper for BasicBlock { + type Wrapped<'a> = Guard<'a, BasicBlock> where C: 'a; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { Guard::new(BasicBlock::from_raw(*raw, context.clone()), context) } } diff --git 
a/rust/src/binaryview.rs b/rust/src/binaryview.rs index b93f74117b..3adf4e9cae 100644 --- a/rust/src/binaryview.rs +++ b/rust/src/binaryview.rs @@ -574,16 +574,24 @@ pub trait BinaryViewExt: BinaryViewBase { } } - fn define_auto_data_var(&self, dv: DataVariable) { + fn define_auto_data_var(&self, dv: &DataVariable) { unsafe { - BNDefineDataVariable(self.as_ref().handle, dv.address, &mut dv.t.into()); + BNDefineDataVariable( + self.as_ref().handle, + dv.address(), + &mut dv.type_with_confidence().into(), + ); } } /// You likely would also like to call [`Self::define_user_symbol`] to bind this data variable with a name - fn define_user_data_var(&self, dv: DataVariable) { + fn define_user_data_var(&self, dv: &DataVariable) { unsafe { - BNDefineUserDataVariable(self.as_ref().handle, dv.address, &mut dv.t.into()); + BNDefineUserDataVariable( + self.as_ref().handle, + dv.address(), + &mut dv.type_with_confidence().into(), + ); } } diff --git a/rust/src/callingconvention.rs b/rust/src/callingconvention.rs index 815f4d42a6..ef2725e8fd 100644 --- a/rust/src/callingconvention.rs +++ b/rust/src/callingconvention.rs @@ -569,11 +569,43 @@ impl CallingConventionBase for CallingConvention { } fn int_arg_registers(&self) -> Vec { - Vec::new() + unsafe { + let mut count = 0; + let regs = BNGetIntegerArgumentRegisters(self.handle, &mut count); + let arch = self.arch_handle.borrow(); + + let res = slice::from_raw_parts(regs, count) + .iter() + .map(|&r| { + arch.register_from_id(r) + .expect("bad reg id from CallingConvention") + }) + .collect(); + + BNFreeRegisterList(regs); + + res + } } fn float_arg_registers(&self) -> Vec { - Vec::new() + unsafe { + let mut count = 0; + let regs = BNGetFloatArgumentRegisters(self.handle, &mut count); + let arch = self.arch_handle.borrow(); + + let res = slice::from_raw_parts(regs, count) + .iter() + .map(|&r| { + arch.register_from_id(r) + .expect("bad reg id from CallingConvention") + }) + .collect(); + + BNFreeRegisterList(regs); + + res + } } 
fn arg_registers_shared_index(&self) -> bool { @@ -662,10 +694,10 @@ unsafe impl CoreOwnedArrayProvider for CallingConvention { } } -unsafe impl<'a, A: Architecture> CoreArrayWrapper<'a> for CallingConvention { - type Wrapped = Guard<'a, CallingConvention>; +unsafe impl CoreArrayWrapper for CallingConvention { + type Wrapped<'a> = Guard<'a, CallingConvention>; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { Guard::new( CallingConvention { handle: *raw, diff --git a/rust/src/custombinaryview.rs b/rust/src/custombinaryview.rs index 956be9bdc3..4c645ffdaf 100644 --- a/rust/src/custombinaryview.rs +++ b/rust/src/custombinaryview.rs @@ -20,6 +20,7 @@ pub use binaryninjacore_sys::BNModificationStatus as ModificationStatus; use std::marker::PhantomData; use std::mem; +use std::mem::MaybeUninit; use std::os::raw::c_void; use std::ptr; use std::slice; @@ -122,11 +123,10 @@ where let long_name = long_name.into_bytes_with_nul(); let long_name_ptr = long_name.as_ref().as_ptr() as *mut _; - let ctxt = Box::new(unsafe { mem::zeroed() }); - let ctxt = Box::into_raw(ctxt); + let ctxt = Box::leak(Box::new(MaybeUninit::zeroed())); let mut bn_obj = BNCustomBinaryViewType { - context: ctxt as *mut _, + context: ctxt.as_mut_ptr() as *mut _, create: Some(cb_create::), parse: Some(cb_parse::), isValidForData: Some(cb_valid::), @@ -140,15 +140,16 @@ where if res.is_null() { // avoid leaking the space allocated for the type, but also // avoid running its Drop impl (if any -- not that there should - // be one since view types live for the life of the process) - mem::forget(*Box::from_raw(ctxt)); + // be one since view types live for the life of the process) as + // MaybeUninit suppress the Drop implementation of it's inner type + drop(Box::from_raw(ctxt)); panic!("bvt registration failed"); } - ptr::write(ctxt, constructor(BinaryViewType(res))); + 
ctxt.write(constructor(BinaryViewType(res))); - &*ctxt + ctxt.assume_init_mut() } } @@ -297,11 +298,11 @@ unsafe impl CoreOwnedArrayProvider for BinaryViewType { } } -unsafe impl<'a> CoreArrayWrapper<'a> for BinaryViewType { - type Wrapped = BinaryViewType; +unsafe impl CoreArrayWrapper for BinaryViewType { + type Wrapped<'a> = Guard<'a, BinaryViewType>; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { - BinaryViewType(*raw) + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + Guard::new(BinaryViewType(*raw), &()) } } diff --git a/rust/src/debuginfo.rs b/rust/src/debuginfo.rs index 63f517a0f3..a09e0c674f 100644 --- a/rust/src/debuginfo.rs +++ b/rust/src/debuginfo.rs @@ -78,7 +78,7 @@ use crate::{ types::{DataVariableAndName, NameAndType, Type}, }; -use std::{hash::Hash, mem, os::raw::c_void, ptr, slice}; +use std::{hash::Hash, os::raw::c_void, ptr, slice}; struct ProgressContext(Option Result<(), ()>>>); @@ -113,14 +113,14 @@ impl DebugInfoParser { /// List all debug-info parsers pub fn list() -> Array { - let mut count: usize = unsafe { mem::zeroed() }; + let mut count = 0; let raw_parsers = unsafe { BNGetDebugInfoParsers(&mut count as *mut _) }; unsafe { Array::new(raw_parsers, count, ()) } } /// Returns a list of debug-info parsers that are valid for the provided binary view pub fn parsers_for_view(bv: &BinaryView) -> Array { - let mut count: usize = unsafe { mem::zeroed() }; + let mut count = 0; let raw_parsers = unsafe { BNGetDebugInfoParsersForView(bv.handle, &mut count as *mut _) }; unsafe { Array::new(raw_parsers, count, ()) } } @@ -388,7 +388,7 @@ impl DebugInfo { } /// Returns a generator of all types provided by a named DebugInfoParser - pub fn types_by_name(&self, parser_name: S) -> Vec> { + pub fn types_by_name(&self, parser_name: S) -> Vec> { let parser_name = parser_name.into_bytes_with_nul(); let mut count: usize = 0; @@ -399,10 +399,10 @@ impl DebugInfo { &mut 
count, ) }; - let result: Vec> = unsafe { + let result: Vec> = unsafe { slice::from_raw_parts_mut(debug_types_ptr, count) .iter() - .map(NameAndType::::from_raw) + .map(|x| NameAndType::from_raw(x).to_owned()) .collect() }; @@ -411,13 +411,13 @@ impl DebugInfo { } /// A generator of all types provided by DebugInfoParsers - pub fn types(&self) -> Vec> { + pub fn types(&self) -> Vec> { let mut count: usize = 0; let debug_types_ptr = unsafe { BNGetDebugTypes(self.handle, ptr::null_mut(), &mut count) }; - let result: Vec> = unsafe { + let result: Vec> = unsafe { slice::from_raw_parts_mut(debug_types_ptr, count) .iter() - .map(NameAndType::::from_raw) + .map(|x| NameAndType::from_raw(x).to_owned()) .collect() }; @@ -770,21 +770,15 @@ impl DebugInfo { let short_name_bytes = new_func.short_name.map(|name| name.into_bytes_with_nul()); let short_name = short_name_bytes .as_ref() - .map_or(ptr::null_mut() as *mut _, |name| { - name.as_ptr() as _ - }); + .map_or(ptr::null_mut() as *mut _, |name| name.as_ptr() as _); let full_name_bytes = new_func.full_name.map(|name| name.into_bytes_with_nul()); let full_name = full_name_bytes .as_ref() - .map_or(ptr::null_mut() as *mut _, |name| { - name.as_ptr() as _ - }); + .map_or(ptr::null_mut() as *mut _, |name| name.as_ptr() as _); let raw_name_bytes = new_func.raw_name.map(|name| name.into_bytes_with_nul()); let raw_name = raw_name_bytes .as_ref() - .map_or(ptr::null_mut() as *mut _, |name| { - name.as_ptr() as _ - }); + .map_or(ptr::null_mut() as *mut _, |name| name.as_ptr() as _); let mut components_array: Vec<*const ::std::os::raw::c_char> = Vec::with_capacity(new_func.components.len()); diff --git a/rust/src/demangle.rs b/rust/src/demangle.rs index 19eb085c96..3756ea068a 100644 --- a/rust/src/demangle.rs +++ b/rust/src/demangle.rs @@ -33,8 +33,8 @@ pub fn demangle_gnu3( ) -> Result<(Option>, Vec)> { let mangled_name_bwn = mangled_name.into_bytes_with_nul(); let mangled_name_ptr = mangled_name_bwn.as_ref(); - let mut out_type: *mut 
BNType = unsafe { std::mem::zeroed() }; - let mut out_name: *mut *mut std::os::raw::c_char = unsafe { std::mem::zeroed() }; + let mut out_type: *mut BNType = std::ptr::null_mut(); + let mut out_name: *mut *mut std::os::raw::c_char = std::ptr::null_mut(); let mut out_size: usize = 0; let res = unsafe { BNDemangleGNU3( @@ -89,8 +89,8 @@ pub fn demangle_ms( let mangled_name_bwn = mangled_name.into_bytes_with_nul(); let mangled_name_ptr = mangled_name_bwn.as_ref(); - let mut out_type: *mut BNType = unsafe { std::mem::zeroed() }; - let mut out_name: *mut *mut std::os::raw::c_char = unsafe { std::mem::zeroed() }; + let mut out_type: *mut BNType = std::ptr::null_mut(); + let mut out_name: *mut *mut std::os::raw::c_char = std::ptr::null_mut(); let mut out_size: usize = 0; let res = unsafe { BNDemangleMS( diff --git a/rust/src/disassembly.rs b/rust/src/disassembly.rs index f213fa0eb4..855807b13a 100644 --- a/rust/src/disassembly.rs +++ b/rust/src/disassembly.rs @@ -73,7 +73,7 @@ pub type InstructionTextTokenContext = BNInstructionTextTokenContext; // IndirectImportToken = 69, // ExternalSymbolToken = 70, -#[repr(C)] +#[repr(transparent)] pub struct InstructionTextToken(pub(crate) BNInstructionTextToken); #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] @@ -99,8 +99,8 @@ pub enum InstructionTextTokenContents { } impl InstructionTextToken { - pub(crate) unsafe fn from_raw(raw: &BNInstructionTextToken) -> Self { - Self(*raw) + pub(crate) unsafe fn from_raw(raw: &BNInstructionTextToken) -> &Self { + mem::transmute(raw) } pub fn new(text: &str, contents: InstructionTextTokenContents) -> Self { @@ -254,13 +254,16 @@ impl Clone for InstructionTextToken { } } -// TODO : There is almost certainly a memory leak here - in the case where -// `impl CoreOwnedArrayProvider for InstructionTextToken` doesn't get triggered -// impl Drop for InstructionTextToken { -// fn drop(&mut self) { -// let _owned = unsafe { BnString::from_raw(self.0.text) }; -// } -// } +impl Drop for 
InstructionTextToken { + fn drop(&mut self) { + if !self.0.text.is_null() { + let _owned = unsafe { BnString::from_raw(self.0.text) }; + } + if !self.0.typeNames.is_null() && self.0.namesCount != 0 { + unsafe { BNFreeStringList(self.0.typeNames, self.0.namesCount) } + } + } +} pub struct DisassemblyTextLine(pub(crate) BNDisassemblyTextLine); @@ -290,7 +293,7 @@ impl DisassemblyTextLine { unsafe { std::slice::from_raw_parts::(self.0.tokens, self.0.count) .iter() - .map(|&x| InstructionTextToken::from_raw(&x)) + .map(|x| InstructionTextToken::from_raw(x).clone()) .collect() } } @@ -307,10 +310,9 @@ impl std::fmt::Display for DisassemblyTextLine { } impl From> for DisassemblyTextLine { - fn from(mut tokens: Vec) -> Self { - tokens.shrink_to_fit(); + fn from(tokens: Vec) -> Self { + let mut tokens: Box<[_]> = tokens.into(); - assert!(tokens.len() == tokens.capacity()); // TODO: let (tokens_pointer, tokens_len, _) = unsafe { tokens.into_raw_parts() }; // Can't use for now...still a rust nightly feature let tokens_pointer = tokens.as_mut_ptr(); let tokens_len = tokens.len(); @@ -345,14 +347,11 @@ impl From> for DisassemblyTextLine { impl From<&Vec<&str>> for DisassemblyTextLine { fn from(string_tokens: &Vec<&str>) -> Self { - let mut tokens: Vec = Vec::with_capacity(string_tokens.len()); - tokens.extend( - string_tokens.iter().map(|&token| { - InstructionTextToken::new(token, InstructionTextTokenContents::Text).0 - }), - ); - - assert!(tokens.len() == tokens.capacity()); + let mut tokens: Box<[BNInstructionTextToken]> = string_tokens + .iter() + .map(|&token| InstructionTextToken::new(token, InstructionTextTokenContents::Text).0) + .collect(); + // let (tokens_pointer, tokens_len, _) = unsafe { tokens.into_raw_parts() }; // Can't use for now...still a rust nighly feature let tokens_pointer = tokens.as_mut_ptr(); let tokens_len = tokens.len(); @@ -416,8 +415,9 @@ impl Default for DisassemblyTextLine { impl Drop for DisassemblyTextLine { fn drop(&mut self) { - unsafe { - 
Vec::from_raw_parts(self.0.tokens, self.0.count, self.0.count); + if !self.0.tokens.is_null() { + let ptr = core::ptr::slice_from_raw_parts_mut(self.0.tokens, self.0.count); + let _ = unsafe { Box::from_raw(ptr) }; } } } diff --git a/rust/src/downloadprovider.rs b/rust/src/downloadprovider.rs index 97ebbdbcba..8334e0cea8 100644 --- a/rust/src/downloadprovider.rs +++ b/rust/src/downloadprovider.rs @@ -1,5 +1,5 @@ use crate::rc::{ - Array, CoreArrayProvider, CoreArrayWrapper, CoreOwnedArrayProvider, Ref, RefCountable, + Array, CoreArrayProvider, CoreArrayWrapper, CoreOwnedArrayProvider, Guard, Ref, RefCountable, }; use crate::settings::Settings; use crate::string::{BnStrCompatible, BnString}; @@ -71,11 +71,11 @@ unsafe impl CoreOwnedArrayProvider for DownloadProvider { } } -unsafe impl<'a> CoreArrayWrapper<'a> for DownloadProvider { - type Wrapped = DownloadProvider; +unsafe impl CoreArrayWrapper for DownloadProvider { + type Wrapped<'a> = Guard<'a, DownloadProvider>; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { - DownloadProvider::from_raw(*raw) + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + Guard::new(DownloadProvider::from_raw(*raw), &()) } } diff --git a/rust/src/function.rs b/rust/src/function.rs index 273a08612d..74e05ea552 100644 --- a/rust/src/function.rs +++ b/rust/src/function.rs @@ -30,7 +30,6 @@ pub use binaryninjacore_sys::BNAnalysisSkipReason as AnalysisSkipReason; pub use binaryninjacore_sys::BNFunctionAnalysisSkipOverride as FunctionAnalysisSkipOverride; pub use binaryninjacore_sys::BNFunctionUpdateType as FunctionUpdateType; - use std::hash::Hash; use std::{fmt, mem}; @@ -315,6 +314,22 @@ impl Function { } } + pub fn parameter_variables(&self) -> Conf> { + unsafe { + let mut variables = BNGetFunctionParameterVariables(self.handle); + let mut result = Vec::with_capacity(variables.count); + let confidence = variables.confidence; + let vars = 
std::slice::from_raw_parts(variables.vars, variables.count); + + for i in 0..variables.count { + result.push(Variable::from_raw(vars[i])); + } + + BNFreeParameterVariables(&mut variables); + Conf::new(result, confidence) + } + } + pub fn apply_imported_types(&self, sym: &Symbol, t: Option<&Type>) { unsafe { BNApplyImportedTypes( @@ -407,10 +422,10 @@ unsafe impl CoreOwnedArrayProvider for Function { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Function { - type Wrapped = Guard<'a, Function>; +unsafe impl CoreArrayWrapper for Function { + type Wrapped<'a> = Guard<'a, Function>; - unsafe fn wrap_raw(raw: &'a *mut BNFunction, context: &'a ()) -> Guard<'a, Function> { + unsafe fn wrap_raw<'a>(raw: &'a *mut BNFunction, context: &'a ()) -> Self::Wrapped<'a> { Guard::new(Function { handle: *raw }, context) } } @@ -461,10 +476,10 @@ unsafe impl CoreOwnedArrayProvider for AddressRange { } } -unsafe impl<'a> CoreArrayWrapper<'a> for AddressRange { - type Wrapped = &'a AddressRange; +unsafe impl CoreArrayWrapper for AddressRange { + type Wrapped<'a> = &'a AddressRange; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { mem::transmute(raw) } } diff --git a/rust/src/hlil/function.rs b/rust/src/hlil/function.rs index 4bad7f0f0c..25608d714a 100644 --- a/rust/src/hlil/function.rs +++ b/rust/src/hlil/function.rs @@ -2,8 +2,10 @@ use std::hash::{Hash, Hasher}; use binaryninjacore_sys::BNFreeHighLevelILFunction; use binaryninjacore_sys::BNGetHighLevelILBasicBlockList; +use binaryninjacore_sys::BNGetHighLevelILIndexForInstruction; use binaryninjacore_sys::BNGetHighLevelILInstructionCount; use binaryninjacore_sys::BNGetHighLevelILOwnerFunction; +use binaryninjacore_sys::BNGetHighLevelILRootExpr; use binaryninjacore_sys::BNGetHighLevelILSSAForm; use binaryninjacore_sys::BNHighLevelILFunction; use binaryninjacore_sys::BNNewHighLevelILFunctionReference; 
@@ -52,6 +54,29 @@ impl HighLevelILFunction { self.instruction_from_idx(expr_idx).lift() } + pub fn instruction_from_instruction_idx(&self, instr_idx: usize) -> HighLevelILInstruction { + HighLevelILInstruction::new(self.as_non_ast(), unsafe { + BNGetHighLevelILIndexForInstruction(self.handle, instr_idx) + }) + } + + pub fn lifted_instruction_from_instruction_idx( + &self, + instr_idx: usize, + ) -> HighLevelILLiftedInstruction { + self.instruction_from_instruction_idx(instr_idx).lift() + } + + pub fn root(&self) -> HighLevelILInstruction { + HighLevelILInstruction::new(self.as_ast(), unsafe { + BNGetHighLevelILRootExpr(self.handle) + }) + } + + pub fn lifted_root(&self) -> HighLevelILLiftedInstruction { + self.root().lift() + } + pub fn instruction_count(&self) -> usize { unsafe { BNGetHighLevelILInstructionCount(self.handle) } } @@ -81,6 +106,22 @@ impl HighLevelILFunction { unsafe { Array::new(blocks, count, context) } } + + pub fn as_ast(&self) -> Ref { + Self { + handle: self.handle, + full_ast: true, + } + .to_owned() + } + + pub fn as_non_ast(&self) -> Ref { + Self { + handle: self.handle, + full_ast: false, + } + .to_owned() + } } impl ToOwned for HighLevelILFunction { diff --git a/rust/src/hlil/instruction.rs b/rust/src/hlil/instruction.rs index 9bffadaf4d..7e77e379e4 100644 --- a/rust/src/hlil/instruction.rs +++ b/rust/src/hlil/instruction.rs @@ -16,6 +16,7 @@ pub struct HighLevelILInstruction { pub function: Ref, pub address: u64, pub index: usize, + pub size: usize, pub kind: HighLevelILInstructionKind, } @@ -629,6 +630,7 @@ impl HighLevelILInstruction { function, address: op.address, index, + size: op.size, kind, } } @@ -878,6 +880,7 @@ impl HighLevelILInstruction { function: self.function.clone(), address: self.address, index: self.index, + size: self.size, kind, } } diff --git a/rust/src/hlil/lift.rs b/rust/src/hlil/lift.rs index 74ae0c64e6..731a785c10 100644 --- a/rust/src/hlil/lift.rs +++ b/rust/src/hlil/lift.rs @@ -24,6 +24,7 @@ pub struct 
HighLevelILLiftedInstruction { pub function: Ref, pub address: u64, pub index: usize, + pub size: usize, pub kind: HighLevelILLiftedInstructionKind, } diff --git a/rust/src/hlil/operation.rs b/rust/src/hlil/operation.rs index 965d951736..ee0d437b53 100644 --- a/rust/src/hlil/operation.rs +++ b/rust/src/hlil/operation.rs @@ -9,7 +9,7 @@ use super::HighLevelILLiftedInstruction; #[derive(Clone, Debug, PartialEq, Eq)] pub struct GotoLabel { pub(crate) function: Ref, - pub(crate) target: u64, + pub target: u64, } impl GotoLabel { diff --git a/rust/src/interaction.rs b/rust/src/interaction.rs index f5261817a6..7d10bbe4ab 100644 --- a/rust/src/interaction.rs +++ b/rust/src/interaction.rs @@ -296,7 +296,9 @@ impl FormInputBuilder { result.type_ = BNFormInputFieldType::AddressFormField; result.prompt = prompt.as_ref().as_ptr() as *const c_char; if let Some(view) = view { - result.view = view.handle; + // the view is being moved into result, there is no need to clone + // and drop is intentionally being avoided with `Ref::into_raw` + result.view = unsafe { Ref::into_raw(view) }.handle; } result.currentAddress = current_address.unwrap_or(0); result.hasDefault = default.is_some(); diff --git a/rust/src/linearview.rs b/rust/src/linearview.rs index 09968fc681..31b05ad274 100644 --- a/rust/src/linearview.rs +++ b/rust/src/linearview.rs @@ -423,10 +423,10 @@ unsafe impl CoreOwnedArrayProvider for LinearDisassemblyLine { } } -unsafe impl<'a> CoreArrayWrapper<'a> for LinearDisassemblyLine { - type Wrapped = Guard<'a, LinearDisassemblyLine>; +unsafe impl CoreArrayWrapper for LinearDisassemblyLine { + type Wrapped<'a> = Guard<'a, LinearDisassemblyLine>; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { Guard::new(LinearDisassemblyLine::from_raw(raw), _context) } } diff --git a/rust/src/llil/operation.rs b/rust/src/llil/operation.rs index 
3c40f20777..3ba4fa785c 100644 --- a/rust/src/llil/operation.rs +++ b/rust/src/llil/operation.rs @@ -89,10 +89,10 @@ pub struct Syscall; pub struct Intrinsic; impl<'func, A, M, V> Operation<'func, A, M, NonSSA, Intrinsic> - where - A: 'func + Architecture, - M: FunctionMutability, - V: NonSSAVariant, +where + A: 'func + Architecture, + M: FunctionMutability, + V: NonSSAVariant, { // TODO: Support register and expression lists pub fn intrinsic(&self) -> Option { @@ -382,12 +382,20 @@ where } } + pub fn true_target_idx(&self) -> usize { + self.op.operands[1] as usize + } + pub fn false_target(&self) -> Instruction<'func, A, M, F> { Instruction { function: self.function, instr_idx: self.op.operands[2] as usize, } } + + pub fn false_target_idx(&self) -> usize { + self.op.operands[2] as usize + } } // LLIL_GOTO @@ -405,6 +413,10 @@ where instr_idx: self.op.operands[0] as usize, } } + + pub fn target_idx(&self) -> usize { + self.op.operands[0] as usize + } } // LLIL_FLAG_COND diff --git a/rust/src/metadata.rs b/rust/src/metadata.rs index e29789e52e..6f026a04cd 100644 --- a/rust/src/metadata.rs +++ b/rust/src/metadata.rs @@ -343,10 +343,10 @@ unsafe impl CoreOwnedArrayProvider for Metadata { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Metadata { - type Wrapped = Guard<'a, Metadata>; +unsafe impl CoreArrayWrapper for Metadata { + type Wrapped<'a> = Guard<'a, Metadata>; - unsafe fn wrap_raw(raw: &'a *mut BNMetadata, context: &'a ()) -> Guard<'a, Metadata> { + unsafe fn wrap_raw<'a>(raw: &'a *mut BNMetadata, context: &'a ()) -> Self::Wrapped<'a> { Guard::new(Metadata::from_raw(*raw), context) } } @@ -403,12 +403,6 @@ impl From<&str> for Ref { } } -impl>> From<&T> for Ref { - fn from(value: &T) -> Self { - value.into() - } -} - impl From<&Vec> for Ref { fn from(value: &Vec) -> Self { unsafe { Metadata::ref_from_raw(BNCreateMetadataRawData(value.as_ptr(), value.len())) } @@ -441,16 +435,15 @@ impl From<&Array> for Ref { impl From>> for Ref { fn from(value: HashMap>) -> Self { 
- let mut key_refs: Vec = vec![]; - let mut keys: Vec<*const c_char> = vec![]; - let mut values: Vec<*mut BNMetadata> = vec![]; - for (k, v) in value.into_iter() { - key_refs.push(k.into_bytes_with_nul()); - values.push(v.as_ref().handle); - } - for k in &key_refs { - keys.push(k.as_ref().as_ptr() as *const c_char); - } + let data: Vec<(S::Result, Ref)> = value + .into_iter() + .map(|(k, v)| (k.into_bytes_with_nul(), v)) + .collect(); + let mut keys: Vec<*const c_char> = data + .iter() + .map(|(k, _)| k.as_ref().as_ptr() as *const c_char) + .collect(); + let mut values: Vec<*mut BNMetadata> = data.iter().map(|(_, v)| v.handle).collect(); unsafe { Metadata::ref_from_raw(BNCreateMetadataValueStore( @@ -462,19 +455,21 @@ impl From>> for Ref { } } -impl>> From<&[(S, T)]> for Ref { +impl From<&[(S, T)]> for Ref +where + S: BnStrCompatible + Copy, + for<'a> &'a T: Into>, +{ fn from(value: &[(S, T)]) -> Self { - let mut key_refs: Vec = vec![]; - let mut keys: Vec<*const c_char> = vec![]; - let mut values: Vec<*mut BNMetadata> = vec![]; - for (k, v) in value.iter() { - key_refs.push(k.into_bytes_with_nul()); - let value_metadata: Ref = v.into(); - values.push(value_metadata.handle); - } - for k in &key_refs { - keys.push(k.as_ref().as_ptr() as *const c_char); - } + let data: Vec<(S::Result, Ref)> = value + .into_iter() + .map(|(k, v)| (k.into_bytes_with_nul(), v.into())) + .collect(); + let mut keys: Vec<*const c_char> = data + .iter() + .map(|(k, _)| k.as_ref().as_ptr() as *const c_char) + .collect(); + let mut values: Vec<*mut BNMetadata> = data.iter().map(|(_, v)| v.handle).collect(); unsafe { Metadata::ref_from_raw(BNCreateMetadataValueStore( @@ -486,29 +481,15 @@ impl>> From<&[(S, T)]> for Ref< } } -impl>, const N: usize> From<[(S, T); N]> - for Ref +impl From<[(S, T); N]> for Ref +where + S: BnStrCompatible + Copy, + for<'a> &'a T: Into>, { fn from(value: [(S, T); N]) -> Self { - let mut key_refs: Vec = vec![]; - let mut keys: Vec<*const c_char> = vec![]; - let mut 
values: Vec<*mut BNMetadata> = vec![]; - for (k, v) in value.into_iter() { - key_refs.push(k.into_bytes_with_nul()); - let value_metadata: Ref = v.into(); - values.push(value_metadata.handle); - } - for k in &key_refs { - keys.push(k.as_ref().as_ptr() as *const c_char); - } - - unsafe { - Metadata::ref_from_raw(BNCreateMetadataValueStore( - keys.as_mut_ptr(), - values.as_mut_ptr(), - keys.len(), - )) - } + let slice = &value[..]; + // use the `impl From<&[(S, T)]>` + slice.into() } } diff --git a/rust/src/mlil/function.rs b/rust/src/mlil/function.rs index 16cc510209..0b662578c3 100644 --- a/rust/src/mlil/function.rs +++ b/rust/src/mlil/function.rs @@ -2,6 +2,7 @@ use core::hash::{Hash, Hasher}; use binaryninjacore_sys::BNFreeMediumLevelILFunction; use binaryninjacore_sys::BNGetMediumLevelILBasicBlockList; +use binaryninjacore_sys::BNGetMediumLevelILIndexForInstruction; use binaryninjacore_sys::BNGetMediumLevelILInstructionCount; use binaryninjacore_sys::BNGetMediumLevelILOwnerFunction; use binaryninjacore_sys::BNGetMediumLevelILSSAForm; @@ -65,6 +66,19 @@ impl MediumLevelILFunction { self.instruction_from_idx(expr_idx).lift() } + pub fn instruction_from_instruction_idx(&self, instr_idx: usize) -> MediumLevelILInstruction { + MediumLevelILInstruction::new(self.to_owned(), unsafe { + BNGetMediumLevelILIndexForInstruction(self.handle, instr_idx) + }) + } + + pub fn lifted_instruction_from_instruction_idx( + &self, + instr_idx: usize, + ) -> MediumLevelILLiftedInstruction { + self.instruction_from_instruction_idx(instr_idx).lift() + } + pub fn instruction_count(&self) -> usize { unsafe { BNGetMediumLevelILInstructionCount(self.handle) } } diff --git a/rust/src/mlil/instruction.rs b/rust/src/mlil/instruction.rs index 3b5dcfb423..bd2cc71719 100644 --- a/rust/src/mlil/instruction.rs +++ b/rust/src/mlil/instruction.rs @@ -18,6 +18,7 @@ pub struct MediumLevelILInstruction { pub function: Ref, pub address: u64, pub index: usize, + pub size: usize, pub kind: 
MediumLevelILInstructionKind, } @@ -704,7 +705,12 @@ impl MediumLevelILInstruction { }), // translated directly into a list for Expression or Variables // TODO MLIL_MEMORY_INTRINSIC_SSA needs to be handled properly - MLIL_CALL_OUTPUT | MLIL_CALL_PARAM | MLIL_CALL_PARAM_SSA | MLIL_CALL_OUTPUT_SSA | MLIL_MEMORY_INTRINSIC_OUTPUT_SSA | MLIL_MEMORY_INTRINSIC_SSA => { + MLIL_CALL_OUTPUT + | MLIL_CALL_PARAM + | MLIL_CALL_PARAM_SSA + | MLIL_CALL_OUTPUT_SSA + | MLIL_MEMORY_INTRINSIC_OUTPUT_SSA + | MLIL_MEMORY_INTRINSIC_SSA => { unreachable!() } }; @@ -713,6 +719,7 @@ impl MediumLevelILInstruction { function, address: op.address, index, + size: op.size, kind, } } @@ -1022,6 +1029,7 @@ impl MediumLevelILInstruction { function: self.function.clone(), address: self.address, index: self.index, + size: self.size, kind, } } diff --git a/rust/src/mlil/lift.rs b/rust/src/mlil/lift.rs index 39e8e98302..e8548b064e 100644 --- a/rust/src/mlil/lift.rs +++ b/rust/src/mlil/lift.rs @@ -27,6 +27,7 @@ pub struct MediumLevelILLiftedInstruction { pub function: Ref, pub address: u64, pub index: usize, + pub size: usize, pub kind: MediumLevelILLiftedInstructionKind, } diff --git a/rust/src/platform.rs b/rust/src/platform.rs index 3df5e7c47f..42e2f80cc1 100644 --- a/rust/src/platform.rs +++ b/rust/src/platform.rs @@ -373,10 +373,10 @@ unsafe impl CoreOwnedArrayProvider for Platform { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Platform { - type Wrapped = Guard<'a, Platform>; +unsafe impl CoreArrayWrapper for Platform { + type Wrapped<'a> = Guard<'a, Platform>; - unsafe fn wrap_raw(raw: &'a *mut BNPlatform, context: &'a ()) -> Guard<'a, Platform> { + unsafe fn wrap_raw<'a>(raw: &'a *mut BNPlatform, context: &'a ()) -> Self::Wrapped<'a> { debug_assert!(!raw.is_null()); Guard::new(Platform { handle: *raw }, context) } diff --git a/rust/src/rc.rs b/rust/src/rc.rs index cdcae1792a..eaf8e5d26e 100644 --- a/rust/src/rc.rs +++ b/rust/src/rc.rs @@ -196,14 +196,12 @@ pub unsafe trait 
CoreOwnedArrayProvider: CoreArrayProvider { unsafe fn free(raw: *mut Self::Raw, count: usize, context: &Self::Context); } -pub unsafe trait CoreArrayWrapper<'a>: CoreArrayProvider -where - Self::Raw: 'a, - Self::Context: 'a, -{ - type Wrapped: 'a; +pub unsafe trait CoreArrayWrapper: CoreArrayProvider { + type Wrapped<'a> + where + Self: 'a; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped; + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a>; } pub struct Array { @@ -250,16 +248,16 @@ impl Array

{ } } -impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreOwnedArrayProvider> Array

{ +impl Array

{ #[inline] - pub fn get(&'a self, index: usize) -> P::Wrapped { + pub fn get(&self, index: usize) -> P::Wrapped<'_> { unsafe { let backing = slice::from_raw_parts(self.contents, self.count); P::wrap_raw(&backing[index], &self.context) } } - pub fn iter(&'a self) -> ArrayIter<'a, P> { + pub fn iter(&self) -> ArrayIter

{ ArrayIter { it: unsafe { slice::from_raw_parts(self.contents, self.count).iter() }, context: &self.context, @@ -267,8 +265,8 @@ impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreOwnedArrayProvider> Array

{ } } -impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreOwnedArrayProvider> IntoIterator for &'a Array

{ - type Item = P::Wrapped; +impl<'a, P: CoreArrayWrapper + CoreOwnedArrayProvider> IntoIterator for &'a Array

{ + type Item = P::Wrapped<'a>; type IntoIter = ArrayIter<'a, P>; fn into_iter(self) -> Self::IntoIter { @@ -323,16 +321,16 @@ impl ArrayGuard

{ } } -impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreArrayProvider> ArrayGuard

{ +impl ArrayGuard

{ #[inline] - pub fn get(&'a self, index: usize) -> P::Wrapped { + pub fn get(&self, index: usize) -> P::Wrapped<'_> { unsafe { let backing = slice::from_raw_parts(self.contents, self.count); P::wrap_raw(&backing[index], &self.context) } } - pub fn iter(&'a self) -> ArrayIter<'a, P> { + pub fn iter(&self) -> ArrayIter

{ ArrayIter { it: unsafe { slice::from_raw_parts(self.contents, self.count).iter() }, context: &self.context, @@ -340,8 +338,8 @@ impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreArrayProvider> ArrayGuard

{ } } -impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreArrayProvider> IntoIterator for &'a ArrayGuard

{ - type Item = P::Wrapped; +impl<'a, P: CoreArrayWrapper + CoreArrayProvider> IntoIterator for &'a ArrayGuard

{ + type Item = P::Wrapped<'a>; type IntoIter = ArrayIter<'a, P>; fn into_iter(self) -> Self::IntoIter { @@ -351,27 +349,27 @@ impl<'a, P: 'a + CoreArrayWrapper<'a> + CoreArrayProvider> IntoIterator for &'a pub struct ArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: CoreArrayWrapper, { it: slice::Iter<'a, P::Raw>, context: &'a P::Context, } -unsafe impl<'a, P> Send for ArrayIter<'a, P> +unsafe impl

Send for ArrayIter<'_, P> where - P: CoreArrayWrapper<'a>, + P: CoreArrayWrapper, P::Context: Sync, { } impl<'a, P> Iterator for ArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: 'a + CoreArrayWrapper, { - type Item = P::Wrapped; + type Item = P::Wrapped<'a>; #[inline] - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { self.it .next() .map(|r| unsafe { P::wrap_raw(r, self.context) }) @@ -385,7 +383,7 @@ where impl<'a, P> ExactSizeIterator for ArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: 'a + CoreArrayWrapper, { #[inline] fn len(&self) -> usize { @@ -395,10 +393,10 @@ where impl<'a, P> DoubleEndedIterator for ArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: 'a + CoreArrayWrapper, { #[inline] - fn next_back(&mut self) -> Option { + fn next_back(&mut self) -> Option> { self.it .next_back() .map(|r| unsafe { P::wrap_raw(r, self.context) }) @@ -412,20 +410,20 @@ use rayon::prelude::*; use rayon::iter::plumbing::*; #[cfg(feature = "rayon")] -impl<'a, P> Array

+impl

Array

where - P: 'a + CoreArrayWrapper<'a> + CoreOwnedArrayProvider, + P: CoreArrayWrapper + CoreOwnedArrayProvider, P::Context: Sync, - P::Wrapped: Send, + for<'a> P::Wrapped<'a>: Send, { - pub fn par_iter(&'a self) -> ParArrayIter<'a, P> { + pub fn par_iter(&self) -> ParArrayIter<'_, P> { ParArrayIter { it: self.iter() } } } #[cfg(feature = "rayon")] pub struct ParArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: 'a + CoreArrayWrapper, ArrayIter<'a, P>: Send, { it: ArrayIter<'a, P>, @@ -434,11 +432,11 @@ where #[cfg(feature = "rayon")] impl<'a, P> ParallelIterator for ParArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, - P::Wrapped: Send, + P: 'a + CoreArrayWrapper, + P::Wrapped<'a>: Send, ArrayIter<'a, P>: Send, { - type Item = P::Wrapped; + type Item = P::Wrapped<'a>; fn drive_unindexed(self, consumer: C) -> C::Result where @@ -455,8 +453,8 @@ where #[cfg(feature = "rayon")] impl<'a, P> IndexedParallelIterator for ParArrayIter<'a, P> where - P: 'a + CoreArrayWrapper<'a>, - P::Wrapped: Send, + P: 'a + CoreArrayWrapper, + P::Wrapped<'a>: Send, ArrayIter<'a, P>: Send, { fn drive(self, consumer: C) -> C::Result @@ -481,7 +479,7 @@ where #[cfg(feature = "rayon")] struct ArrayIterProducer<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: 'a + CoreArrayWrapper, ArrayIter<'a, P>: Send, { it: ArrayIter<'a, P>, @@ -490,10 +488,10 @@ where #[cfg(feature = "rayon")] impl<'a, P> Producer for ArrayIterProducer<'a, P> where - P: 'a + CoreArrayWrapper<'a>, + P: 'a + CoreArrayWrapper, ArrayIter<'a, P>: Send, { - type Item = P::Wrapped; + type Item = P::Wrapped<'a>; type IntoIter = ArrayIter<'a, P>; fn into_iter(self) -> ArrayIter<'a, P> { diff --git a/rust/src/references.rs b/rust/src/references.rs index 76ac449342..8b4ebb874e 100644 --- a/rust/src/references.rs +++ b/rust/src/references.rs @@ -1,6 +1,6 @@ use crate::architecture::CoreArchitecture; use crate::function::Function; -use crate::rc::{CoreArrayProvider, CoreArrayWrapper, CoreOwnedArrayProvider, Ref}; +use 
crate::rc::{CoreArrayProvider, CoreArrayWrapper, CoreOwnedArrayProvider, Guard, Ref}; use binaryninjacore_sys::{BNFreeCodeReferences, BNFreeDataReferences, BNReferenceSource}; use std::mem::ManuallyDrop; @@ -64,11 +64,11 @@ unsafe impl CoreOwnedArrayProvider for CodeReference { } } -unsafe impl<'a> CoreArrayWrapper<'a> for CodeReference { - type Wrapped = CodeReference; +unsafe impl CoreArrayWrapper for CodeReference { + type Wrapped<'a> = Guard<'a, CodeReference>; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { - CodeReference::new(raw) + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + Guard::new(CodeReference::new(raw), &()) } } @@ -85,10 +85,10 @@ unsafe impl CoreOwnedArrayProvider for DataReference { } } -unsafe impl<'a> CoreArrayWrapper<'a> for DataReference { - type Wrapped = DataReference; +unsafe impl CoreArrayWrapper for DataReference { + type Wrapped<'a> = DataReference; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { DataReference { address: *raw } } } diff --git a/rust/src/relocation.rs b/rust/src/relocation.rs index f9cbb3c527..17fd595823 100644 --- a/rust/src/relocation.rs +++ b/rust/src/relocation.rs @@ -1,3 +1,4 @@ +use crate::rc::Guard; use crate::string::BnStrCompatible; use crate::{ architecture::{Architecture, CoreArchitecture}, @@ -8,6 +9,7 @@ use crate::{ }; use binaryninjacore_sys::*; use std::borrow::Borrow; +use std::mem::MaybeUninit; use std::os::raw::c_void; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -227,10 +229,10 @@ unsafe impl CoreOwnedArrayProvider for Relocation { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Relocation { - type Wrapped = Relocation; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { - Relocation(*raw) +unsafe impl CoreArrayWrapper for 
Relocation { + type Wrapped<'a> = Guard<'a, Relocation>; + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + Guard::new(Relocation(*raw), &()) } } @@ -501,12 +503,9 @@ where let name = name.into_bytes_with_nul(); - let uninit_handler = RelocationHandlerBuilder { - handler: unsafe { std::mem::zeroed() }, - }; - let raw = Box::into_raw(Box::new(uninit_handler)); + let raw = Box::leak(Box::new(MaybeUninit::>::zeroed())); let mut custom_handler = BNCustomRelocationHandler { - context: raw as *mut _, + context: raw.as_mut_ptr() as *mut _, freeObject: Some(cb_free::), getRelocationInfo: Some(cb_get_relocation_info::), applyRelocation: Some(cb_apply_relocation::), @@ -517,13 +516,12 @@ where assert!(!handle_raw.is_null()); let handle = CoreRelocationHandler(handle_raw); let custom_handle = CustomRelocationHandlerHandle { - handle: raw as *mut R, + handle: raw.as_mut_ptr() as *mut R, }; unsafe { - core::ptr::write( - &mut raw.as_mut().unwrap().handler, - func(custom_handle, CoreRelocationHandler(handle.0)), - ); + raw.write(RelocationHandlerBuilder { + handler: func(custom_handle, CoreRelocationHandler(handle.0)), + }); BNArchitectureRegisterRelocationHandler( arch.handle().as_ref().0, diff --git a/rust/src/section.rs b/rust/src/section.rs index 3e6a4c75e2..580d5a2140 100644 --- a/rust/src/section.rs +++ b/rust/src/section.rs @@ -182,10 +182,10 @@ unsafe impl CoreOwnedArrayProvider for Section { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Section { - type Wrapped = Guard<'a, Section>; +unsafe impl CoreArrayWrapper for Section { + type Wrapped<'a> = Guard<'a, Section>; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { Guard::new(Section::from_raw(*raw), context) } } diff --git a/rust/src/segment.rs b/rust/src/segment.rs index 86ca3bc595..6ba2fbadf9 100644 --- a/rust/src/segment.rs +++ 
b/rust/src/segment.rs @@ -212,10 +212,10 @@ unsafe impl CoreOwnedArrayProvider for Segment { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Segment { - type Wrapped = Guard<'a, Segment>; +unsafe impl CoreArrayWrapper for Segment { + type Wrapped<'a> = Guard<'a, Segment>; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { Guard::new(Segment::from_raw(*raw), context) } } diff --git a/rust/src/string.rs b/rust/src/string.rs index 1011ca498c..75942da919 100644 --- a/rust/src/string.rs +++ b/rust/src/string.rs @@ -169,10 +169,10 @@ unsafe impl CoreOwnedArrayProvider for BnString { } } -unsafe impl<'a> CoreArrayWrapper<'a> for BnString { - type Wrapped = &'a str; +unsafe impl CoreArrayWrapper for BnString { + type Wrapped<'a> = &'a str; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { CStr::from_ptr(*raw).to_str().unwrap() } } diff --git a/rust/src/symbol.rs b/rust/src/symbol.rs index c4056bb7fc..edb93b47d0 100644 --- a/rust/src/symbol.rs +++ b/rust/src/symbol.rs @@ -337,10 +337,10 @@ unsafe impl CoreOwnedArrayProvider for Symbol { } } -unsafe impl<'a> CoreArrayWrapper<'a> for Symbol { - type Wrapped = Guard<'a, Symbol>; +unsafe impl CoreArrayWrapper for Symbol { + type Wrapped<'a> = Guard<'a, Symbol>; - unsafe fn wrap_raw(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, context: &'a Self::Context) -> Self::Wrapped<'a> { Guard::new(Symbol::from_raw(*raw), context) } } diff --git a/rust/src/types.rs b/rust/src/types.rs index 991dc3a679..5b4ee7c34d 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -56,6 +56,7 @@ pub type MemberScope = BNMemberScope; //////////////// // Confidence +/// Compatible with the `BNType*WithConfidence` types pub 
struct Conf { pub contents: T, pub confidence: u8, @@ -260,15 +261,6 @@ impl From for Conf { } } -impl From>> for BNTypeWithConfidence { - fn from(conf: Conf>) -> Self { - Self { - type_: conf.contents.handle, - confidence: conf.confidence, - } - } -} - impl From> for BNTypeWithConfidence { fn from(conf: Conf<&Type>) -> Self { Self { @@ -422,7 +414,7 @@ impl TypeBuilder { pub fn parameters(&self) -> Result> { unsafe { - let mut count: usize = mem::zeroed(); + let mut count = 0; let parameters_raw = BNGetTypeBuilderParameters(self.handle, &mut count); if parameters_raw.is_null() { Err(()) @@ -698,6 +690,7 @@ impl Drop for TypeBuilder { ////////// // Type +#[repr(transparent)] pub struct Type { pub(crate) handle: *mut BNType, } @@ -794,7 +787,7 @@ impl Type { pub fn parameters(&self) -> Result> { unsafe { - let mut count: usize = mem::zeroed(); + let mut count = 0; let parameters_raw: *mut BNFunctionParameter = BNGetTypeParameters(self.handle, &mut count); if parameters_raw.is_null() { @@ -1235,7 +1228,7 @@ impl fmt::Debug for Type { BNGetTypeLines( self.handle, container, - "".as_ptr() as *const c_char, + "\x00".as_ptr() as *const c_char, 64, false, BNTokenEscapingType::NoTokenEscapingType, @@ -1454,10 +1447,10 @@ unsafe impl CoreOwnedArrayProvider for NamedTypedVariable { } } -unsafe impl<'a> CoreArrayWrapper<'a> for NamedTypedVariable { - type Wrapped = ManuallyDrop; +unsafe impl CoreArrayWrapper for NamedTypedVariable { + type Wrapped<'a> = ManuallyDrop; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { ManuallyDrop::new(NamedTypedVariable { var: raw.var, ty: raw.type_, @@ -1550,7 +1543,7 @@ impl EnumerationBuilder { pub fn members(&self) -> Vec { unsafe { - let mut count: usize = mem::zeroed(); + let mut count = 0; let members_raw = BNGetEnumerationBuilderMembers(self.handle, &mut count); let members: &[BNEnumerationMember] = 
slice::from_raw_parts(members_raw, count); @@ -1607,7 +1600,7 @@ impl Enumeration { pub fn members(&self) -> Vec { unsafe { - let mut count: usize = mem::zeroed(); + let mut count = 0; let members_raw = BNGetEnumerationMembers(self.handle, &mut count); let members: &[BNEnumerationMember] = slice::from_raw_parts(members_raw, count); @@ -1939,7 +1932,7 @@ impl Structure { pub fn members(&self) -> Result> { unsafe { - let mut count: usize = mem::zeroed(); + let mut count = 0; let members_raw: *mut BNStructureMember = BNGetStructureMembers(self.handle, &mut count); if members_raw.is_null() { @@ -2350,10 +2343,10 @@ unsafe impl CoreOwnedArrayProvider for QualifiedName { } } -unsafe impl<'a> CoreArrayWrapper<'a> for QualifiedName { - type Wrapped = &'a QualifiedName; +unsafe impl CoreArrayWrapper for QualifiedName { + type Wrapped<'a> = &'a QualifiedName; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { mem::transmute(raw) } } @@ -2392,10 +2385,10 @@ unsafe impl CoreOwnedArrayProvider for QualifiedNameAndType { } } -unsafe impl<'a> CoreArrayWrapper<'a> for QualifiedNameAndType { - type Wrapped = &'a QualifiedNameAndType; +unsafe impl CoreArrayWrapper for QualifiedNameAndType { + type Wrapped<'a> = &'a QualifiedNameAndType; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { mem::transmute(raw) } } @@ -2438,10 +2431,10 @@ unsafe impl CoreOwnedArrayProvider for QualifiedNameTypeAndId { } } -unsafe impl<'a> CoreArrayWrapper<'a> for QualifiedNameTypeAndId { - type Wrapped = &'a QualifiedNameTypeAndId; +unsafe impl CoreArrayWrapper for QualifiedNameTypeAndId { + type Wrapped<'a> = &'a QualifiedNameTypeAndId; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn 
wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { mem::transmute(raw) } } @@ -2449,90 +2442,141 @@ unsafe impl<'a> CoreArrayWrapper<'a> for QualifiedNameTypeAndId { ////////////////////////// // NameAndType -pub struct NameAndType { - pub name: S, - pub t: Conf>, -} +pub struct NameAndType(pub(crate) BNNameAndType); -impl NameAndType { - pub(crate) fn from_raw(raw: &BNNameAndType) -> Self { - Self::new( - raw_to_string(raw.name).unwrap(), - unsafe { &Type::ref_from_raw(raw.type_) }, - raw.typeConfidence, - ) +impl NameAndType { + pub(crate) unsafe fn from_raw(raw: &BNNameAndType) -> Self { + Self ( *raw ) } } -impl NameAndType { - pub fn new(name: S, t: &Ref, confidence: u8) -> Self { - Self { - name, - t: Conf::new(t.clone(), confidence), +impl NameAndType { + pub fn new(name: S, t: &Type, confidence: u8) -> Ref { + unsafe { + Ref::new(Self(BNNameAndType { + name: BNAllocString(name.into_bytes_with_nul().as_ref().as_ptr() as *mut _), + type_: Ref::into_raw(t.to_owned()).handle, + typeConfidence: confidence, + })) } } - pub(crate) fn into_raw(self) -> BNNameAndType { - let t = self.t.clone(); - let res = BNNameAndType { - name: BnString::new(self.name).into_raw(), - type_: t.contents.handle, - typeConfidence: self.t.confidence, - }; - mem::forget(t); - res + pub fn name(&self) -> &str { + let c_str = unsafe { CStr::from_ptr(self.0.name) }; + c_str.to_str().unwrap() } - pub fn type_with_confidence(&self) -> Conf> { - self.t.clone() + pub fn t(&self) -> &Type { + unsafe { mem::transmute::<_, &Type>(&self.0.type_) } + } + + pub fn type_with_confidence(&self) -> Conf<&Type> { + Conf::new(self.t(), self.0.typeConfidence) } } -impl CoreArrayProvider for NameAndType { +impl ToOwned for NameAndType { + type Owned = Ref; + + fn to_owned(&self) -> Self::Owned { + unsafe { RefCountable::inc_ref(self) } + } +} + +unsafe impl RefCountable for NameAndType { + unsafe fn inc_ref(handle: &Self) -> Ref { + Self::new( + 
CStr::from_ptr(handle.0.name), + handle.t(), + handle.0.typeConfidence, + ) + } + + unsafe fn dec_ref(handle: &Self) { + unsafe { + BNFreeString(handle.0.name); + RefCountable::dec_ref(handle.t()); + } + } +} + +impl CoreArrayProvider for NameAndType { type Raw = BNNameAndType; type Context = (); } -unsafe impl CoreOwnedArrayProvider for NameAndType { +unsafe impl CoreOwnedArrayProvider for NameAndType { unsafe fn free(raw: *mut Self::Raw, count: usize, _context: &Self::Context) { BNFreeNameAndTypeList(raw, count); } } -unsafe impl<'a, S: 'a + BnStrCompatible> CoreArrayWrapper<'a> for NameAndType { - type Wrapped = &'a NameAndType; +unsafe impl CoreArrayWrapper for NameAndType { + type Wrapped<'a> = Guard<'a, NameAndType>; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { - mem::transmute(raw) + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + unsafe { Guard::new(NameAndType::from_raw(raw), raw) } } } ////////////////// // DataVariable -pub struct DataVariable { - pub address: u64, - pub t: Conf>, - pub auto_discovered: bool, -} +#[repr(transparent)] +pub struct DataVariable(pub(crate) BNDataVariable); // impl DataVariable { // pub(crate) fn from_raw(var: &BNDataVariable) -> Self { -// Self { -// address: var.address, -// t: Conf::new(unsafe { Type::ref_from_raw(var.type_) }, var.typeConfidence), -// auto_discovered: var.autoDiscovered, -// } +// let var = DataVariable(*var); +// Self(BNDataVariable { +// type_: unsafe { Ref::into_raw(var.t().to_owned()).handle }, +// ..var.0 +// }) // } // } impl DataVariable { - pub fn type_with_confidence(&self) -> Conf> { - Conf::new(self.t.contents.clone(), self.t.confidence) + pub fn address(&self) -> u64 { + self.0.address + } + + pub fn auto_discovered(&self) -> bool { + self.0.autoDiscovered + } + + pub fn t(&self) -> &Type { + unsafe { mem::transmute(&self.0.type_) } + } + + pub fn type_with_confidence(&self) -> Conf<&Type> { + 
Conf::new(self.t(), self.0.typeConfidence) } pub fn symbol(&self, bv: &BinaryView) -> Option> { - bv.symbol_by_address(self.address).ok() + bv.symbol_by_address(self.0.address).ok() + } +} + +impl ToOwned for DataVariable { + type Owned = Ref; + + fn to_owned(&self) -> Self::Owned { + unsafe { RefCountable::inc_ref(self) } + } +} + +unsafe impl RefCountable for DataVariable { + unsafe fn inc_ref(handle: &Self) -> Ref { + unsafe { + Ref::new(Self(BNDataVariable { + type_: Ref::into_raw(handle.t().to_owned()).handle, + ..handle.0 + })) + } + } + + unsafe fn dec_ref(handle: &Self) { + unsafe { BNFreeType(handle.0.type_) } } } @@ -2546,10 +2590,10 @@ unsafe impl CoreOwnedArrayProvider for DataVariable { } } -unsafe impl<'a> CoreArrayWrapper<'a> for DataVariable { - type Wrapped = &'a DataVariable; +unsafe impl CoreArrayWrapper for DataVariable { + type Wrapped<'a> = &'a DataVariable; - unsafe fn wrap_raw(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped { + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { mem::transmute(raw) } } diff --git a/ui/commands.h b/ui/commands.h index 441ca0ede3..b0798a9b6e 100644 --- a/ui/commands.h +++ b/ui/commands.h @@ -50,8 +50,8 @@ StructureRef BINARYNINJAUIAPI getInnerMostStructureContainingOffset(BinaryViewRe uint64_t BINARYNINJAUIAPI getInnerMostStructureOffset( BinaryViewRef data, StructureRef structure, const std::vector& nameList, size_t nameIndex); -// Auto generate a structure name -std::string BINARYNINJAUIAPI createStructureName(BinaryNinja::TypeContainer types); +// Auto generate a usable type name with the given prefix +std::string BINARYNINJAUIAPI createStructureName(BinaryNinja::TypeContainer types, const std::string& prefix = "struct_"); std::optional BINARYNINJAUIAPI getSplitVariableForAssignment( FunctionRef func, BNFunctionGraphType ilType, uint64_t location, const BinaryNinja::Variable& var); diff --git a/ui/createarraydialog.h b/ui/createarraydialog.h index 
1f1114d7bc..61c896247c 100644 --- a/ui/createarraydialog.h +++ b/ui/createarraydialog.h @@ -12,8 +12,9 @@ class CreateArrayDialog : public QDialog BinaryViewRef m_data; uint64_t m_startAddress; + size_t m_size; TypeRef m_elementType; - uint64_t m_elementCount; + uint64_t m_elementCount{}; QLineEdit* m_startField; QComboBox* m_typeDropdown; @@ -22,15 +23,16 @@ class CreateArrayDialog : public QDialog QPushButton* m_cancelButton; QPushButton* m_createButton; - void validate(); + void update(); + size_t guessElementCount(size_t elementWidth); public: - explicit CreateArrayDialog(BinaryViewRef data, QWidget* parent = nullptr); + explicit CreateArrayDialog(BinaryViewRef data, BNAddressRange selection, QWidget* parent = nullptr); /// Set the initial start address, element type, and element count for /// the dialog. The element type may be null if no default is desired; a /// default will be chosen by the dialog. - void setInitialState(uint64_t start, const TypeRef& elementType, uint64_t count); + void setInitialState(); /// Get the desired start address from the accepted dialog. 
[[nodiscard]] uint64_t startAddress() const; diff --git a/ui/possiblevaluesetdialog.h b/ui/possiblevaluesetdialog.h index 79d56abdb4..8f37f5bb4e 100644 --- a/ui/possiblevaluesetdialog.h +++ b/ui/possiblevaluesetdialog.h @@ -23,10 +23,8 @@ class BINARYNINJAUIAPI PossibleValueSetDialog : public QDialog QLineEdit* m_input; QPushButton* m_acceptButton; QLabel* m_formatLabel; + QLabel* m_errorLabel; - std::map m_typeText; - std::map m_formatText; - std::map m_regValueTypes; BNRegisterValueType m_curRegValueType; BinaryNinja::PossibleValueSet m_valueSet; diff --git a/ui/util.h b/ui/util.h index a733acea3c..9713713a35 100644 --- a/ui/util.h +++ b/ui/util.h @@ -19,7 +19,7 @@ std::string BINARYNINJAUIAPI getStringForRegisterValue(ArchitectureRef arch, Bin std::string BINARYNINJAUIAPI getPossibleValueSetStateName(BNRegisterValueType state); std::string BINARYNINJAUIAPI getStringForIntegerValue(int64_t value); std::string BINARYNINJAUIAPI getStringForUIntegerValue(uint64_t value); -std::string BINARYNINJAUIAPI getStringForPossibleValueSet(ArchitectureRef arch, const BinaryNinja::PossibleValueSet& values); +std::string BINARYNINJAUIAPI getStringForPossibleValueSet(ArchitectureRef arch, const BinaryNinja::PossibleValueSet& values, bool pretty = true); std::string BINARYNINJAUIAPI getStringForInstructionDataflowDetails(BinaryViewRef data, ArchitectureRef arch, FunctionRef func, uint64_t address); std::optional BINARYNINJAUIAPI getPossibleValueSetForToken(View* view, BinaryViewRef data, ArchitectureRef arch, FunctionRef func, HighlightTokenState token, size_t instrIdx); diff --git a/view/elf/elfview.cpp b/view/elf/elfview.cpp index 84ca68f541..5d1b5cf29e 100644 --- a/view/elf/elfview.cpp +++ b/view/elf/elfview.cpp @@ -1283,6 +1283,9 @@ bool ElfView::Init() // handle long form symbols if (auto pos = entryName.find(".", 2); (pos != std::string::npos)) { + // These mapping symbols do not define actual names + if (entryName[0] == '$' && (entryName[1] == 'x' || entryName[1] == 'a' || 
entryName[1] == 'd')) + continue; entryName = entryName.substr(pos + 1); if (entryName.size()) DefineElfSymbol(isMappingFunctionSymbol ? FunctionSymbol : DataSymbol, entryName, entry->value, false, entry->binding, entry->size); @@ -2167,8 +2170,7 @@ bool ElfView::Init() DefineAutoSymbol(new Symbol(DataSymbol, "__elf_dynamic_table", adjustedVirtualAddr, NoBinding)); } - // Add types for the dynamic symbol table - if (m_auxSymbolTable.size) + if (m_auxSymbolTable.size || m_symbolTableSection.offset) { StructureBuilder symTableBuilder; if (m_elf32) @@ -2193,9 +2195,25 @@ bool ElfView::Init() Ref symTableType = Type::StructureType(symTableStruct); QualifiedName symTableName = m_elf32 ? string("Elf32_Sym") : string("Elf64_Sym"); const string symTableTypeId = Type::GenerateAutoTypeId("elf", symTableName); - QualifiedName symTableTypeName = DefineType(symTableTypeId, symTableName, symTableType); - DefineDataVariable(m_auxSymbolTable.offset, Type::ArrayType(Type::NamedType(this, symTableTypeName), m_auxSymbolTable.size / m_auxSymbolTableEntrySize)); - DefineAutoSymbol(new Symbol(DataSymbol, "__elf_symbol_table", m_auxSymbolTable.offset, NoBinding)); + + // Add types for the dynamic symbol table + if (m_auxSymbolTable.size) + { + auto defineAuxSymTableForView = [&](Ref view) { + QualifiedName symTableTypeName = view->DefineType(symTableTypeId, symTableName, symTableType); + view->DefineDataVariable(m_auxSymbolTable.offset, Type::ArrayType(Type::NamedType(this, symTableTypeName), m_auxSymbolTable.size / m_auxSymbolTableEntrySize)); + view->DefineAutoSymbol(new Symbol(DataSymbol, "__elf_symbol_table", m_auxSymbolTable.offset, NoBinding)); + }; + defineAuxSymTableForView(this); + defineAuxSymTableForView(GetParentView()); + } + + if (m_symbolTableSection.offset) + { + QualifiedName symTableTypeName = GetParentView()->DefineType(symTableTypeId, symTableName, symTableType); + GetParentView()->DefineDataVariable(m_symbolTableSection.offset, Type::ArrayType(Type::NamedType(this, 
symTableTypeName), m_symbolTableSection.size / m_auxSymbolTableEntrySize)); + GetParentView()->DefineAutoSymbol(new Symbol(DataSymbol, "__elf_symbol_table", m_symbolTableSection.offset, NoBinding)); + } } // In 32-bit mips with .got, add .extern symbol "RTL_Resolve"