diff --git a/CMakeLists.txt b/CMakeLists.txt index f6fa253e..6e8ec289 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ set(Protobuf_USE_STATIC_LIBS TRUE) find_package(Boost 1.55 REQUIRED) find_package(Git) find_package(IdaSdk REQUIRED) +find_package(BinaryNinjaApi) find_package(OpenSSL 1.0.2 REQUIRED) if(BINEXPORT_ENABLE_POSTGRESQL) find_package(PostgreSQL 9.5 REQUIRED) @@ -343,6 +344,11 @@ ida_install(TARGETS ${binexport_plugin_name} RUNTIME DESTINATION binexport-prefix LIBRARY DESTINATION binexport-prefix) +# Binary Ninja plugin (experimental) +if(BinaryNinjaApi_FOUND) + add_subdirectory(binaryninja) +endif() + # BinExport reader library add_library(binexport_reader STATIC reader/call_graph.cc diff --git a/FindBinaryNinjaApi.cmake b/FindBinaryNinjaApi.cmake new file mode 100644 index 00000000..54c8a0b7 --- /dev/null +++ b/FindBinaryNinjaApi.cmake @@ -0,0 +1,101 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# FindBinaryNinjaApi +# ------------------ +# +# Locates and configures the Binary Ninja API. Needs version 1.2.1921 or +# later. +# +# Use this module by invoking find_package with the form: +# +# find_package(BinaryNinjaApi +# [REQUIRED] # Stop the build if Binary Ninja API is not found +# ) +# +# Defines the following variables: +# +# BinaryNinjaApi_INCLUDE_DIRS - Include directories for the Binary Ninja API. 
+# BinaryNinjaApi_LIBRARIES - Library files to link against +# +# This module reads hints about search locations from variables: +# +# BinaryNinjaApi_ROOT_DIR - Preferred installation prefix for the API +# ENV{BINJA_API_DIR} - Like BinaryNinjaApi_ROOT_DIR +# BinaryNinja_DIR - Installation prefix containing the Binary Ninja +# core library +# ENV{BINJA_DIR} - Like BinaryNinja_DIR +# +# Example: +# +# find_package(BinaryNinjaApi REQUIRED) +# +# # Builds target example_plugin +# add_library(example_plugin myplugin.cc) +# target_link_libraries(example_plugin PRIVATE BinaryNinja::API) + +include(CMakeParseArguments) +include(FindPackageHandleStandardArgs) + +find_path(BinaryNinjaApi_DIR + NAMES binaryninjaapi.h + HINTS "${BinaryNinjaApi_ROOT_DIR}" ENV BINJA_API_DIR + PATHS "${CMAKE_CURRENT_LIST_DIR}/third_party/binaryninja-api" + "$ENV{HOME}/binaryninja-api" + DOC "Location of the Binary Ninja API" + NO_DEFAULT_PATH +) +set(BinaryNinjaApi_INCLUDE_DIRS "${BinaryNinjaApi_DIR}") + +find_library(BinaryNinjaApi_LIBRARY + NAMES binaryninjaapi + PATHS "${BinaryNinjaApi_DIR}/bin" + NO_DEFAULT_PATH +) + +set(_binaryninjaapi_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX}) # Search for the core as a shared library only +find_library(BinaryNinjaCore_LIBRARY + NAMES binaryninjacore + NAMES_PER_DIR + HINTS "${BinaryNinja_DIR}" ENV BINJA_DIR + "$ENV{ProgramFiles}/Vector35/BinaryNinja" # Windows + "$ENV{HOME}/binaryninja" + PATHS "/Applications/Binary Ninja.app/Contents/MacOS" + "/opt/binaryninja" + DOC "Location of Binary Ninja" + NO_DEFAULT_PATH +) +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_binaryninjaapi_FIND_LIBRARY_SUFFIXES}) # Restore the saved suffix list + +if(BinaryNinjaCore_LIBRARY AND BinaryNinjaApi_LIBRARY) + list(APPEND BinaryNinjaApi_LIBRARIES + "${BinaryNinjaApi_LIBRARY}" + "${BinaryNinjaCore_LIBRARY}" + ) + add_library(binaryninjaapi INTERFACE IMPORTED GLOBAL) + add_library(BinaryNinja::API ALIAS binaryninjaapi) + target_include_directories(binaryninjaapi INTERFACE +
${BinaryNinjaApi_INCLUDE_DIRS}) + target_link_libraries(binaryninjaapi INTERFACE + ${BinaryNinjaApi_LIBRARIES}) +endif() + +find_package_handle_standard_args(BinaryNinjaApi + FOUND_VAR BinaryNinjaApi_FOUND + REQUIRED_VARS BinaryNinjaApi_DIR + BinaryNinjaApi_INCLUDE_DIRS + BinaryNinjaApi_LIBRARIES + FAIL_MESSAGE "Binary Ninja API not found, try setting BinaryNinjaApi_ROOT_DIR and/or BinaryNinja_DIR. Note that the API itself needs to be built first." +) diff --git a/FindIdaSdk.cmake b/FindIdaSdk.cmake index 44c8938f..79a686ff 100644 --- a/FindIdaSdk.cmake +++ b/FindIdaSdk.cmake @@ -15,7 +15,7 @@ # FindIdaSdk # ---------- # -# Locates and configures the IDA Pro SDK. Only support version 7.0 or hight. +# Locates and configures the IDA Pro SDK. Supports version 7.0 or higher. # # Use this module by invoking find_package with the form: # @@ -36,7 +36,6 @@ # Example (this assumes Windows): # # find_package(IdaSdk REQUIRED) -# include_directories(${IdaSdk_INCLUDE_DIRS}) # # # Builds targets plugin.dll and plugin64.dll # add_ida_plugin(plugin myplugin.cc) diff --git a/binaryninja/CMakeLists.txt b/binaryninja/CMakeLists.txt new file mode 100644 index 00000000..3984d44f --- /dev/null +++ b/binaryninja/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright 2019-2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_library(${binexport_plugin_name} SHARED + main_plugin.cc + main_plugin.h + log_sink.cc + log_sink.h +) +target_link_libraries(${binexport_plugin_name} PRIVATE + absl::strings + BinaryNinja::API + binexport_core + OpenSSL::SSL +) diff --git a/binaryninja/log_sink.cc b/binaryninja/log_sink.cc new file mode 100644 index 00000000..6498391d --- /dev/null +++ b/binaryninja/log_sink.cc @@ -0,0 +1,48 @@ +// Copyright 2019-2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "third_party/zynamics/binexport/binaryninja/log_sink.h" + +// clang-format off +#include "binaryninjaapi.h" // NOLINT +// clang-format on + +#include "base/logging.h" +#include "build/absl/absl/base/log_severity.h" +#include "build/absl/absl/strings/string_view.h" +#include "third_party/absl/strings/string_view.h" +#include "third_party/zynamics/binexport/util/logging.h" + +namespace security::binexport { + +void BinaryNinjaLogSink::Send(const not_absl::LogEntry& entry) { + BNLogLevel level; + switch (entry.log_severity()) { + case absl::LogSeverity::kInfo: + level = InfoLog; + break; + case absl::LogSeverity::kWarning: + level = WarningLog; + break; + case absl::LogSeverity::kError: + case absl::LogSeverity::kFatal: + default: + level = ErrorLog; + break; + } + absl::string_view message = entry.text_message(); + BinaryNinja::Log(level, "%*s", message.size(), message.data()); +} + +} // namespace security::binexport diff --git a/binaryninja/log_sink.h b/binaryninja/log_sink.h new file mode 100644 index 00000000..f402539c --- /dev/null +++ b/binaryninja/log_sink.h @@ -0,0 +1,30 @@ +// Copyright 2019-2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef BINARYNINJA_LOG_SINK_H_ +#define BINARYNINJA_LOG_SINK_H_ + +#include "base/logging.h" +#include "third_party/zynamics/binexport/util/logging.h" + +namespace security::binexport { + +class BinaryNinjaLogSink : public not_absl::LogSink { // Log sink that writes entries to the Binary Ninja log + public: + void Send(const not_absl::LogEntry& entry) override; // Maps the entry's severity to a BNLogLevel and logs its text +}; + +} // namespace security::binexport + +#endif // BINARYNINJA_LOG_SINK_H_ diff --git a/binaryninja/main_plugin.cc b/binaryninja/main_plugin.cc new file mode 100644 index 00000000..a1b88e44 --- /dev/null +++ b/binaryninja/main_plugin.cc @@ -0,0 +1,487 @@ +// Copyright 2019-2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "third_party/zynamics/binexport/binaryninja/main_plugin.h" + +#include +#include + +#include "base/logging.h" +#include "openssl/sha.h" +#include "third_party/absl/status/status.h" +#include "third_party/absl/strings/escaping.h" +#include "third_party/absl/strings/match.h" +#include "third_party/absl/strings/str_cat.h" +#include "third_party/absl/strings/str_format.h" +#include "third_party/zynamics/binexport/binaryninja/log_sink.h" +#include "third_party/zynamics/binexport/binexport2_writer.h" +#include "third_party/zynamics/binexport/call_graph.h" +#include "third_party/zynamics/binexport/entry_point.h" +#include "third_party/zynamics/binexport/flow_analyzer.h" +#include "third_party/zynamics/binexport/flow_graph.h" +#include "third_party/zynamics/binexport/instruction.h" +#include "third_party/zynamics/binexport/util/filesystem.h" +#include "third_party/zynamics/binexport/util/format.h" +#include "third_party/zynamics/binexport/util/logging.h" +#include "third_party/zynamics/binexport/util/status_macros.h" +#include "third_party/zynamics/binexport/util/statusor.h" +#include "third_party/zynamics/binexport/util/timer.h" +#include "third_party/zynamics/binexport/version.h" + +namespace security::binexport { + +not_absl::StatusOr GetInputFileSha256( + BinaryNinja::BinaryView* view) { + auto raw_view = view->GetParentView(); // The hash is computed over the parent view's bytes + if (!raw_view) { + return absl::InternalError("Failed to load SHA256 hash of input file"); + } + BinaryNinja::DataBuffer buffer = + raw_view->ReadBuffer(0, raw_view->GetLength()); + + std::string sha256_hash(32, '\0'); // Receives the 32-byte SHA-256 digest + SHA256_CTX sha256_ctx; + SHA256_Init(&sha256_ctx); + + constexpr size_t kBufSize = 4096; // The buffer is hashed in chunks of this size + const size_t buf_len = buffer.GetLength(); + for (size_t off = 0; off < buf_len; off += kBufSize) { + SHA256_Update(&sha256_ctx, + reinterpret_cast(buffer.GetDataAt(off)), + std::min(buf_len - off, kBufSize)); + } + + SHA256_Final(reinterpret_cast(&sha256_hash[0]), &sha256_ctx); + return absl::BytesToHexString(sha256_hash);
+} + +std::string GetArchitectureName(BinaryNinja::BinaryView* view) { + auto default_arch = view->GetDefaultArchitecture(); + std::string name = default_arch->GetName(); + std::string architecture; + if (absl::StartsWith(name, "x86")) { + architecture = "x86"; + } else if (absl::StartsWith(name, "arm") || name == "aarch64") { + architecture = "ARM"; + } else if (absl::StartsWith(name, "mips")) { + architecture = "MIPS"; + } else if (name == "ppc64") { + architecture = "PowerPC"; + } else { + architecture = "GENERIC"; + } + + if (default_arch->GetAddressSize() == 8) { + absl::StrAppend(&architecture, "-64"); + } else if (default_arch->GetAddressSize() == 4) { + absl::StrAppend(&architecture, "-32"); + } + return architecture; +} + +int GetArchitectureBitness(BinaryNinja::BinaryView* view) { + return view->GetDefaultArchitecture()->GetAddressSize() * 8; +} + +template +T GetBytes(BinaryNinja::BinaryView* view, uint64_t start, size_t length) { + BinaryNinja::DataBuffer buffer = view->ReadBuffer(start, length); + const size_t bytes_read = buffer.GetLength(); + + LOG_IF(ERROR, bytes_read != length) << absl::StrFormat( + "Expected %d bytes at %08X, got %d", length, start, bytes_read); + + auto* data = reinterpret_cast(buffer.GetData()); + return T(data, data + bytes_read); +} + +std::string GetBytes(BinaryNinja::BinaryView* view, + const Instruction& instruction) { + return GetBytes(view, instruction.GetAddress(), + instruction.GetSize()); +} + +std::vector GetSectionBytes(BinaryNinja::BinaryView* view, uint64_t start, + size_t length) { + return GetBytes>(view, start, length); +} + +int GetPermissions(BinaryNinja::BinaryView* view, + const BinaryNinja::Section& section) { + auto segment = view->GetSegmentAt(section.GetStart()); + CHECK(segment); + + int segment_flags = segment->GetFlags(); + int permissions = 0; + if (segment_flags & SegmentDenyExecute) { + permissions |= AddressSpace::kExecute; + } + if (segment_flags & SegmentWritable) { + permissions |= 
AddressSpace::kWrite; + } + if (segment_flags & SegmentReadable) { + permissions |= AddressSpace::kRead; + } + return permissions; +} + +std::string GetMnemonic( + const std::vector& instruction_tokens) { // Text of the first InstructionToken, or "" if there is none + for (const auto& token : instruction_tokens) { + if (token.type == BNInstructionTextTokenType::InstructionToken) { + return token.text; + } + } + return ""; +} + +Instruction ParseInstructionBinaryNinja( + Address address, const BinaryNinja::InstructionInfo& instruction, + const std::vector& instruction_tokens, + CallGraph* call_graph, FlowGraph* flow_graph) { + // TODO(cblichmann): Return if no code at address + + std::string mnemonic = GetMnemonic(instruction_tokens); + if (mnemonic.empty()) { + return Instruction(address); // No mnemonic token: address-only instruction + } + + // TODO(cblichmann): Is this always the case in Binja? + const Address next_instruction = address + instruction.length; + + // TODO(cblichmann): Create expression trees for operands + std::string operand; + for (size_t i = 1; i < instruction_tokens.size(); ++i) { // Skips token 0, presumably the mnemonic - TODO confirm + const auto& token = instruction_tokens[i]; + absl::StrAppend(&operand, token.text); + } + absl::StripAsciiWhitespace(&mnemonic); + absl::StripAsciiWhitespace(&operand); + + Operands operands; + Expressions expressions; + if (!operand.empty()) { // All operand text is stored as one symbol expression + expressions.push_back( + Expression::Create(0, operand, 0, Expression::TYPE_SYMBOL, 0)); + operands.push_back(Operand::CreateOperand(expressions)); + } + return Instruction(address, next_instruction, instruction.length, mnemonic, + operands); +} + +void AnalyzeFlow( + BinaryNinja::BinaryView* view, + const BinaryNinja::InstructionInfo& binja_instruction, + Instruction* instruction, FlowGraph* flow_graph, CallGraph* call_graph, + AddressReferences* address_references, + EntryPointAdder* entry_point_adder /*, const ModuleMap& modules*/) { + const Address address = instruction->GetAddress(); + bool has_flow = binja_instruction.branchCount == 0; // Instructions without branches fall through + bool handled = false; + + for (size_t i = 0; i < binja_instruction.branchCount; ++i) { +
const auto branch_target = binja_instruction.branchTarget[i]; + switch (binja_instruction.branchType[i]) { + case BNBranchType::UnconditionalBranch: + flow_graph->AddEdge(FlowGraphEdge(address, branch_target, + FlowGraphEdge::TYPE_UNCONDITIONAL)); + address_references->emplace_back( + address, GetSourceExpressionId(*instruction, branch_target), + branch_target, TYPE_UNCONDITIONAL); + entry_point_adder->Add(branch_target, EntryPoint::Source::JUMP_DIRECT); + handled = true; + break; + case BNBranchType::CallDestination: + // TODO(cblichmann): Implement as IsPossibleFunction(branch_target, + // modules)) + if constexpr (false) { + call_graph->AddFunction(branch_target); + call_graph->AddEdge(address, branch_target); + entry_point_adder->Add(branch_target, + EntryPoint::Source::CALL_TARGET); + } + instruction->SetFlag(FLAG_CALL, true); + address_references->emplace_back( + address, GetSourceExpressionId(*instruction, branch_target), + branch_target, TYPE_CALL_DIRECT); + has_flow = true; // Execution continues after the call + handled = true; + break; + case BNBranchType::TrueBranch: + flow_graph->AddEdge( + FlowGraphEdge(address, branch_target, FlowGraphEdge::TYPE_TRUE)); + address_references->emplace_back( + address, GetSourceExpressionId(*instruction, branch_target), + branch_target, TYPE_TRUE); + entry_point_adder->Add( + branch_target, + EntryPoint::Source::JUMP_DIRECT); // True is main branch + handled = true; + break; + case BNBranchType::FalseBranch: + flow_graph->AddEdge( + FlowGraphEdge(address, branch_target, FlowGraphEdge::TYPE_FALSE)); + address_references->emplace_back( + address, GetSourceExpressionId(*instruction, branch_target), + branch_target, TYPE_FALSE); + entry_point_adder->Add( + branch_target, + EntryPoint::Source::JUMP_DIRECT); // False target is queued as a direct jump too + handled = true; + break; + default: // Other branch types are not handled yet + break; + } + } + + if (has_flow) { + // Regular code flow + entry_point_adder->Add(instruction->GetNextInstruction(), + EntryPoint::Source::CODE_FLOW); + } + + // TODO(cblichmann): Switch tables,
address references, indirect calls... NOTE(review): `unconditional_jump` below is not declared in this function - confirm this disabled block compiles. + if constexpr (false) { + const std::vector xrefs = + view->GetCodeReferences(address); + const std::vector callers = + view->GetCallers(address); + int num_out_edges = + (unconditional_jump ? 1 : 0) + xrefs.size() + callers.size(); + + if (binja_instruction.branchCount == 0) { + // Regular code flow + entry_point_adder->Add(instruction->GetNextInstruction(), + EntryPoint::Source::CODE_FLOW); + } else if (num_out_edges > 1) { // Switch jump table + auto table_address = std::numeric_limits
::max(); + for (const auto& xref : xrefs) { + flow_graph->AddEdge( + FlowGraphEdge(address, xref.addr, FlowGraphEdge::TYPE_SWITCH)); + address_references->emplace_back( + address, GetSourceExpressionId(*instruction, xref.addr), xref.addr, + AddressReferenceType::TYPE_SWITCH); + entry_point_adder->Add(xref.addr, EntryPoint::Source::JUMP_TABLE); + table_address = std::min(table_address, xref.addr); // Track the lowest referenced address + handled = true; + } + // Add a data reference to first address in switch table + address_references->emplace_back( + address, GetSourceExpressionId(*instruction, table_address), + table_address, AddressReferenceType::TYPE_DATA); + } + } +} + +void AnalyzeFlowBinaryNinja(BinaryNinja::BinaryView* view, + EntryPoints* entry_points, Writer* writer, + detego::Instructions* instructions, + FlowGraph* flow_graph, CallGraph* call_graph) { + Timer<> timer; + AddressReferences address_references; + + // Add initial entry points as functions. + for (const auto& entry_point : *entry_points) { + if ((entry_point.IsFunctionPrologue() || entry_point.IsExternal() || + entry_point.IsCallTarget())) { + call_graph->AddFunction(entry_point.address_); + } + } + + AddressSpace address_space; // Section contents + AddressSpace flags; // Same blocks as address_space, used for per-address flags such as FLAG_VISITED + for (auto section_ref : view->GetSections()) { + const uint64_t section_start = section_ref->GetStart(); + const size_t section_length = section_ref->GetLength(); + const int section_permissions = GetPermissions(view, *section_ref); + address_space.AddMemoryBlock( + section_start, GetSectionBytes(view, section_start, section_length), + section_permissions); + flags.AddMemoryBlock(section_start, + AddressSpace::MemoryBlock(section_length), + section_permissions); + } + + Instruction::SetBitness(GetArchitectureBitness(view)); + Instruction::SetGetBytesCallback([view](const Instruction& instruction) { + return GetBytes(view, instruction); + }); + Instruction::SetMemoryFlags(&flags); // NOTE(review): pointer to a local - confirm it is not used after this function returns + + LOG(INFO) << "flow analysis"; + // TODO(cblichmann): Support binaries with mixed archs where this
makes sense + // (e.g. ARM/Thumb) + auto default_arch = view->GetDefaultArchitecture(); + const size_t max_instr_len = default_arch->GetMaxInstructionLength(); + for (EntryPointAdder entry_point_adder(entry_points, "flow analysis"); + !entry_points->empty();) { + const Address address = entry_points->back().address_; + entry_points->pop_back(); + + if (flags[address] & FLAG_VISITED) { // Each address is processed at most once + continue; + } + flags[address] |= FLAG_VISITED; + + auto instr_bytes = + GetBytes>(view, address, max_instr_len); // May hold fewer bytes than requested + BinaryNinja::InstructionInfo binja_instruction; + if (instr_bytes.empty() || + !default_arch->GetInstructionInfo(&instr_bytes[0], address, + instr_bytes.size(), binja_instruction)) { + continue; + } + + std::vector binja_tokens; + size_t instr_len = binja_instruction.length; + if (!default_arch->GetInstructionText(&instr_bytes[0], address, instr_len, + binja_tokens)) { + continue; + } + + Instruction new_instruction = ParseInstructionBinaryNinja( + address, binja_instruction, binja_tokens, call_graph, flow_graph); + if (new_instruction.HasFlag(FLAG_INVALID)) { + continue; + } + AnalyzeFlow(view, binja_instruction, &new_instruction, flow_graph, + call_graph, &address_references, &entry_point_adder); + // call_graph->AddStringReference(address, GetStringReference(address)); + // GetComments(ida_instruction, &call_graph->GetComments()); + + instructions->push_back(new_instruction); + } + + LOG(INFO) << "sorting instructions"; + SortInstructions(instructions); + + LOG(INFO) << "reconstructing flow graphs"; + std::sort(address_references.begin(), address_references.end()); + // TODO(cblichmann): Remove duplicates if any. + ReconstructFlowGraph(instructions, *flow_graph, call_graph); + + LOG(INFO) << "reconstructing functions"; + flow_graph->ReconstructFunctions(instructions, call_graph, + FlowGraph::NoReturnHeuristic::kNone); + + // Must be called after ReconstructFunctions() since that may remove source + // basic blocks for an edge.
+ flow_graph->PruneFlowGraphEdges(); + + // Note: PruneFlowGraphEdges might add comments to the callgraph so the + // post processing must happen afterwards. + call_graph->PostProcessComments(); + + const auto processing_time = absl::Seconds(timer.elapsed()); + timer.restart(); // Time the write phase separately + + LOG(INFO) << "writing..."; + auto ignore_error(writer->Write(*call_graph, *flow_graph, *instructions, + address_references, /*type_system=*/nullptr, + address_space)); // NOTE(review): the write result is discarded - confirm this is intended + + Operand::EmptyCache(); + Expression::EmptyCache(); + + const auto writing_time = absl::Seconds(timer.elapsed()); + LOG(INFO) << absl::StrCat(view->GetFile()->GetOriginalFilename(), ": ", + HumanReadableDuration(processing_time), + " processing, ", + HumanReadableDuration(writing_time), " writing"); +} + +absl::Status ExportBinaryView(BinaryNinja::BinaryView* view, Writer* writer) { + const std::string filename = view->GetFile()->GetOriginalFilename(); + LOG(INFO) << filename << ": starting export"; + Timer<> timer; + EntryPoints entry_points; + + { + EntryPointAdder function_adder(&entry_points, "functions"); + EntryPointAdder call_adder(&entry_points, "calls"); + for (auto func_ref : view->GetAnalysisFunctionList()) { // Seed entry points from the analysis function list + auto symbol_ref = func_ref->GetSymbol(); + switch (symbol_ref->GetType()) { + case BNSymbolType::FunctionSymbol: + function_adder.Add(symbol_ref->GetAddress(), + EntryPoint::Source::FUNCTION_PROLOGUE); + break; + case BNSymbolType::ImportedFunctionSymbol: + call_adder.Add(symbol_ref->GetAddress(), + EntryPoint::Source::CALL_TARGET); + break; + default: // Other symbol types only produce a warning + LOG(WARNING) << symbol_ref->GetShortName() + << " has unimplemented type " << symbol_ref->GetType(); + } + } + } + + Instructions instructions; + FlowGraph flow_graph; + CallGraph call_graph; + AnalyzeFlowBinaryNinja(view, &entry_points, writer, &instructions, + &flow_graph, &call_graph); + + LOG(INFO) << absl::StrCat( + filename, ": exported ", flow_graph.GetFunctions().size(), + " functions with ", instructions.size(), " instructions in ", +
HumanReadableDuration(timer.elapsed())); + return absl::OkStatus(); // No failure path here: AnalyzeFlowBinaryNinja() returns void +} + +absl::Status ExportBinary(const std::string& filename, + BinaryNinja::BinaryView* view) { // Hashes the input, then exports `view` to `filename` + NA_ASSIGN_OR_RETURN(std::string sha256_hash, GetInputFileSha256(view)); + + BinExport2Writer writer(filename, view->GetFile()->GetOriginalFilename(), + sha256_hash, GetArchitectureName(view)); + NA_RETURN_IF_ERROR(ExportBinaryView(view, &writer)); + return absl::OkStatus(); +} + +void Plugin::Run(BinaryNinja::BinaryView* view) { // Exports `view`; errors are logged, not returned + LOG(INFO) << "Plugin::Run()"; + const std::string filename = + ReplaceFileExtension(view->GetFile()->GetFilename(), ".BinExport"); + if (auto status = ExportBinary(filename, view); !status.ok()) { + LOG(ERROR) << "Error exporting: " << std::string(status.message()); + } +} + +bool Plugin::Init() { // Sets up logging and registers the export command; false if logging setup fails + if (auto status = InitLogging(LoggingOptions{}, + absl::make_unique()); + !status.ok()) { + BinaryNinja::LogError( + "Error initializing logging, skipping BinExport plugin: %s", + std::string(status.message()).c_str()); + return false; + } + + LOG(INFO) << kBinExportName << " " << kBinExportDetailedVersion << ", " + << kBinExportCopyright; + + BinaryNinja::PluginCommand::Register( + kBinExportName, kDescription, + [](BinaryNinja::BinaryView* view) { Plugin::instance()->Run(view); }); + + return true; +} + +} // namespace security::binexport + +extern "C" BINARYNINJAPLUGIN bool CorePluginInit() { // Exported C entry point; delegates to Plugin::Init() + return security::binexport::Plugin::instance()->Init(); +} diff --git a/binaryninja/main_plugin.h b/binaryninja/main_plugin.h new file mode 100644 index 00000000..084bb80a --- /dev/null +++ b/binaryninja/main_plugin.h @@ -0,0 +1,50 @@ +// Copyright 2019-2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BINARYNINJA_MAIN_PLUGIN_H_ +#define BINARYNINJA_MAIN_PLUGIN_H_ + +// clang-format off +#include "binaryninjaapi.h" // NOLINT +// clang-format on + +namespace security::binexport { + +class Plugin { // Singleton: private constructor, copying deleted, accessed via instance() + public: + static constexpr char kDescription[] = + "Export to BinDiff binary or text dump"; + + Plugin(const Plugin&) = delete; + Plugin& operator=(const Plugin&) = delete; + + static Plugin* instance() { + static auto* instance = new Plugin(); // Created on first call + return instance; + } + + bool Init(); // Returns false when initialization fails + void Run(BinaryNinja::BinaryView* view); // Exports the given view + + bool alsologtostderr() const { return alsologtostderr_; } + + private: + Plugin() = default; + + bool alsologtostderr_ = false; +}; + +} // namespace security::binexport + +#endif // BINARYNINJA_MAIN_PLUGIN_H_ diff --git a/ida/names.cc b/ida/names.cc index 20bdeeaf..3cde5600 100644 --- a/ida/names.cc +++ b/ida/names.cc @@ -67,7 +67,7 @@ enum Architecture { kPpc, kMips, kGeneric, - kDalvik + kDalvik, }; bool IsCode(Address address) { @@ -195,9 +195,10 @@ ModuleMap InitModuleMap() { } ImportData import_data = {ToString(ida_module_name), &modules}; enum_import_names( - i, static_cast([](ea_t ea, const char* /* name */, - uval_t /* ord */, - void* param) -> int { + i, + static_cast([](ea_t ea, const char* /* name */, + uval_t /* ord */, + void* param) -> int { auto& import_data = *static_cast(param); (*import_data.modules)[ea] = import_data.module_name; return 1; // Continue enumeration @@ -249,7 +250,7 @@ size_t GetOperandByteSize(const insn_t& instruction, const op_t& operand) { case dt_code: case dt_word: case
dt_half: // ARM-only (b/70541404) - return 2; // 16 bit + return 2; // 16 bit case dt_dword: case dt_float: return 4; // 32 bit @@ -503,18 +504,17 @@ void AnalyzeFlow(const insn_t& ida_instruction, Instruction* instruction, if (xref.type == fl_JN || xref.type == fl_JF) { ++num_out_edges; } else if (unconditional_jump && xref.type == fl_F) { - // special case for weird IDA behaviour (fogbugz #4623): - // We had a switch jump statement (jmp[eax*4]) in flash11c.idb that - // had one unconditional outgoing edge (correct) and a second - // codeflow edge (incorrect! An unconditional jump should never have - // regular codeflow set). + // Special case for weird IDA behavior: We had a switch jump statement + // (jmp[eax*4]) in flash11c.idb that had one unconditional outgoing edge + // (correct) and a second codeflow edge (incorrect! An unconditional jump + // should never have regular codeflow set). // This is a workaround for that particular situation. ++num_out_edges; } } bool handled = false; - if (num_out_edges > 1) { // switch jump table + if (num_out_edges > 1) { // Switch jump table ea_t table_address = std::numeric_limits::max(); for (bool ok = xref.first_from(ida_instruction.ea, XREF_ALL); ok && xref.iscode; ok = xref.next_from()) { @@ -527,28 +527,27 @@ void AnalyzeFlow(const insn_t& ida_instruction, Instruction* instruction, table_address = std::min(table_address, xref.to); handled = true; } - // add a data reference to first address in switch table + // Add a data reference to first address in switch table address_references->emplace_back( ida_instruction.ea, GetSourceExpressionId(*instruction, table_address), table_address, TYPE_DATA); - } else { // normal xref + } else { // Normal xref for (bool ok = xref.first_from(ida_instruction.ea, XREF_ALL); ok && xref.iscode; ok = xref.next_from()) { - // regular code flow + // Regular code flow if (xref.type == fl_F || instruction->GetNextInstruction() == xref.to) { // We need the || above because IDA gives me xref 
type unknown for old // idbs. if (instruction->GetNextInstruction() != xref.to) { LOG(INFO) << absl::StrCat( - "warning: ", - absl::Hex(instruction->GetAddress(), absl::kZeroPad8), + "warning: ", FormatAddress(instruction->GetAddress()), " flow xref target != address + instruction size (or " "instruction is missing flow flag). Disassembly is " "likely erroneous."); } entry_point_adder->Add(xref.to, EntryPoint::Source::CODE_FLOW); } else if (xref.type == fl_CN || xref.type == fl_CF) { - // call targets + // Call targets if (IsPossibleFunction(xref.to, modules)) { call_graph->AddFunction(xref.to); call_graph->AddEdge(ida_instruction.ea, xref.to); @@ -560,7 +559,7 @@ void AnalyzeFlow(const insn_t& ida_instruction, Instruction* instruction, xref.to, TYPE_CALL_DIRECT); handled = true; } else if (xref.type == fl_JN || xref.type == fl_JF) { - // jump targets + // Jump targets if (IsPossibleFunction(xref.to, modules) && xref.type == fl_JF) { call_graph->AddEdge(ida_instruction.ea, xref.to); } @@ -603,8 +602,8 @@ void AnalyzeFlow(const insn_t& ida_instruction, Instruction* instruction, } entry_point_adder->Add(xref.to, EntryPoint::Source::JUMP_DIRECT); } else { - LOG(INFO) << absl::StrCat( - "unknown xref ", absl::Hex(ida_instruction.ea, absl::kZeroPad8)); + LOG(INFO) << absl::StrCat("unknown xref ", + FormatAddress(ida_instruction.ea)); } } } @@ -659,7 +658,7 @@ std::vector GetSectionBytes(ea_t segment_start_address) { if (ida_segment && is_loaded(ida_segment->start_ea)) { const ea_t undefined_bytes = next_that(ida_segment->start_ea, ida_segment->end_ea, HasNoValue, - nullptr /* user data */); + nullptr /* user data */); bytes.resize( (undefined_bytes == BADADDR ? 
ida_segment->end_ea : undefined_bytes) - ida_segment->start_ea); @@ -743,8 +742,8 @@ void AnalyzeFlowIda(EntryPoints* entry_points, const ModuleMap* modules, } LOG(INFO) << "flow analysis"; - EntryPointAdder entry_point_adder(entry_points, "flow analysis"); - while (!entry_points->empty()) { + for (EntryPointAdder entry_point_adder(entry_points, "flow analysis"); + !entry_points->empty();) { const Address address = entry_points->back().address_; entry_points->pop_back(); @@ -790,10 +789,9 @@ void AnalyzeFlowIda(EntryPoints* entry_points, const ModuleMap* modules, flow_graph->ReconstructFunctions(instructions, call_graph, noreturn_heuristic); - // Must be called after simplifyFlowGraphs since that will sometimes - // remove source basic blocks for an edge. Only happens when IDA completely - // fucked up its disassembly. - // see: https://zynamics.fogbugz.com/default.asp?2304#12584 + // Must be called after ReconstructFunctions() since that will sometimes + // remove source basic blocks for an edge. Only happens when IDA's disassembly + // is thoroughly broken. 
flow_graph->PruneFlowGraphEdges(); // Note: PruneFlowGraphEdges might add comments to the callgraph so the @@ -865,7 +863,7 @@ void GetRegularComments(Address address, Comments* comments) { ida_comment.c_str(), ida_comment.length())), Comment::REGULAR, false); } - if (get_cmt(&ida_comment, address, /*rptble=*/ true) > 0) { + if (get_cmt(&ida_comment, address, /*rptble=*/true) > 0) { comments->emplace_back(address, UA_MAXOP + 2, CallGraph::CacheString(std::string( ida_comment.c_str(), ida_comment.length())), diff --git a/instruction.h b/instruction.h index 201481f9..a99c3379 100644 --- a/instruction.h +++ b/instruction.h @@ -70,7 +70,7 @@ enum { #pragma pack(push, 1) class Instruction { public: - using GetBytesCallback = std::string (*)(const Instruction& instruction); + using GetBytesCallback = std::function; using StringCache = absl::node_hash_set; explicit Instruction(Address address, Address next_instruction = 0,