From b7dec7c28aa8fcce555fb11daf058512c06d8b8c Mon Sep 17 00:00:00 2001 From: xezon <4720891+xezon@users.noreply.github.com> Date: Sun, 13 Oct 2024 08:27:07 +0200 Subject: [PATCH] Move process_exe and process_pdb functions into a class and pass pdb symbols on to the exe processing step --- executable.cpp | 13 ++- executable.h | 19 ++++- main.cpp | 221 ++++++++++++++++++++++++++++++------------------- pdbreader.cpp | 21 ++++- pdbreader.h | 10 ++- util.cpp | 9 ++ util.h | 1 + 7 files changed, 192 insertions(+), 102 deletions(-) diff --git a/executable.cpp b/executable.cpp index c3e5192..111b6b1 100644 --- a/executable.cpp +++ b/executable.cpp @@ -27,7 +27,7 @@ const char *const s_objectSection = "objects"; ExeSymbol Executable::s_emptySymbol; -Executable::Executable(OutputFormats format, bool verbose) : m_outputFormat(format), m_verbose(verbose) {} +Executable::Executable() {} Executable::~Executable() {} @@ -223,15 +223,12 @@ const ExeSymbols &Executable::get_symbols() const void Executable::add_symbols(const ExeSymbols &symbols) { - uint32_t index = static_cast(m_symbols.size()); - const uint32_t size = index + symbols.size(); - m_symbols.insert(m_symbols.end(), symbols.begin(), symbols.end()); + const size_t size = m_symbols.size() + symbols.size(); + m_symbols.reserve(size); m_symbolAddressToIndexMap.reserve(size); - assert(m_symbols.size() == size); - - for (; index < size; ++index) { - m_symbolAddressToIndexMap[m_symbols[index].address] = index; + for (const ExeSymbol &symbol : symbols) { + add_symbol(symbol); } } diff --git a/executable.h b/executable.h index 8effc39..ccdd4ee 100644 --- a/executable.h +++ b/executable.h @@ -29,16 +29,19 @@ class Executable using Address64ToIndexMap = std::unordered_map; public: - enum OutputFormats + enum OutputFormat { OUTPUT_IGAS, OUTPUT_MASM, }; public: - Executable(OutputFormats format = OUTPUT_IGAS, bool verbose = false); + Executable(); ~Executable(); + void set_output_format(OutputFormat format) { m_outputFormat = 
format; } + void set_verbose(bool verbose) { m_verbose = verbose; } + bool read(const std::string &exe_file); void load_config(const char *file_name); @@ -55,7 +58,15 @@ class Executable const ExeSymbol &get_symbol(uint64_t addr) const; const ExeSymbol &get_nearest_symbol(uint64_t addr) const; const ExeSymbols &get_symbols() const; + + /* + * Adds a series of new symbols if not already present. + */ void add_symbols(const ExeSymbols &symbols); + + /* + * Adds a new symbol if not already present. + */ void add_symbol(const ExeSymbol &symbol); /** @@ -77,8 +88,8 @@ class Executable void dump_objects(nlohmann::json &js) const; private: - const OutputFormats m_outputFormat; - const bool m_verbose; + OutputFormat m_outputFormat = OUTPUT_IGAS; + bool m_verbose = false; bool m_addBase = false; std::unique_ptr m_binary; diff --git a/main.cpp b/main.cpp index 0cca221..b5d0894 100644 --- a/main.cpp +++ b/main.cpp @@ -15,6 +15,7 @@ #include "gitinfo.h" #include "pdbreader.h" #include "util.h" +#include #include #include #include @@ -54,30 +55,6 @@ void print_help() version); } -void print_sections(unassemblize::Executable &exe) -{ - const unassemblize::ExeSectionMap &map = exe.get_section_map(); - for (auto it = map.begin(); it != map.end(); ++it) { - printf( - "Name: %s, Address: 0x%" PRIx64 " Size: %" PRIu64 "\n", it->first.c_str(), it->second.address, it->second.size); - } -} - -void dump_function_to_file( - const std::string &file_name, unassemblize::Executable &exe, const char *section_name, uint64_t start, uint64_t end) -{ - if (!file_name.empty()) { - FILE *fp = fopen(file_name.c_str(), "w+"); - if (fp != nullptr) { - fprintf(fp, ".intel_syntax noprefix\n\n"); - exe.dissassemble_function(fp, section_name, start, end); - fclose(fp); - } - } else { - exe.dissassemble_function(nullptr, section_name, start, end); - } -} - const char *const auto_str = "auto"; // When output is set to "auto", then output name is chosen for input file name.
enum class InputType @@ -101,86 +78,136 @@ struct ExeOptions bool verbose = false; }; -bool process_exe(const ExeOptions &o) +struct PdbOptions { - if (o.verbose) { - printf("Parsing exe file '%s'...\n", o.input_file.c_str()); - } + std::string input_file; + std::string config_file = "config.json"; + bool print_secs = false; + bool dump_syms = false; + bool verbose = false; +}; - // TODO implement default value where exe object decides internally what to do. - unassemblize::Executable::OutputFormats format = unassemblize::Executable::OUTPUT_IGAS; +class Runner +{ +public: + void print_sections(unassemblize::Executable &exe) + { + const unassemblize::ExeSectionMap &map = exe.get_section_map(); + for (auto it = map.begin(); it != map.end(); ++it) { + printf("Name: %s, Address: 0x%" PRIx64 " Size: %" PRIu64 "\n", + it->first.c_str(), + it->second.address, + it->second.size); + } + } - if (!o.format_str.empty()) { - if (0 == strcasecmp(o.format_str.c_str(), "igas")) { - format = unassemblize::Executable::OUTPUT_IGAS; - } else if (0 == strcasecmp(o.format_str.c_str(), "masm")) { - format = unassemblize::Executable::OUTPUT_MASM; + void dump_function_to_file( + const std::string &file_name, unassemblize::Executable &exe, const char *section_name, uint64_t start, uint64_t end) + { + if (!file_name.empty()) { + FILE *fp = fopen(file_name.c_str(), "w+"); + if (fp != nullptr) { + fprintf(fp, ".intel_syntax noprefix\n\n"); + exe.dissassemble_function(fp, section_name, start, end); + fclose(fp); + } + } else { + exe.dissassemble_function(nullptr, section_name, start, end); } } - unassemblize::Executable exe(format, o.verbose); + bool process_exe(const ExeOptions &o) + { + if (o.verbose) { + printf("Parsing exe file '%s'...\n", o.input_file.c_str()); + } - if (!exe.read(o.input_file)) { - return false; - } + // TODO implement default value where exe object decides internally what to do. 
+ unassemblize::Executable::OutputFormat format = unassemblize::Executable::OUTPUT_IGAS; - if (o.print_secs) { - print_sections(exe); - return true; - } + if (!o.format_str.empty()) { + if (0 == strcasecmp(o.format_str.c_str(), "igas")) { + format = unassemblize::Executable::OUTPUT_IGAS; + } else if (0 == strcasecmp(o.format_str.c_str(), "masm")) { + format = unassemblize::Executable::OUTPUT_MASM; + } + } - if (o.dump_syms) { - exe.save_config(o.config_file.c_str()); - return true; - } + m_executable.set_output_format(format); + m_executable.set_verbose(o.verbose); + + if (!m_executable.read(o.input_file)) { + return false; + } - exe.load_config(o.config_file.c_str()); + if (o.print_secs) { + print_sections(m_executable); + return true; + } - if (o.start_addr == 0 && o.end_addr == 0) { - for (const unassemblize::ExeSymbol &symbol : exe.get_symbols()) { - std::string sanitized_symbol_name = symbol.name; + unassemblize::ExeSymbols pdb_exe_symbols = m_pdbReader.build_exe_symbols(); + if (!pdb_exe_symbols.empty()) { + m_executable.add_symbols(pdb_exe_symbols); + } + + if (o.dump_syms) { + m_executable.save_config(o.config_file.c_str()); + return true; + } + + m_executable.load_config(o.config_file.c_str()); + + if (o.start_addr == 0 && o.end_addr == 0) { + for (const unassemblize::ExeSymbol &symbol : m_executable.get_symbols()) { + std::string sanitized_symbol_name = symbol.name; #if defined(WIN32) - util::remove_characters(sanitized_symbol_name, "\\/:*?\"<>|"); + util::remove_characters(sanitized_symbol_name, "\\/:*?\"<>|"); #endif - std::string file_name; - if (!o.output_file.empty()) { - // program.symbol.S - file_name = util::get_remove_file_ext(o.output_file) + "." + sanitized_symbol_name + ".S"; + std::string file_name; + if (!o.output_file.empty()) { + // program.symbol.S + file_name = util::get_remove_file_ext(o.output_file) + "." 
+ sanitized_symbol_name + ".S"; + } + dump_function_to_file( + file_name, m_executable, o.section_name.c_str(), symbol.address, symbol.address + symbol.size); } - dump_function_to_file(file_name, exe, o.section_name.c_str(), symbol.address, symbol.address + symbol.size); + } else { + dump_function_to_file(o.output_file, m_executable, o.section_name.c_str(), o.start_addr, o.end_addr); } - } else { - dump_function_to_file(o.output_file, exe, o.section_name.c_str(), o.start_addr, o.end_addr); + + return true; } - return true; -} + bool process_pdb(const PdbOptions &o) + { + m_pdbReader.set_verbose(o.verbose); -struct PdbOptions -{ - std::string input_file; - std::string config_file = "config.json"; - bool print_secs = false; - bool dump_syms = false; - bool verbose = false; -}; + // Currently does not read back config file here. -bool process_pdb(const PdbOptions &o) -{ - unassemblize::PdbReader pdb_reader(o.verbose); + if (!m_pdbReader.read(o.input_file)) { + return false; + } - // Currently does not read back config file here. 
+ if (o.dump_syms) { + m_pdbReader.save_config(o.config_file); + } - if (!pdb_reader.read(o.input_file)) { - return false; + return true; } - if (o.dump_syms) { - pdb_reader.save_config(o.config_file); + std::string get_pdb_exe_file_name() + { + unassemblize::PdbExeInfo exe_info = m_pdbReader.get_exe_info(); + assert(!exe_info.exeFileName.empty()); + assert(!exe_info.pdbFilePath.empty()); + + return util::get_file_path(exe_info.pdbFilePath) + "/" + exe_info.exeFileName + ".exe"; } - return true; -} +private: + unassemblize::Executable m_executable; + unassemblize::PdbReader m_pdbReader; +}; std::string get_config_file_name(const std::string &input_file, const std::string &config_file) { @@ -325,6 +352,8 @@ int main(int argc, char **argv) return 1; } + Runner runner; + if (InputType::Exe == type) { ExeOptions o; o.input_file = input_file; @@ -337,15 +366,33 @@ int main(int argc, char **argv) o.print_secs = print_secs; o.dump_syms = dump_syms; o.verbose = verbose; - return process_exe(o) ? 0 : 1; + return runner.process_exe(o) ? 0 : 1; } else if (InputType::Pdb == type) { - PdbOptions o; - o.input_file = input_file; - o.config_file = get_config_file_name(o.input_file, config_file); - o.print_secs = print_secs; - o.dump_syms = dump_syms; - o.verbose = verbose; - return process_pdb(o) ? 
0 : 1; + bool success; + { + PdbOptions o; + o.input_file = input_file; + o.config_file = get_config_file_name(o.input_file, config_file); + o.print_secs = print_secs; + o.dump_syms = dump_syms; + o.verbose = verbose; + success = runner.process_pdb(o); + } + if (success) { + ExeOptions o; + o.input_file = runner.get_pdb_exe_file_name(); + o.config_file = get_config_file_name(o.input_file, config_file); + o.output_file = get_output_file_name(o.input_file, output_file); + o.section_name = section_name; + o.format_str = format_string; + o.start_addr = start_addr; + o.end_addr = end_addr; + o.print_secs = print_secs; + o.dump_syms = dump_syms; + o.verbose = verbose; + success = runner.process_exe(o); + } + return success ? 0 : 1; } else { // Impossible return 1; diff --git a/pdbreader.cpp b/pdbreader.cpp index 874ca4d..bf285c2 100644 --- a/pdbreader.cpp +++ b/pdbreader.cpp @@ -24,7 +24,7 @@ const char *const s_sourceFiles = "pdb_source_files"; const char *const s_functions = "pdb_functions"; const char *const s_exe = "pdb_exe"; -PdbReader::PdbReader(bool verbose) : m_verbose(verbose), m_dwMachineType(CV_CFL_80386) {} +PdbReader::PdbReader() : m_dwMachineType(CV_CFL_80386) {} bool PdbReader::read(const std::string &pdb_file) { @@ -38,6 +38,25 @@ bool PdbReader::read(const std::string &pdb_file) return success; } +ExeSymbols PdbReader::build_exe_symbols() const +{ + ExeSymbols symbols; + symbols.reserve(m_functions.size()); + for (const PdbFunctionInfo &function : m_functions) { + ExeSymbol symbol; + symbol.name = function.decoratedName; + symbol.address = function.address.relVirtual; + symbol.size = function.length; + symbols.emplace_back(std::move(symbol)); + } + return symbols; +} + +const PdbExeInfo &PdbReader::get_exe_info() const +{ + return m_exe; +} + void PdbReader::load_json(const nlohmann::json &js) { js.at(s_compilands).get_to(m_compilands); diff --git a/pdbreader.h b/pdbreader.h index 128e9e2..c947154 100644 --- a/pdbreader.h +++ b/pdbreader.h @@ -28,10 
+28,16 @@ class PdbReader using StringToIndexMapT = std::unordered_map; public: - PdbReader(bool verbose = false); + PdbReader(); + + void set_verbose(bool verbose) { m_verbose = verbose; } bool read(const std::string &pdb_file); + ExeSymbols build_exe_symbols() const; + + const PdbExeInfo &get_exe_info() const; + void load_json(const nlohmann::json &js); bool load_config(const std::string &file_name); void save_json(nlohmann::json &js, bool overwrite_sections = false); @@ -62,12 +68,12 @@ class PdbReader void read_line(PdbFunctionInfo &function_info, IDiaLineNumber *pLine); private: - const bool m_verbose = false; IDiaDataSource *m_pDiaSource = nullptr; IDiaSession *m_pDiaSession = nullptr; IDiaSymbol *m_pDiaSymbol = nullptr; uint32_t m_dwMachineType = 0; bool m_coInitialized = false; + bool m_verbose = false; // Compilands indices match DIA2 indices. // Source Files indices do not match DIA2 indices (aka "unique id"). diff --git a/util.cpp b/util.cpp index cbf3afd..e03ee29 100644 --- a/util.cpp +++ b/util.cpp @@ -57,6 +57,15 @@ std::string get_remove_file_ext(const std::string &file_name) return file_name; } +std::string get_file_path(const std::string &file_path) +{ + const size_t pos = file_path.find_last_of("/\\"); + if (pos != std::string::npos) { + return file_path.substr(0, pos); + } + return file_path; +} + std::string get_file_ext(const std::string &file_name) { const size_t pos = file_name.find_last_of("."); diff --git a/util.h b/util.h index 3f3809f..04cc253 100644 --- a/util.h +++ b/util.h @@ -22,6 +22,7 @@ std::wstring to_utf16(const char *utf8); std::wstring to_utf16(const std::string &utf8); void remove_characters(std::string &s, const std::string &chars); std::string get_remove_file_ext(const std::string &file_name); +std::string get_file_path(const std::string &file_path); std::string get_file_ext(const std::string &file_name); } // namespace util