From be514ed8fd8a0f4be6726d320dc22fd8f9484702 Mon Sep 17 00:00:00 2001 From: patphzhang Date: Thu, 7 Nov 2024 11:13:49 +0800 Subject: [PATCH] [BOLT] support mold linker generated PLT in disassembling --- bolt/include/bolt/Utils/CommandLineOpts.h | 1 + bolt/lib/Rewrite/RewriteInstance.cpp | 30 +- bolt/lib/Utils/CommandLineOpts.cpp | 6 + bolt/test/X86/Inputs/plt-mold-header.yaml | 399 ++++++++++++++++++++++ bolt/test/X86/plt-mold-header.test | 7 + 5 files changed, 442 insertions(+), 1 deletion(-) create mode 100644 bolt/test/X86/Inputs/plt-mold-header.yaml create mode 100644 bolt/test/X86/plt-mold-header.test diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 04bf7db5de9527..3b0c0db1bd089e 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -34,6 +34,7 @@ extern llvm::cl::opt AggregateOnly; extern llvm::cl::opt BucketsPerLine; extern llvm::cl::opt DiffOnly; extern llvm::cl::opt EnableBAT; +extern llvm::cl::opt UseMold; extern llvm::cl::opt EqualizeBBCounts; extern llvm::cl::opt RemoveSymtab; extern llvm::cl::opt ExecutionCountThreshold; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 32ec7abe8b666a..a7118be5dc263a 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1672,7 +1672,35 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section, const uint64_t SectionAddress = Section.getAddress(); const uint64_t SectionSize = Section.getSize(); - for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize; + uint64_t EntryStartOffset = 0; + if (opts::UseMold) { + // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50) + // generates a unique format for the PLT. + // The first entry of the mold-style PLT is 32 bytes long, while the remaining entries + // are 16 bytes long. 
We need to parse the first entry with a special offset limit setting. + uint64_t HeaderSize = 32; + outs() << "BOLT-INFO: parsing PLT header for mold\n"; + MCInst Instruction; + uint64_t InstrSize, InstrOffset = EntryStartOffset; + while (InstrOffset < HeaderSize) { + disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize); + if (BC->MIB->isIndirectBranch(Instruction)) + break; + InstrOffset += InstrSize; + } + uint64_t TargetAddress; + if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, + SectionAddress + InstrOffset, + InstrSize)) { + errs() << "BOLT-ERROR: error evaluating PLT instruction for the mold header at offset 0x" + << Twine::utohexstr(SectionAddress + InstrOffset) << '\n'; + exit(1); + } + createPLTBinaryFunction(TargetAddress, SectionAddress, HeaderSize); + EntryStartOffset += HeaderSize; + } + + for (uint64_t EntryOffset = EntryStartOffset; EntryOffset + EntrySize <= SectionSize; EntryOffset += EntrySize) { MCInst Instruction; uint64_t InstrSize, InstrOffset = EntryOffset; diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index de82420a167131..356e530c9ca361 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -72,6 +72,12 @@ EnableBAT("enable-bat", cl::ZeroOrMore, cl::cat(BoltCategory)); +cl::opt UseMold("use-mold", + cl::desc("the binary is generated by the mold linker"), + cl::init(false), + cl::ZeroOrMore, + cl::cat(BoltCategory)); + cl::opt EqualizeBBCounts( "equalize-bb-counts", cl::desc("use same count for BBs that should have equivalent count (used " diff --git a/bolt/test/X86/Inputs/plt-mold-header.yaml b/bolt/test/X86/Inputs/plt-mold-header.yaml new file mode 100644 index 00000000000000..be6eabeccbba8f --- /dev/null +++ b/bolt/test/X86/Inputs/plt-mold-header.yaml @@ -0,0 +1,399 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 + Entry: 0x13D0 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ 
PF_R ] + VAddr: 0x40 + Align: 0x8 + - Type: PT_INTERP + Flags: [ PF_R ] + FirstSec: .interp + LastSec: .interp + VAddr: 0x270 + - Type: PT_LOAD + Flags: [ PF_R ] + FirstSec: .interp + LastSec: .rodata.str + Align: 0x1000 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .plt + LastSec: .text + VAddr: 0x13A0 + Align: 0x1000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .relro_padding + VAddr: 0x23F8 + Align: 0x1000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .got.plt + LastSec: .got.plt + VAddr: 0x3550 + Align: 0x1000 + - Type: PT_DYNAMIC + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x23F8 + Align: 0x8 + - Type: PT_GNU_EH_FRAME + Flags: [ PF_R ] + FirstSec: .eh_frame_hdr + LastSec: .eh_frame_hdr + VAddr: 0x37C + Align: 0x4 + - Type: PT_GNU_STACK + Flags: [ PF_W, PF_R ] + - Type: PT_GNU_RELRO + Flags: [ PF_R ] + FirstSec: .dynamic + LastSec: .relro_padding + VAddr: 0x23F8 +Sections: + - Name: .interp + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x270 + AddressAlign: 0x1 + Content: 2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200 + - Name: .gnu.hash + Type: SHT_GNU_HASH + Flags: [ SHF_ALLOC ] + Address: 0x290 + Link: .dynsym + AddressAlign: 0x8 + Header: + SymNdx: 0x2 + Shift2: 0x1A + BloomFilter: [ 0x0 ] + HashBuckets: [ 0x0 ] + HashValues: [ ] + - Name: .dynsym + Type: SHT_DYNSYM + Flags: [ SHF_ALLOC ] + Address: 0x2B0 + Link: .dynstr + AddressAlign: 0x8 + - Name: .dynstr + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + Address: 0x2E0 + AddressAlign: 0x1 + - Name: .gnu.version + Type: SHT_GNU_versym + Flags: [ SHF_ALLOC ] + Address: 0x2FE + Link: .dynsym + AddressAlign: 0x2 + Entries: [ 0, 2 ] + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Address: 0x308 + Link: .dynstr + AddressAlign: 0x8 + Dependencies: + - Version: 1 + File: libc.so.6 + Entries: + - Name: GLIBC_2.2.5 + Hash: 157882997 + Flags: 0 + Other: 2 + - Name: .rela.plt + Type: SHT_RELA + Flags: [ 
SHF_ALLOC, SHF_INFO_LINK ] + Address: 0x328 + Link: .dynsym + AddressAlign: 0x8 + Info: .got.plt + Relocations: + - Offset: 0x3568 + Symbol: printf + Type: R_X86_64_JUMP_SLOT + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x340 + AddressAlign: 0x8 + Content: 1400000000000000017A5200017810011B0C0708900100001C0000001C000000701000002500000000410E108602430D06600C070800000000000000 + - Name: .eh_frame_hdr + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x37C + AddressAlign: 0x4 + Content: 011B033BC0FFFFFF0100000054100000DCFFFFFF + - Name: .rodata.str + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x390 + AddressAlign: 0x1 + Content: 48656C6C6F20776F726C64210A00 + - Name: .plt + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x13A0 + AddressAlign: 0x10 + Content: F30F1EFA4153FF35CC3C1602FF25CE3C1602CCCCCCCCCCCCCCCCCCCCCCCCCCCCF30F1EFA41BB00000000FF2598210000 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x13D0 + AddressAlign: 0x10 + Content: 554889E54883EC10C745FC00000000488D3DAAEFFFFFB000E8D3FFFFFF31C04883C4105DC3 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x23F8 + Link: .dynstr + AddressAlign: 0x8 + Entries: + - Tag: DT_NEEDED + Value: 0x1 + - Tag: DT_JMPREL + Value: 0x328 + - Tag: DT_PLTRELSZ + Value: 0x18 + - Tag: DT_PLTREL + Value: 0x7 + - Tag: DT_PLTGOT + Value: 0x3550 + - Tag: DT_SYMTAB + Value: 0x2B0 + - Tag: DT_SYMENT + Value: 0x18 + - Tag: DT_STRTAB + Value: 0x2E0 + - Tag: DT_STRSZ + Value: 0x1E + - Tag: DT_VERSYM + Value: 0x2FE + - Tag: DT_VERNEED + Value: 0x308 + - Tag: DT_VERNEEDNUM + Value: 0x1 + - Tag: DT_GNU_HASH + Value: 0x290 + - Tag: DT_FLAGS_1 + Value: 0x8000000 + - Tag: DT_DEBUG + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Name: .got + Type: SHT_PROGBITS + 
Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2548 + AddressAlign: 0x8 + Content: '0000000000000000' + - Name: .relro_padding + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2550 + AddressAlign: 0x1 + Size: 0xAB0 + - Name: .got.plt + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x3550 + AddressAlign: 0x8 + Content: F82300000000000000000000000000000000000000000000A013000000000000 + - Name: .rela.text + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .text + Relocations: + - Offset: 0x13E2 + Symbol: .L.str + Type: R_X86_64_PC32 + Addend: -4 + - Offset: 0x13E9 + Symbol: printf + Type: R_X86_64_PLT32 + Addend: -4 + - Type: SectionHeaderTable + Sections: + - Name: .interp + - Name: .gnu.hash + - Name: .dynsym + - Name: .dynstr + - Name: .gnu.version + - Name: .gnu.version_r + - Name: .rela.plt + - Name: .eh_frame + - Name: .eh_frame_hdr + - Name: .rodata.str + - Name: .plt + - Name: .text + - Name: .rela.text + - Name: .dynamic + - Name: .got + - Name: .relro_padding + - Name: .got.plt + - Name: .symtab + - Name: .strtab + - Name: .shstrtab +Symbols: + - Name: .interp + Type: STT_SECTION + Section: .interp + Value: 0x270 + - Name: .gnu.hash + Type: STT_SECTION + Section: .gnu.hash + Value: 0x290 + - Name: .dynsym + Type: STT_SECTION + Section: .dynsym + Value: 0x2B0 + - Name: .dynstr + Type: STT_SECTION + Section: .dynstr + Value: 0x2E0 + - Name: .gnu.version + Type: STT_SECTION + Section: .gnu.version + Value: 0x2FE + - Name: .gnu.version_r + Type: STT_SECTION + Section: .gnu.version_r + Value: 0x308 + - Name: .rela.plt + Type: STT_SECTION + Section: .rela.plt + Value: 0x328 + - Name: .eh_frame + Type: STT_SECTION + Section: .eh_frame + Value: 0x340 + - Name: .eh_frame_hdr + Type: STT_SECTION + Section: .eh_frame_hdr + Value: 0x37C + - Name: .rodata.str + Type: STT_SECTION + Section: .rodata.str + Value: 0x390 + - Name: .plt + Type: STT_SECTION + Section: .plt + Value: 0x13A0 + - Name: .text + 
Type: STT_SECTION + Section: .text + Value: 0x13D0 + - Name: .dynamic + Type: STT_SECTION + Section: .dynamic + Value: 0x23F8 + - Name: .got + Type: STT_SECTION + Section: .got + Value: 0x2548 + - Name: .relro_padding + Type: STT_SECTION + Section: .relro_padding + Value: 0x2550 + - Name: .got.plt + Type: STT_SECTION + Section: .got.plt + Value: 0x3550 + - Name: 'printf$plt' + Type: STT_FUNC + Section: .plt + Value: 0x13C0 + - Name: hello.c + Type: STT_FILE + Index: SHN_ABS + - Name: .L.str + Type: STT_OBJECT + Section: .rodata.str + Value: 0x390 + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x13D0 + Size: 0x25 + - Name: __ehdr_start + Section: .interp + - Name: __init_array_start + Index: SHN_ABS + - Name: __init_array_end + Index: SHN_ABS + - Name: __fini_array_start + Index: SHN_ABS + - Name: __fini_array_end + Index: SHN_ABS + - Name: __preinit_array_start + Index: SHN_ABS + - Name: __preinit_array_end + Index: SHN_ABS + - Name: _DYNAMIC + Section: .dynamic + Value: 0x23F8 + - Name: _GLOBAL_OFFSET_TABLE_ + Section: .got.plt + Value: 0x3550 + - Name: _PROCEDURE_LINKAGE_TABLE_ + Section: .plt + Value: 0x13A0 + - Name: __bss_start + Index: SHN_ABS + - Name: _end + Section: .got.plt + Value: 0x3570 + - Name: _etext + Section: .text + Value: 0x13F5 + - Name: _edata + Section: .got.plt + Value: 0x3570 + - Name: __executable_start + Section: .interp + - Name: __rela_iplt_start + Index: SHN_ABS + - Name: __rela_iplt_end + Index: SHN_ABS + - Name: __GNU_EH_FRAME_HDR + Section: .eh_frame_hdr + Value: 0x37C + - Name: end + Section: .got.plt + Value: 0x3570 + - Name: etext + Section: .text + Value: 0x13F5 + - Name: edata + Section: .got.plt + Value: 0x3570 + - Name: __dso_handle + Section: .interp + - Name: _TLS_MODULE_BASE_ + Section: .interp + - Name: printf + Binding: STB_GLOBAL +DynamicSymbols: + - Name: printf + Type: STT_FUNC + Binding: STB_GLOBAL +... 
diff --git a/bolt/test/X86/plt-mold-header.test b/bolt/test/X86/plt-mold-header.test new file mode 100644 index 00000000000000..8cbbed8711cbce --- /dev/null +++ b/bolt/test/X86/plt-mold-header.test @@ -0,0 +1,7 @@ +# RUN: yaml2obj %p/Inputs/plt-mold-header.yaml &> %t.exe +# RUN: llvm-bolt -use-mold %t.exe --print-cfg --print-only=main.* -o %t.out | FileCheck %s + +## Check that llvm-bolt correctly parses the PLT header created by the mold linker. +## Without the '-use-mold' option, llvm-bolt reports "BOLT-ERROR: unable to disassemble instruction in PLT section .plt at offset 0x10". +## The only call instruction in main() should be a call to printf() through the PLT. +CHECK: callq "printf$plt