Skip to content

Commit

Permalink
(isla-axiomatic) use nm to get label locations
Browse files Browse the repository at this point in the history
Fixes a bug where the objdump output does not contain labels,
but those labels are still in the ELF.

Using `nm` is just a much more robust way of getting names out of
a binary.
  • Loading branch information
bensimner committed Aug 30, 2023
1 parent 473f998 commit dc0cc64
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 31 deletions.
2 changes: 2 additions & 0 deletions configs/armv9.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ os = "macos"
arch = "aarch64"
assembler = "as --target=aarch64-unknown-linux-gnu"
objdump = "/opt/homebrew/opt/llvm/bin/llvm-objdump"
nm = "/opt/homebrew/opt/llvm/bin/llvm-nm"
linker = "/opt/homebrew/opt/llvm/bin/ld.lld"

[[toolchain]]
name = "default"
assembler = "aarch64-linux-gnu-as -march=armv8.1-a"
objdump = "aarch64-linux-gnu-objdump"
nm = "aarch64-linux-gnu-nm"
linker = "aarch64-linux-gnu-ld"

[mmu]
Expand Down
2 changes: 2 additions & 0 deletions configs/armv9_mmu_on.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ os = "macos"
arch = "aarch64"
assembler = "as --target=aarch64-unknown-linux-gnu"
objdump = "/opt/homebrew/opt/llvm/bin/llvm-objdump"
nm = "/opt/homebrew/opt/llvm/bin/llvm-nm"
linker = "/opt/homebrew/opt/llvm/bin/ld.lld"

[[toolchain]]
name = "default"
assembler = "aarch64-linux-gnu-as -march=armv8.1-a"
objdump = "aarch64-linux-gnu-objdump"
nm = "aarch64-linux-gnu-nm"
linker = "aarch64-linux-gnu-ld"

[mmu]
Expand Down
6 changes: 3 additions & 3 deletions isla-axiomatic/src/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use crate::axiomatic::model::Model;
use crate::axiomatic::relations;
use crate::axiomatic::{AxEvent, ExecutionInfo, Pairs, ThreadId};
use crate::footprint_analysis::Footprint;
use crate::litmus::instruction_from_objdump;
use crate::litmus::{Objdump, instruction_from_objdump};
use crate::litmus::{Litmus, LitmusGraphOpts};
use crate::page_table::PageAttrs;
use crate::sexp::{InterpretError, SexpVal};
Expand Down Expand Up @@ -190,7 +190,7 @@ pub struct GraphEvent {
event_kind: GraphEventKind,
}

fn event_kind<B: BV>(_objdump: &str, ev: &AxEvent<B>) -> GraphEventKind {
fn event_kind<B: BV>(_objdump: &Objdump, ev: &AxEvent<B>) -> GraphEventKind {
match ev.base.last() {
Some(Event::WriteMem { region, .. }) => {
if region == &"stage 1" {
Expand Down Expand Up @@ -386,7 +386,7 @@ impl GraphEvent {
/// Create an event to display in a user-visible graph from an
/// underlying axiomatic event. For display, we use the objdump
/// output to find the human-readable assembly instruction
pub fn from_axiomatic<'a, B: BV>(ev: &'a AxEvent<B>, objdump: &str, value: Option<GraphValue>) -> Self {
pub fn from_axiomatic<'a, B: BV>(ev: &'a AxEvent<B>, objdump: &Objdump, value: Option<GraphValue>) -> Self {
let instr = instruction_from_objdump(&format!("{:x}", ev.opcode), objdump);
GraphEvent {
instr,
Expand Down
72 changes: 50 additions & 22 deletions isla-axiomatic/src/litmus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,24 @@ fn generate_linker_script<B>(
script
}

pub type AssembledThreads = (HashMap<ThreadName, (u64, Vec<u8>)>, Vec<(u64, Vec<u8>)>, String);
#[derive(Clone, Debug)]
pub struct Objdump {
/// The output of `objdump` on the assembled binary
pub objdump: String,
/// The output of `nm` on the assembled binary
pub names: String,
}

impl Objdump {
pub fn empty() -> Self {
Objdump {
objdump: "".to_string(),
names: "".to_string(),
}
}
}

pub type AssembledThreads = (HashMap<ThreadName, (u64, Vec<u8>)>, Vec<(u64, Vec<u8>)>, Objdump);

#[cfg(feature = "sandbox")]
fn validate_code(code: &str) -> Result<(), String> {
Expand Down Expand Up @@ -281,7 +298,7 @@ fn assemble<B>(
use goblin::Object;

if !requires_assembly(threads) && sections.is_empty() {
return Ok((HashMap::new(), Vec::new(), "".to_string()));
return Ok((HashMap::new(), Vec::new(), Objdump::empty() ));
}

let objfile = tmpfile::TmpFile::new();
Expand Down Expand Up @@ -327,7 +344,7 @@ fn assemble<B>(
return Err(format!("Assembler failed with: {}", String::from_utf8_lossy(&output.stderr)));
}

let (mut objfile, objdump) = if reloc {
let (mut objfile, objdump, names) = if reloc {
let objfile_reloc = tmpfile::TmpFile::new();
let linker_script = tmpfile::TmpFile::new();
{
Expand Down Expand Up @@ -362,13 +379,23 @@ fn assemble<B>(
}
};

// Invoke nm to get the list of locations in human readable form.
// This failing is a hard error, as we need the location information.
let names = SandboxedCommand::from_tool(&isa.nm)
.arg(objfile_reloc.path())
.output()
.map(|o| String::from_utf8_lossy(&o.stdout).to_string())
.map_err(|err| {
format!("Failed to invoke nm {}. Got error: {}", &isa.nm.executable.display(), err)
})?;

if linker_status.success() {
(objfile_reloc, objdump)
(objfile_reloc, objdump, names)
} else {
return Err(format!("Linker failed with exit code {}", linker_status));
}
} else {
(objfile, "Objdump not available unless linker was used".to_string())
(objfile, "Objdump not available unless linker was used".to_string(), "Names not available unless linker was used".to_string())
};

let buffer = objfile.read_to_end().map_err(|_| "Failed to read generated ELF file".to_string())?;
Expand Down Expand Up @@ -406,23 +433,24 @@ fn assemble<B>(
};

log!(log::LITMUS, &format!("Objdump:\n{}", objdump));
log!(log::LITMUS, &format!("Names:\n{}", names));

Ok((assembled_threads, assembled_sections, objdump))
Ok((assembled_threads, assembled_sections, Objdump { objdump, names }))
}

/// For error reporting it's very helpful to be able to turn the raw
/// opcodes we work with into actual human-readable assembly. To do
/// this we use a regex to pair up the opcode with it's disassembly in
/// objdump output for the litmus test.
pub fn instruction_from_objdump<'obj>(opcode: &str, objdump: &'obj str) -> Option<String> {
pub fn instruction_from_objdump<'obj>(opcode: &str, objdump: &'obj Objdump) -> Option<String> {
use regex::Regex;
let instr_re = Regex::new(&format!(r"[0-9a-zA-Z]+:\s0*{}\s+(.*)", opcode)).unwrap();

// Find all instructions for an opcode in the objdump output. Return None if
// for some reason they are non-unique
// (this could happen if e.g. relocations have not been applied tojumps).
let mut instr: Option<&'obj str> = None;
for caps in instr_re.captures_iter(objdump) {
for caps in instr_re.captures_iter(&objdump.objdump) {
if let Some(prev) = instr {
if prev == caps.get(1)?.as_str().trim() {
continue;
Expand All @@ -438,26 +466,26 @@ pub fn instruction_from_objdump<'obj>(opcode: &str, objdump: &'obj str) -> Optio
Some(whitespace_re.replace_all(instr?, " ").to_string())
}

pub fn opcode_from_objdump<B: BV>(addr: B, objdump: &str) -> Option<B> {
pub fn opcode_from_objdump<B: BV>(addr: B, objdump: &Objdump) -> Option<B> {
use regex::Regex;
let opcode_re = Regex::new(&format!(r"{:x}:\t([0-9a-fA-F]+) \t", addr)).unwrap();

if let Some(caps) = opcode_re.captures(objdump) {
if let Some(caps) = opcode_re.captures(&objdump.objdump) {
B::from_str(&format!("0x{}", caps.get(1)?.as_str()))
} else {
None
}
}

fn label_from_objdump(label: &str, objdump: &str) -> Option<u64> {
fn label_from_nm(label: &str, nm: &str) -> Option<u64> {
use regex::Regex;
let label_re = Regex::new(&format!(r"([0-9a-fA-F]+) <{}>:", label)).unwrap();
let nm_re = Regex::new(&format!(r"([0-9a-fA-F]+) t {}", label)).unwrap();
let c = nm_re.captures(nm)?;
u64::from_str_radix(c.get(1)?.as_str(), 16).ok()
}

if let Some(caps) = label_re.captures(objdump) {
u64::from_str_radix(caps.get(1)?.as_str(), 16).ok()
} else {
None
}
fn label_from_objdump(label: &str, objdump: &Objdump) -> Option<u64> {
label_from_nm(label, &objdump.names)
}

pub fn assemble_instruction<B>(instr: &str, isa: &ISAConfig<B>) -> Result<Vec<u8>, String> {
Expand Down Expand Up @@ -579,7 +607,7 @@ fn parse_init<B>(
reg: &str,
value: &Value,
symbolic_addrs: &HashMap<String, u64>,
objdump: &str,
objdump: &Objdump,
symtab: &Symtab,
isa: &ISAConfig<B>,
) -> Result<(Name, u64), String> {
Expand Down Expand Up @@ -655,7 +683,7 @@ type ThreadInit = (Vec<(Name, u64)>, HashMap<Loc<Name>, exp::Exp<String>>);
fn parse_thread_initialization<B: BV>(
thread: &Value,
symbolic_addrs: &HashMap<String, u64>,
objdump: &str,
objdump: &Objdump,
symtab: &Symtab,
isa: &ISAConfig<B>,
) -> Result<ThreadInit, String> {
Expand All @@ -679,7 +707,7 @@ fn parse_thread_initialization<B: BV>(
Ok((init, reset))
}

fn parse_self_modify_region<B: BV>(toml_region: &Value, objdump: &str) -> Result<Region<B>, String> {
fn parse_self_modify_region<B: BV>(toml_region: &Value, objdump: &Objdump) -> Result<Region<B>, String> {
let table = toml_region.as_table().ok_or("Each self_modify element must be a TOML table")?;
let address =
table.get("address").and_then(Value::as_str).ok_or("self_modify element must have a `address` field")?;
Expand Down Expand Up @@ -713,7 +741,7 @@ fn parse_self_modify_region<B: BV>(toml_region: &Value, objdump: &str) -> Result
))
}

fn parse_self_modify<B: BV>(toml: &Value, objdump: &str) -> Result<Vec<Region<B>>, String> {
fn parse_self_modify<B: BV>(toml: &Value, objdump: &Objdump) -> Result<Vec<Region<B>>, String> {
if let Some(value) = toml.get("self_modify") {
let array = value.as_array().ok_or_else(|| "self_modify section must be a TOML array".to_string())?;
Ok(array.iter().map(|v| parse_self_modify_region(v, objdump)).collect::<Result<_, _>>()?)
Expand Down Expand Up @@ -844,7 +872,7 @@ pub struct Litmus<B> {
pub threads: Vec<Thread>,
pub sections: Vec<AssembledSection>,
pub self_modify_regions: Vec<Region<B>>,
pub objdump: String,
pub objdump: Objdump,
pub final_assertion: exp::Exp<String>,
pub graph_opts: LitmusGraphOpts,
}
Expand Down
10 changes: 5 additions & 5 deletions isla-axiomatic/src/litmus/exp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use isla_lib::primop;
use isla_lib::smt::Solver;
use isla_lib::source_loc::SourceLoc;

use super::label_from_objdump;
use super::{Objdump, label_from_objdump};
use crate::page_table::{self, PageAttrs, S1PageAttrs, S2PageAttrs, TranslationTableWalk, VirtualAddress};

pub enum ExpParseError {
Expand Down Expand Up @@ -591,7 +591,7 @@ pub fn partial_eval<B: BV>(
memory: &Memory<B>,
addrs: &HashMap<String, u64>,
pas: &HashMap<String, u64>,
objdump: &str,
objdump: &Objdump,
solver: &mut Solver<B>,
) -> Result<Partial<u64, B>, ExecError> {
use Partial::*;
Expand Down Expand Up @@ -698,7 +698,7 @@ pub fn eval<B: BV>(
exp: &Exp<String>,
memory: &Memory<B>,
addrs: &HashMap<String, u64>,
objdump: &str,
objdump: &Objdump,
solver: &mut Solver<B>,
) -> Result<Val<B>, ExecError> {
match partial_eval(exp, memory, addrs, &HashMap::new(), objdump, solver)? {
Expand All @@ -707,9 +707,9 @@ pub fn eval<B: BV>(
}
}

pub fn reset_eval<B: BV>(exp: &Exp<String>, addrs: &HashMap<String, u64>, objdump: &str) -> Reset<B> {
pub fn reset_eval<B: BV>(exp: &Exp<String>, addrs: &HashMap<String, u64>, objdump: &Objdump) -> Reset<B> {
let exp = exp.clone();
let addrs = addrs.clone();
let objdump = objdump.to_string();
let objdump = objdump.clone();
Arc::new(move |memory, _, solver| eval(&exp, memory, &addrs, &objdump, solver))
}
8 changes: 7 additions & 1 deletion isla-lib/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ fn get_tool_path(config: &Value, tool: &str) -> Result<Tool, String> {
struct Toolchain {
assembler: Tool,
objdump: Tool,
nm: Tool,
linker: Tool,
}

Expand All @@ -130,12 +131,13 @@ fn get_toolchain(config: &Value, chosen: Option<&str>) -> Result<Toolchain, Stri
return Ok(Toolchain {
assembler: get_tool_path(config, "assembler")?,
objdump: get_tool_path(config, "objdump")?,
nm: get_tool_path(config, "nm")?,
linker: get_tool_path(config, "linker")?,
})
};

for toolchain in toolchains {
allowed_keys(toolchain, "[[toolchain]]", &["name", "os", "arch", "assembler", "objdump", "linker"])?;
allowed_keys(toolchain, "[[toolchain]]", &["name", "os", "arch", "assembler", "objdump", "linker", "nm"])?;
}

for toolchain in toolchains {
Expand Down Expand Up @@ -170,6 +172,7 @@ fn get_toolchain(config: &Value, chosen: Option<&str>) -> Result<Toolchain, Stri
return Ok(Toolchain {
assembler: get_tool_path(toolchain, "assembler")?,
objdump: get_tool_path(toolchain, "objdump")?,
nm: get_tool_path(toolchain, "nm")?,
linker: get_tool_path(toolchain, "linker")?,
});
}
Expand Down Expand Up @@ -561,6 +564,8 @@ pub struct ISAConfig<B> {
pub assembler: Tool,
/// A path to an objdump for the architecture
pub objdump: Tool,
/// A path to an nm for the architecture
pub nm: Tool,
/// A path to a linker for the architecture
pub linker: Tool,
/// The base address for the page tables
Expand Down Expand Up @@ -634,6 +639,7 @@ impl<B: BV> ISAConfig<B> {
register_event_sets: get_register_event_sets(&config, symtab)?,
assembler: toolchain.assembler,
objdump: toolchain.objdump,
nm: toolchain.nm,
linker: toolchain.linker,
page_table_base: get_table_value(&config, "mmu", "page_table_base")?,
page_size: get_table_value(&config, "mmu", "page_size")?,
Expand Down

0 comments on commit dc0cc64

Please sign in to comment.