Skip to content

Commit

Permalink
Witness extraction and validation (#71)
Browse files Browse the repository at this point in the history
* feat(witness extraction): merge witness extraction into main

* feat(witness extraction): add example files for bachelor thesis witness extraction

* chore: Add bachelor_thesis_haslauer.pdf and thesis abstract to README.md

---------

Co-authored-by: haslauerbe <[email protected]>
  • Loading branch information
Haslauerbe and haslauerbe authored Jul 22, 2024
1 parent 5f498a1 commit 3a9ffcd
Show file tree
Hide file tree
Showing 14 changed files with 367 additions and 38 deletions.
29 changes: 29 additions & 0 deletions examples/division-by-input.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
Example in Bernhard Haslauer's Bachelor Thesis.
Input == #b00110000 (== 48 == '0') or
Input == #b00110010 (== 50 == '2')
*/

uint64_t main() {
uint64_t a;
uint64_t* x;

x = malloc(8);
*x = 0;

read(0, x, 1);

*x = *x - 48;

// division by zero if the input was '0' (== 48)
a = 41 + (1 / *x);

// division by zero if the input was '2' (== 50)
if (*x == 2)
a = 41 + (1 / 0);

if (a == 42)
return 1;
else
return 0;
}
19 changes: 19 additions & 0 deletions examples/eight-byte-input.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Example in Bernhard Haslauer's Bachelor Thesis.
// input "11111111" (8 '1's) lead to division by zero
uint64_t main() {
uint64_t a;
uint64_t* x;

x = malloc(8);
*x = 0;

read(0, x, 8);

a = 3544668469065756977;
// input == "11111111" == 3544668469065756977
if (*x == a)
*x = 42 / 0;

return 0;
}

22 changes: 22 additions & 0 deletions examples/memory-access.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Example in Bernhard Haslauer's Bachelor Thesis.
// invalid memory access if the input was '1' (== 49) or '2' (==50)
uint64_t main() {
uint64_t a;
uint64_t* x;

x = malloc(8);

*x = 0;

read(0, x, 1);

if (*x == 49)
// address outside virtual address space -> invalid memory access
*(x + (4 * 1024 * 1024 * 1024)) = 0;
if (*x == 50)
// address between data and heap -> invalid memory access
*(x + -2) = 0;

return 0;
}

22 changes: 22 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ pub fn args() -> Command {
.default_value(DEFAULT_MEMORY_SIZE)
.value_parser(value_parser_memory_size()),
)
.arg(
Arg::new("stdin")
.long("u8_stdin")
.help("Provides the first bytes for the stdin and allows to emulate \
the inputs via arguments")
.value_name("BYTES")
.num_args(1..)
)
.arg(
Arg::new("extras")
.help("Arguments passed to emulated program")
Expand Down Expand Up @@ -265,6 +273,13 @@ pub fn args() -> Command {
.allow_hyphen_values(true)
.num_args(1..)
)
.arg(
Arg::new("witness")
.help("enables witness extraction")
.short('w')
.long("witness")
.num_args(0)
)
)
.subcommand(
Command::new("qubot")
Expand Down Expand Up @@ -448,6 +463,13 @@ pub fn collect_arg_values(m: &ArgMatches, arg: &str) -> Vec<String> {
}
}

pub fn collect_arg_values_as_usize(m: &ArgMatches, arg: &str) -> Vec<usize> {
match m.get_many::<String>(arg) {
Some(iter) => iter.map(|it| it.parse::<usize>().unwrap()).collect(),
None => vec![],
}
}

fn value_parser_memory_size() -> clap::builder::RangedU64ValueParser {
value_parser!(u64).range(1_u64..=1024_u64)
}
Expand Down
70 changes: 52 additions & 18 deletions src/emulate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@ pub struct EmulatorState {
running: bool,
stdin: Stdin,
stdout: Stdout,
std_inputs: Vec<usize>,
}

impl Clone for EmulatorState {
fn clone(&self) -> Self {
EmulatorState {
registers: self.registers.clone(),
memory: self.memory.clone(),
program_counter: self.program_counter,
program_break: self.program_break,
opened: Vec::new(),
running: self.running,
stdin: io::stdin(),
stdout: io::stdout(),
std_inputs: self.std_inputs.clone(),
}
}
}

impl EmulatorState {
Expand All @@ -37,6 +54,7 @@ impl EmulatorState {
running: false,
stdin: io::stdin(),
stdout: io::stdout(),
std_inputs: Vec::new(),
}
}

Expand All @@ -50,6 +68,15 @@ impl EmulatorState {
self.load_data_segment(program);
self.load_stack_segment(argv);
}
// The emulator reads from this vector instead of the Stdin when the
// read syscall (with file direction 0) is called in the emulated code.
// Will call the syscall again as soon the std_inputs is empty.
pub fn set_stdin(&mut self, inputs: Vec<usize>) {
self.std_inputs = inputs;
}
pub fn get_stdin(&self) -> &Vec<usize> {
&self.std_inputs
}

// Partially prepares the emulator with the code segment from the
// given `program`. This can be used in conjunction with other
Expand Down Expand Up @@ -924,26 +951,33 @@ fn syscall_read(state: &mut EmulatorState) {
let buffer = state.get_reg(Register::A1);
let size = state.get_reg(Register::A2);

// Check provided address is valid, iterate through the buffer word
// by word, and emulate `read` system call via `std::io::Read`.
assert!(buffer & WORD_SIZE_MASK == 0, "buffer pointer aligned");
let mut total_bytes = 0; // counts total bytes read
let mut tmp_buffer: Vec<u8> = vec![0; 8]; // scratch buffer
for adr in (buffer..buffer + size).step_by(riscu::WORD_SIZE) {
let bytes_to_read = min(size as usize - total_bytes, riscu::WORD_SIZE);
LittleEndian::write_u64(&mut tmp_buffer, state.get_mem(adr));
let bytes = &mut tmp_buffer[0..bytes_to_read]; // only for safety
let bytes_read = state.fd_read(fd).read(bytes).expect("read success");
state.set_mem(adr, LittleEndian::read_u64(&tmp_buffer));
total_bytes += bytes_read; // tally all bytes
if bytes_read != bytes_to_read {
break;
if fd == 0 && !state.std_inputs.is_empty() {
for adr in (buffer..buffer + size).step_by(riscu::WORD_SIZE) {
let byte = state.std_inputs.pop().unwrap();
state.set_mem(adr, byte as EmulatorValue);
}
}
let result = total_bytes as u64;
} else {
// Check provided address is valid, iterate through the buffer word
// by word, and emulate `read` system call via `std::io::Read`.
assert!(buffer & WORD_SIZE_MASK == 0, "buffer pointer aligned");
let mut total_bytes = 0; // counts total bytes read
let mut tmp_buffer: Vec<u8> = vec![0; 8]; // scratch buffer
for adr in (buffer..buffer + size).step_by(riscu::WORD_SIZE) {
let bytes_to_read = min(size as usize - total_bytes, riscu::WORD_SIZE);
LittleEndian::write_u64(&mut tmp_buffer, state.get_mem(adr));
let bytes = &mut tmp_buffer[0..bytes_to_read]; // only for safety
let bytes_read = state.fd_read(fd).read(bytes).expect("read success");
state.set_mem(adr, LittleEndian::read_u64(&tmp_buffer));
total_bytes += bytes_read; // tally all bytes
if bytes_read != bytes_to_read {
break;
}
}
let result = total_bytes as u64;

state.set_reg(Register::A0, result);
debug!("read({},{:#x},{}) -> {}", fd, buffer, size, result);
state.set_reg(Register::A0, result);
debug!("read({},{:#x},{}) -> {}", fd, buffer, size, result);
}
}

fn syscall_write(state: &mut EmulatorState) {
Expand Down
33 changes: 29 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ use ::unicorn::disassemble::disassemble;
use ::unicorn::emulate::EmulatorState;
use anyhow::{Context, Result};
use bytesize::ByteSize;
use cli::{collect_arg_values, expect_arg, expect_optional_arg, LogLevel, SatType, SmtType};
use cli::{
collect_arg_values, collect_arg_values_as_usize, expect_arg, expect_optional_arg, LogLevel,
SatType, SmtType,
};
use env_logger::{Env, TimestampPrecision};
use riscu::load_object_file;
use std::{
Expand Down Expand Up @@ -66,7 +69,11 @@ fn main() -> Result<()> {
let argv = [vec![arg0], extras].concat();
let program = load_object_file(input)?;
let mut emulator = EmulatorState::new(memory_size as usize);
let mut std_inputs = collect_arg_values_as_usize(args, "stdin");
std_inputs.reverse();

emulator.bootstrap(&program, &argv);
emulator.set_stdin(std_inputs);
emulator.run();

Ok(())
Expand Down Expand Up @@ -96,14 +103,15 @@ fn main() -> Result<()> {
let emulate_model = is_beator && args.get_flag("emulate");
let arg0 = expect_arg::<String>(args, "input-file")?;
let extras = collect_arg_values(args, "extras");
let argv = [vec![arg0], extras].concat();

let extract_witness = args.get_flag("witness");
let flag_32bit = args.get_flag("32-bit");

let mut model = if !input_is_dimacs {
let mut model = if !input_is_btor2 {
let program = load_object_file(&input)?;
is64_bit = program.is64;
let argv = [vec![arg0], extras].concat();
generate_model(
&program,
memory_size,
Expand Down Expand Up @@ -199,6 +207,7 @@ fn main() -> Result<()> {
let mut emulator = EmulatorState::new(memory_size as usize);
emulator.prepare(&program); // only loads the code
load_model_into_emulator(&mut emulator, &model.unwrap());

emulator.run();
return Ok(());
}
Expand All @@ -215,10 +224,26 @@ fn main() -> Result<()> {
if !discretize {
replace_memory(model.as_mut().unwrap());
}
let gate_model = bitblast_model(&model.unwrap(), true, 64);

let input = expect_arg::<PathBuf>(args, "input-file")?;
let program = load_object_file(input)?;
let mut emulator = EmulatorState::new(memory_size as usize);

if extract_witness {
emulator.bootstrap(&program, &argv);
}

let gate_model = bitblast_model(model.as_ref().unwrap(), true, 64);

if sat_solver != SatType::None {
solve_bad_states(&gate_model, sat_solver, terminate_on_bad, one_query)?
solve_bad_states(
&gate_model,
sat_solver,
terminate_on_bad,
one_query,
emulator,
extract_witness,
)?
}

if output_to_stdout {
Expand Down
12 changes: 11 additions & 1 deletion src/unicorn/bitblasting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,20 @@ pub struct GateModel {
pub mapping: HashMap<HashableNodeRef, Vec<GateRef>>, // maps a btor2 operator to its resulting bitvector of gates
pub mapping_adders: HashMap<HashableGateRef, GateRef>,
pub constraint_based_dependencies: HashMap<HashableGateRef, (NodeRef, NodeRef)>, // used or division and remainder, and when qubot whats to test an input (InputEvaluator).
pub witness: Option<Witness>,
}

#[derive(Debug, Eq, PartialEq)]
pub struct Witness {
pub name: String,
pub bad_state_gate: GateRef,
pub gate_assignment: HashMap<HashableGateRef, bool>,
pub input_bytes: Vec<usize>,
}

pub type GateRef = Rc<RefCell<Gate>>;

#[derive(Debug)]
#[derive(Debug, Eq, PartialEq)]
pub enum Gate {
ConstTrue,
ConstFalse,
Expand Down Expand Up @@ -1482,6 +1491,7 @@ impl<'a> BitBlasting<'a> {
mapping: self.mapping,
mapping_adders: self.mapping_adders,
constraint_based_dependencies: self.constraint_based_dependencies,
witness: None,
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/unicorn/bitblasting_dimacs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ impl CNFContainer for DimacsContainer {
fn record_variable_name(&mut self, var: Self::Variable, name: String) {
self.variable_names.push((var, name));
}

fn record_input(&mut self, _var: Self::Variable, _gate: &GateRef) {}
}

impl DimacsWriter {
Expand Down
2 changes: 1 addition & 1 deletion src/unicorn/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ impl ModelBuilder {
nid: self.current_nid,
memory: self.memory_counter_node.clone(),
address: address.clone(),
value: next_value_node.clone(),
value: next_value_node,
});

self.memory_counter_flow = self.new_ite(
Expand Down
6 changes: 6 additions & 0 deletions src/unicorn/cnf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub trait CNFContainer {
fn new_var(&mut self) -> Self::Variable;
fn add_clause(&mut self, literals: &[Self::Literal]);
fn record_variable_name(&mut self, var: Self::Variable, name: String);
fn record_input(&mut self, var: Self::Variable, gate: &GateRef);
}

pub struct CNFBuilder<C: CNFContainer> {
Expand Down Expand Up @@ -67,6 +68,10 @@ impl<C: CNFContainer> CNFBuilder<C> {
self.container.record_variable_name(var, name);
}

fn record_input(&mut self, var: C::Variable, gate: &GateRef) {
self.container.record_input(var, gate);
}

#[rustfmt::skip]
fn process(&mut self, gate: &GateRef) -> C::Variable {
match &*gate.borrow() {
Expand All @@ -83,6 +88,7 @@ impl<C: CNFContainer> CNFBuilder<C> {
Gate::InputBit { name } => {
let gate_var = self.next_var();
self.record_variable_name(gate_var, name.clone());
self.record_input(gate_var, gate);
gate_var
}
Gate::Not { value } => {
Expand Down
1 change: 1 addition & 0 deletions src/unicorn/dimacs_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ impl DimacsParser {
mapping: HashMap::new(),
mapping_adders: HashMap::new(),
constraint_based_dependencies: HashMap::new(),
witness: None,
}
}

Expand Down
Loading

0 comments on commit 3a9ffcd

Please sign in to comment.