From 1e0f9ca198046897a9b111d8c1df6a705a4f0c20 Mon Sep 17 00:00:00 2001 From: Michael Rodler Date: Thu, 28 Sep 2023 13:54:33 +0000 Subject: [PATCH 1/6] Refactored mutation functions to offer a generic toolkit for mutating data. * Added a `TextInput` type that implements mutators useful only for text-based inputs. * Added several havoc mutation helper functions usable by custom mutators. * Added helper functions to mutate arrays/vec of numbers. * Generalized a couple of helper functions to be generic over all copy types, etc. --- Cargo.toml | 2 + src/fuzz_input.rs | 705 +---------------- src/fuzzer.rs | 1 - src/input_types/bytes.rs | 700 +++++++++++++++++ src/input_types/mod.rs | 11 + src/input_types/text.rs | 305 ++++++++ src/lib.rs | 3 +- src/mutators.rs | 502 ------------ .../bytes/expensive.rs} | 4 +- src/mutators/bytes/helpers.rs | 560 ++++++++++++++ src/mutators/bytes/mod.rs | 242 ++++++ src/mutators/havoc.rs | 723 ++++++++++++++++++ src/mutators/helpers.rs | 141 ++++ src/mutators/mod.rs | 7 + src/mutators/numbers.rs | 261 +++++++ src/mutators/text/helpers.rs | 464 +++++++++++ src/mutators/text/mod.rs | 273 +++++++ src/utils.rs | 112 ++- 18 files changed, 3804 insertions(+), 1212 deletions(-) create mode 100644 src/input_types/bytes.rs create mode 100644 src/input_types/mod.rs create mode 100644 src/input_types/text.rs delete mode 100644 src/mutators.rs rename src/{expensive_mutators.rs => mutators/bytes/expensive.rs} (97%) create mode 100644 src/mutators/bytes/helpers.rs create mode 100644 src/mutators/bytes/mod.rs create mode 100644 src/mutators/havoc.rs create mode 100644 src/mutators/helpers.rs create mode 100644 src/mutators/mod.rs create mode 100644 src/mutators/numbers.rs create mode 100644 src/mutators/text/helpers.rs create mode 100644 src/mutators/text/mod.rs diff --git a/Cargo.toml b/Cargo.toml index da0858e..53efff0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,8 @@ ahash = "0.8" rustc-hash = "1.1.0" indexmap = "2.1" smallvec = { version = 
"1", features = ["union", "const_generics"] } +regex = "1" +num-traits = "0.2" # For addr2line implementation addr2line = "0.21" diff --git a/src/fuzz_input.rs b/src/fuzz_input.rs index 8625667..d88b4f5 100644 --- a/src/fuzz_input.rs +++ b/src/fuzz_input.rs @@ -1,30 +1,22 @@ //! Provides an abstraction around various types of fuzz inputs -use crate::expensive_mutators; -use crate::feedback::FeedbackLog; - -use crate::mutators; -use crate::rng::Rng; - use anyhow::Result; -use rand::{Rng as _, RngCore}; - use serde::{Deserialize, Serialize}; use serde_hex::{CompactPfx, SerHex}; +#[cfg(feature = "redqueen")] +use rustc_hash::FxHashSet; + use std::fmt::Debug; use std::hash::Hash; use std::path::Path; use std::sync::{Arc, RwLock}; -#[cfg(feature = "redqueen")] -use crate::cmp_analysis::RedqueenRule; - -#[cfg(feature = "redqueen")] -use crate::cmp_analysis::RedqueenCoverage; +use crate::feedback::FeedbackLog; +use crate::rng::Rng; #[cfg(feature = "redqueen")] -use rustc_hash::FxHashSet; +use crate::cmp_analysis::RedqueenRule; /// Returned by the [`FuzzInput::minimize`] function to signal how to progress further. 
#[derive(Debug, Clone, Copy, Default)] @@ -249,691 +241,6 @@ impl NullMinimizerState { } } -impl FuzzInput for Vec { - #[cfg(feature = "redqueen")] - type RuleCandidate = (usize, Endian); - - fn from_bytes(bytes: &[u8]) -> Result { - Ok(bytes.to_vec()) - } - - fn to_bytes(&self, output: &mut Vec) -> Result<()> { - // Write this Vec into the output allocation - output.extend(self); - - Ok(()) - } - - fn mutate( - input: &mut Self, - corpus: &[Arc>], - rng: &mut Rng, - dictionary: &Option>>, - min_length: usize, - max_length: usize, - max_mutations: u64, - #[cfg(feature = "redqueen")] redqueen_rules: Option<&FxHashSet>, - ) -> Vec { - // Get the number of changes to make to the input - let num_change = (rng.next_u64() % max_mutations).max(1); - - // Mutations applied to this input - let mut mutations: Vec = Vec::new(); - - // Perform some number of mutations on the input - for _ in 0..num_change { - // Special case the redqueen mutation if there are available rules - #[cfg(feature = "redqueen")] - if let Some(rules) = redqueen_rules { - let total_mutators = Self::mutators().len() + Self::expensive_mutators().len(); - - if !rules.is_empty() && rng.gen::() % total_mutators == 0 { - // Select one of the redqueen rules - let rule_index = rng.gen::() % rules.len(); - let Some(curr_rule) = rules.iter().nth(rule_index) else { - continue; - }; - - // Select one of the possible locations in the input to apply this rule - let candidates = input.get_redqueen_rule_candidates(curr_rule); - if candidates.is_empty() { - /* - log::warn!( - "Found no candidates for this rule: {:#x} {curr_rule:x?}", - input.fuzz_hash() - ); - */ - continue; - } - - let candidate_index = rng.gen::() % candidates.len(); - let curr_candidate = &candidates[candidate_index]; - - // Apply the redqueen rule to the current input - if let Some(mutation) = input.apply_redqueen_rule(curr_rule, curr_candidate) { - mutations.push(format!("RQ_MUTATE_{mutation}")); - } - - continue; - } - } - - // Choose which 
mutators to use for this mutation. Expensive mutators are - // harder to hit since they are a bit more costly - let curr_mutators = if rng.next_u64() % max_mutations * 5 == 0 { - Self::expensive_mutators() - } else { - Self::mutators() - }; - - // Select one of the mutators - let mutator_index = rng.gen::() % curr_mutators.len(); - let mutator_func = curr_mutators[mutator_index]; - - // Execute the mutator - if let Some(mutation) = mutator_func(input, corpus, rng, dictionary) { - mutations.push(mutation); - } - } - - // Ensure the input fits in the maximum length - input.truncate(max_length); - - // Extend the input to minimum length - for _byte in 0..min_length.saturating_sub(input.len()) { - input.push(rng.next() as u8); - } - - // Return the mutation applied - mutations - } - - type MinState = BytesMinimizeState; - - fn init_minimize(&mut self) -> (Self::MinState, MinimizeControlFlow) { - ( - if self.is_empty() { - BytesMinimizeState::End - } else { - BytesMinimizeState::StartTruncate - }, - MinimizeControlFlow::Continue, - ) - } - - /// Minimize a `Vec` with a variety of different techniques - fn minimize( - &mut self, - state: &mut Self::MinState, - current_iteration: u32, - last_successful_iteration: u32, - rng: &mut Rng, - ) -> MinimizeControlFlow { - // Cannot minimize an empty input - if self.is_empty() { - *state = BytesMinimizeState::End; - return MinimizeControlFlow::Stop; - } - let last_succeeded = current_iteration == (last_successful_iteration + 1); - use BytesMinimizeState::*; - let mut cf = MinimizeControlFlow::Continue; - // Advance the state machine - *state = match *state { - // == deterministic minimization steps === - StartTruncate => { - cf = MinimizeControlFlow::one_more(); - FindTruncate(0, self.len()) - } - FindTruncate(low, high) => { - if low < high && low < self.len() { - cf = MinimizeControlFlow::one_more(); - let index = (low + high) / 2; - if last_succeeded { - FindTruncate(low, index - 1) - } else { - FindTruncate(index + 1, high) - 
} - } else { - // otherwise we transition to the next strategy. Replace with a constant - // starting from the back. We start by replacing whole - if self.len() >= 8 { - cf = MinimizeControlFlow::ContinueFor((self.len() / 8).try_into().unwrap()); - ReplaceConstBytes(self.len() - 8, &[b'A'; 8]) - } else { - cf = MinimizeControlFlow::ContinueFor(self.len().try_into().unwrap()); - Replace(self.len() - 1, b'A') - } - } - } - ReplaceConstBytes(0, data) => { - if data == [b'A'; 8] && self.len() >= 8 { - cf = MinimizeControlFlow::ContinueFor((self.len() / 8).try_into().unwrap()); - ReplaceConstBytes(self.len() - 8, &[0_u8; 8]) - } else { - cf = MinimizeControlFlow::ContinueFor(self.len().try_into().unwrap()); - Replace(self.len() - 1, 0) - } - } - ReplaceConstBytes(index, data) => ReplaceConstBytes(index.saturating_sub(8), data), - Replace(0, b'A') => { - cf = MinimizeControlFlow::ContinueFor(self.len().try_into().unwrap()); - Replace(self.len() - 1, 0) - } - Replace(0, 0) => Slices, - Replace(index, what) => Replace(index - 1, what), - // == probabilistic minimization steps === - // loop endlessly between states as long as the fuzzer wants - Slices => MultiBytes, - MultiBytes => SingleBytes, - SingleBytes => Slices, - // == end state === - End => End, - }; - - log::trace!("minimize with {:?}", state); - - // Perform the minimization strategy for this state - match *state { - StartTruncate => { - return MinimizeControlFlow::Skip; - } - FindTruncate(low, high) => { - let index = (low + high) / 2; - self.truncate(index); - } - ReplaceConstBytes(index, byte_slice) => { - let repl_len = byte_slice.len(); - if self[index..].len() >= repl_len { - self[index..(index + repl_len)].copy_from_slice(byte_slice); - } else { - return MinimizeControlFlow::Skip; - } - } - Replace(index, byte) => { - if let Some(b) = self.get_mut(index) { - if *b != byte { - *b = byte; - } else { - return MinimizeControlFlow::Skip; - } - } else { - return MinimizeControlFlow::Skip; - } - } - Slices => { - 
let curr_input_len = self.len(); - - let a = rng.gen_range(0..curr_input_len); - let b = rng.gen_range(0..curr_input_len); - let (first, second) = if a < b { (a, b) } else { (b, a) }; - self.splice(first..second, []); - } - MultiBytes => { - let count = rng.gen_range(0u32..32); - for _ in 0..count { - let curr_input_len = self.len(); - if curr_input_len > 1 { - let index = rng.gen_range(0..curr_input_len); - self.remove(index); - } else { - break; - } - } - } - SingleBytes => { - let index = rng.gen_range(0..self.len()); - self.remove(index); - } - End => { - return MinimizeControlFlow::Stop; - } - } - - if current_iteration > (3 * last_successful_iteration) - && matches!(state, Slices | MultiBytes | SingleBytes) - { - log::debug!("At iteration {current_iteration} and no progress since {last_successful_iteration} - giving up"); - *state = End; - MinimizeControlFlow::Stop - } else { - cf - } - } - - #[cfg(feature = "redqueen")] - fn apply_redqueen_rule( - &mut self, - rule: &RedqueenRule, - candidate: &Self::RuleCandidate, - ) -> Option { - let (index, endian) = candidate; - - match rule { - RedqueenRule::Primitive(from, to) => { - // Use the minimum number of bytes to compare values (removing the leading zeros) - let from_min_bytes = from.minimum_bytes(); - let new_size = to.minimum_bytes().max(from.minimum_bytes()); - let bytes: &[u8] = &to.as_bytes()[..new_size]; - - if from_min_bytes == new_size { - // Ensure we can actually fit the rule in the current input - if *index + new_size >= self.len() { - return None; - } - - match endian { - Endian::Little => { - self[*index..*index + new_size].copy_from_slice(&bytes); - } - Endian::Big => { - for (offset, byte) in bytes.iter().rev().enumerate() { - self[*index + offset] = *byte; - } - } - }; - } else { - // If the lengths are different, replace the from bytes with the to - // bytes via .splice() - - /* - log::info!( - "Replacing {index:#x} {from:?} {:?} {:x?}", - *index..*index + from_min_bytes, - &bytes - ); - - let 
end = (*index + 0x10).min(self.len()); - log::info!("BEFORE: {:x?}", &self[*index..end]); - */ - match endian { - Endian::Little => { - self.splice(*index..*index + from_min_bytes, bytes.iter().copied()); - } - Endian::Big => { - self.splice( - *index..*index + from_min_bytes, - bytes.iter().rev().copied(), - ); - } - }; - /* - let end = (*index + 0x10).min(self.len()); - log::info!("AFTER: {:x?}", &self[*index..end]); - */ - } - - Some(format!("{to:x?}_offset_{index:#x}")) - } - RedqueenRule::SingleF32(_from, to) => { - let size = to.len(); - - // Ensure we can actually fit the rule in the current input - if *index + size >= self.len() { - return None; - } - - let bytes = to; - - match endian { - Endian::Little => { - self[*index..*index + size].copy_from_slice(bytes); - } - Endian::Big => { - for (offset, byte) in bytes.iter().rev().enumerate() { - self[*index + offset] = *byte; - } - } - }; - - Some(format!("f32_{to:x?}_offset_{index:#x}")) - } - RedqueenRule::SingleF64(_from, to) => { - let size = to.len(); - - // Ensure we can actually fit the rule in the current input - if *index + size >= self.len() { - return None; - } - - let bytes = to; - - match endian { - Endian::Little => { - self[*index..*index + size].copy_from_slice(bytes); - } - Endian::Big => { - for (offset, byte) in bytes.iter().rev().enumerate() { - self[*index + offset] = *byte; - } - } - }; - - Some(format!("f64_{to:x?}_offset_{index:#x}")) - } - RedqueenRule::SingleF80(_from, to) => { - let size = to.len(); - - // Ensure we can actually fit the rule in the current input - if *index + size >= self.len() { - return None; - } - - let val = extended::Extended::from_le_bytes(*to).to_f64(); - - self[*index..*index + size].copy_from_slice(to); - Some(format!("f80_{to:x?}_{val}_offset_{index:#x}")) - } - RedqueenRule::Bytes(from, to) => { - let index: usize = *index; - - let len = if to.len() == from.len() { - // Both from and to are the same size, directly copy the bytes - let size = to.len(); - 
self[index..index + size].copy_from_slice(to); - - to.len() - } else { - // If the "to" bytes is longer than the "from" needle, splice the "to" bytes - // where the from needle was - let mut new_length = self.len(); - if to.len() > from.len() { - new_length += to.len() - from.len(); - } - if from.len() > to.len() { - new_length -= from.len() - to.len(); - } - - let mut new_self = vec![0_u8; new_length]; - new_self[..index].copy_from_slice(&self[..index]); - new_self[index..index + to.len()].copy_from_slice(to); - new_self[index + to.len()..].copy_from_slice(&self[index + from.len()..]); - *self = new_self; - - new_length - }; - - Some(format!("Bytes_offset_{index:#x}_len_{len:#x}")) - } - } - } - - #[cfg(feature = "redqueen")] - fn entropy_limit(&self) -> usize { - self.len() - } - - #[allow(clippy::cast_possible_truncation, clippy::doc_markdown)] - #[cfg(feature = "redqueen")] - fn increase_entropy(&mut self, rng: &mut Rng, start: usize, end: usize) -> Result<()> { - use rand::Fill; - - // Randomize these bytes - Ok(self[start..end].try_fill(rng)?) - } - - /// Get a list of all of the [`RuleCandidate`]s that the given `rule` can be applied to. 
These - /// candidates are then passed to `apply_redqueen_rule` to deterministically search the - /// applicable redqueen search space for this input - #[allow( - unused_variables, - clippy::cast_possible_truncation, - clippy::absurd_extreme_comparisons, - clippy::unnecessary_cast - )] - #[cfg(feature = "redqueen")] - fn get_redqueen_rule_candidates(&self, rule: &RedqueenRule) -> Vec { - get_redqueen_rule_candidates_for_vec(self, rule, GetRuleMode::All) - } - - #[cfg(feature = "redqueen")] - fn has_redqueen_rule_candidates(&self, rule: &RedqueenRule) -> bool { - !get_redqueen_rule_candidates_for_vec(self, rule, GetRuleMode::Fast).is_empty() - } - - /// Current mutators available for mutation - fn mutators() -> &'static [Self::MutatorFunc] { - &[ - mutators::bit_flip, - mutators::byte_flip, - mutators::byte_delete, - mutators::set_random_u8, - mutators::set_random_u16, - mutators::set_random_u32, - mutators::set_random_u64, - mutators::splice_corpus, - mutators::byte_inc, - mutators::byte_dec, - mutators::splice_input, - mutators::replace_with_interesting, - mutators::set_input_slice, - mutators::overwrite_from_dictionary, - mutators::byte_insert, - ] - } - - /// Current expensive mutators available for mutation (typically those which allocate) - fn expensive_mutators() -> &'static [Self::MutatorFunc] { - &[ - expensive_mutators::splice_corpus_extend, - expensive_mutators::splice_from_dictionary_extend, - expensive_mutators::remove_slice, - ] - } - - /// Generate a random `Vec` of `max_length` size - fn generate( - _corpus: &[Arc>], - rng: &mut Rng, - _dictionary: &Option>>, - min_length: usize, - max_length: usize, - ) -> InputWithMetadata { - debug_assert!(max_length > 1); - - // generate input with random length, but make it a power of two most of the time - let mut len = rng.gen_range(min_length..max_length); - if rng.gen_bool(0.8) { - len = len.next_power_of_two().max(max_length); - } - - // in 80% of the cases; generate high entropy random input - let result = 
if rng.gen_bool(0.8) { - let mut result = vec![0u8; len]; - rng.fill_bytes(&mut result); - result - } else { - // in 10% use a low entropy input - let b = 0x41 + rng.gen_range(0..26); - vec![b; len] - }; - InputWithMetadata::from_input(result) - } - - /// return shannon byte entropy for the bytes slice - fn entropy_metric(&self) -> Option { - Some(crate::utils::byte_entropy(self)) - } - - /// just return the length of the current byte buffer - fn len(&self) -> Option { - Some(self.len()) - } -} - -/// The mode to get redqueen rule candidates for Vec -#[cfg(feature = "redqueen")] -#[derive(Copy, Clone, Eq, PartialEq)] -enum GetRuleMode { - /// Return from the function after the first found candidate - Fast, - - /// Return after all candidates are found - All, -} - -/// Get the rule candidates for a Vec. If mode is Fast, return the candidates on the first found -/// candidate. -#[cfg(feature = "redqueen")] -fn get_redqueen_rule_candidates_for_vec( - input: &Vec, - rule: &RedqueenRule, - mode: GetRuleMode, -) -> Vec< as FuzzInput>::RuleCandidate> { - let mut candidates = Vec::new(); - - let fast = matches!(mode, GetRuleMode::Fast); - - match rule { - RedqueenRule::Primitive(from, _to) => { - // Use the minimum number of bytes to compare values (removing the leading zeros) - let from_min_bytes = from.minimum_bytes(); - // let to_min_bytes = to.minimum_bytes(); - - let size = from_min_bytes; - let from_le_bytes = &from.as_bytes()[..size]; - - let same_big_endian = from_le_bytes - .iter() - .zip(from_le_bytes.iter().rev()) - .all(|(x, y)| *x == *y); - - for index in 0..input.len().saturating_sub(size) { - let curr_window = &input[index..index + size]; - - if curr_window == from_le_bytes { - candidates.push((index, Endian::Little)); - if fast { - return candidates; - } - } - - // Only look for big endian operand redqueen if big != little endians - if !same_big_endian { - let from_be_bytes = from_le_bytes.iter().rev(); - - if curr_window.iter().zip(from_be_bytes).all(|(x, 
y)| *x == *y) { - candidates.push((index, Endian::Big)); - if fast { - return candidates; - } - } - } - } - } - RedqueenRule::SingleF32(from, _to) => { - if input.len() >= from.len() { - for i in 0..input.len().saturating_sub(from.len() - 1) { - if &input[i..i + from.len()] == from.as_slice() { - candidates.push((i, Endian::Little)); - if fast { - return candidates; - } - } - } - } - } - RedqueenRule::SingleF64(from, _to) => { - if input.len() >= from.len() { - for i in 0..input.len().saturating_sub(from.len() - 1) { - if &input[i..i + from.len()] == from { - candidates.push((i, Endian::Little)); - if fast { - return candidates; - } - } - } - } - } - RedqueenRule::SingleF80(from, _to) => { - if input.len() >= from.len() { - for i in 0..input.len().saturating_sub(from.len() - 1) { - if &input[i..i + from.len()] == from { - candidates.push((i, Endian::Little)); - if fast { - return candidates; - } - } - } - } - } - RedqueenRule::Bytes(from, _to) => { - if input.len() >= from.len() { - for i in 0..input.len().saturating_sub(from.len() - 1) { - if &input[i..i + from.len()] == from { - candidates.push((i, Endian::Little)); - if fast { - return candidates; - } - } - } - } - } - } - - candidates -} - -/// Stages for the minimization process of a byte string (e.g., `Vec`). -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub enum BytesMinimizeState { - /// Start a binary search to identify the right length to truncate the testcase to. - StartTruncate, - - /// Attempt to truncate to the given size. - FindTruncate(usize, usize), - - /// Replace unused sub-slices in the input with a constant slice. - ReplaceConstBytes(usize, &'static [u8]), - - /// Replace unused bytes in the input with a constant byte. - Replace(usize, u8), - - /// Delete a randomly-selected sub-slice of the input to make the input smaller. - Slices, - - /// Randomly select and delete multiple bytes in the input to make the input smaller. 
- MultiBytes, - - /// Delete a single randomly-selected byte in the input to make the input smaller. - SingleBytes, - - /// signal immediate stop of minimization. - End, -} - -impl MinimizerState for BytesMinimizeState { - /// test if the given state represents a stop state - fn is_stop_state(&self) -> bool { - matches!(self, Self::End) - } -} - -impl Default for BytesMinimizeState { - fn default() -> Self { - Self::End - } -} - -/// Endianness for the redqueen rules -#[derive(Debug)] -pub enum Endian { - /// Little endian - Little, - /// Big endian - Big, -} - -/// A particular type of new coverage -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum CoverageType { - /// Only a new address was found - Address(u64), - - /// A new coverage with rflags and hit count provided by redqueen - #[cfg(feature = "redqueen")] - Redqueen(RedqueenCoverage), -} - /// Metadata about a crashing input or a new input that found new coverage. #[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Default)] pub struct InputMetadata { diff --git a/src/fuzzer.rs b/src/fuzzer.rs index b99d000..6f759e1 100644 --- a/src/fuzzer.rs +++ b/src/fuzzer.rs @@ -15,7 +15,6 @@ use std::sync::{Arc, RwLock}; use crate::addrs::{Cr3, VirtAddr}; use crate::cmp_analysis::RedqueenRule; -use crate::expensive_mutators; use crate::feedback::FeedbackTracker; use crate::filesystem::FileSystem; use crate::fuzz_input::{FuzzInput, InputMetadata, InputWithMetadata}; diff --git a/src/input_types/bytes.rs b/src/input_types/bytes.rs new file mode 100644 index 0000000..04c9598 --- /dev/null +++ b/src/input_types/bytes.rs @@ -0,0 +1,700 @@ +//! Implementation of the [`FuzzInput`] trait for [`Vec`]. 
+ +use anyhow::Result; +use rand::{Rng as _, RngCore}; +#[cfg(feature = "redqueen")] +use rustc_hash::FxHashSet; +use std::fmt::Debug; +use std::sync::Arc; + +use crate::fuzz_input::{FuzzInput, InputWithMetadata, MinimizeControlFlow, MinimizerState}; +use crate::mutators; +use crate::rng::Rng; + +#[cfg(feature = "redqueen")] +use crate::cmp_analysis::{RedqueenCoverage, RedqueenRule}; + +impl FuzzInput for Vec { + #[cfg(feature = "redqueen")] + type RuleCandidate = (usize, Endian); + + fn from_bytes(bytes: &[u8]) -> Result { + Ok(bytes.to_vec()) + } + + fn to_bytes(&self, output: &mut Vec) -> Result<()> { + // Write this Vec into the output allocation + output.extend(self); + + Ok(()) + } + + fn mutate( + input: &mut Self, + corpus: &[Arc>], + rng: &mut Rng, + dictionary: &Option>>, + min_length: usize, + max_length: usize, + max_mutations: u64, + #[cfg(feature = "redqueen")] redqueen_rules: Option<&FxHashSet>, + ) -> Vec { + // Get the number of changes to make to the input + let num_change = (rng.next_u64() % max_mutations).max(1); + + // Mutations applied to this input + let mut mutations: Vec = Vec::new(); + + // Perform some number of mutations on the input + for _ in 0..num_change { + // Special case the redqueen mutation if there are available rules + #[cfg(feature = "redqueen")] + if let Some(rules) = redqueen_rules { + let total_mutators = Self::mutators().len() + Self::expensive_mutators().len(); + + if !rules.is_empty() && rng.gen::() % total_mutators == 0 { + // Select one of the redqueen rules + let rule_index = rng.gen::() % rules.len(); + let Some(curr_rule) = rules.iter().nth(rule_index) else { + continue; + }; + + // Select one of the possible locations in the input to apply this rule + let candidates = input.get_redqueen_rule_candidates(curr_rule); + if candidates.is_empty() { + /* + log::warn!( + "Found no candidates for this rule: {:#x} {curr_rule:x?}", + input.fuzz_hash() + ); + */ + continue; + } + + let candidate_index = rng.gen::() % 
candidates.len(); + let curr_candidate = &candidates[candidate_index]; + + // Apply the redqueen rule to the current input + if let Some(mutation) = input.apply_redqueen_rule(curr_rule, curr_candidate) { + mutations.push(format!("RQ_MUTATE_{mutation}")); + } + + continue; + } + } + + // Choose which mutators to use for this mutation. Expensive mutators are + // harder to hit since they are a bit more costly + let curr_mutators = if rng.next_u64() % max_mutations * 5 == 0 { + Self::expensive_mutators() + } else { + Self::mutators() + }; + + // Select one of the mutators + let mutator_index = rng.gen::() % curr_mutators.len(); + let mutator_func = curr_mutators[mutator_index]; + + // Execute the mutator + if let Some(mutation) = mutator_func(input, corpus, rng, dictionary) { + mutations.push(mutation); + } + } + + // Ensure the input fits in the maximum length + input.truncate(max_length); + + // Extend the input to minimum length + for _byte in 0..min_length.saturating_sub(input.len()) { + input.push(rng.next() as u8); + } + + // Return the mutation applied + mutations + } + + type MinState = BytesMinimizeState; + + fn init_minimize(&mut self) -> (Self::MinState, MinimizeControlFlow) { + ( + if self.is_empty() { + BytesMinimizeState::End + } else { + BytesMinimizeState::StartTruncate + }, + MinimizeControlFlow::Continue, + ) + } + + /// Minimize a `Vec` with a variety of different techniques + fn minimize( + &mut self, + state: &mut Self::MinState, + current_iteration: u32, + last_successful_iteration: u32, + rng: &mut Rng, + ) -> MinimizeControlFlow { + // Cannot minimize an empty input + if self.is_empty() { + *state = BytesMinimizeState::End; + return MinimizeControlFlow::Stop; + } + let last_succeeded = current_iteration == (last_successful_iteration + 1); + use BytesMinimizeState::*; + let mut cf = MinimizeControlFlow::Continue; + // Advance the state machine + *state = match *state { + // == deterministic minimization steps === + StartTruncate => { + cf = 
MinimizeControlFlow::one_more(); + FindTruncate(0, self.len()) + } + FindTruncate(low, high) => { + if low < high && low < self.len() { + cf = MinimizeControlFlow::one_more(); + let index = (low + high) / 2; + if last_succeeded { + FindTruncate(low, index - 1) + } else { + FindTruncate(index + 1, high) + } + } else { + // otherwise we transition to the next strategy. Replace with a constant + // starting from the back. We start by replacing whole + if self.len() >= 8 { + cf = MinimizeControlFlow::ContinueFor((self.len() / 8).try_into().unwrap()); + ReplaceConstBytes(self.len() - 8, &[b'A'; 8]) + } else { + cf = MinimizeControlFlow::ContinueFor(self.len().try_into().unwrap()); + Replace(self.len() - 1, b'A') + } + } + } + ReplaceConstBytes(0, data) => { + if data == [b'A'; 8] && self.len() >= 8 { + cf = MinimizeControlFlow::ContinueFor((self.len() / 8).try_into().unwrap()); + ReplaceConstBytes(self.len() - 8, &[0_u8; 8]) + } else { + cf = MinimizeControlFlow::ContinueFor(self.len().try_into().unwrap()); + Replace(self.len() - 1, 0) + } + } + ReplaceConstBytes(index, data) => ReplaceConstBytes(index.saturating_sub(8), data), + Replace(0, b'A') => { + cf = MinimizeControlFlow::ContinueFor(self.len().try_into().unwrap()); + Replace(self.len() - 1, 0) + } + Replace(0, 0) => Slices, + Replace(index, what) => Replace(index - 1, what), + // == probabilistic minimization steps === + // loop endlessly between states as long as the fuzzer wants + Slices => MultiBytes, + MultiBytes => SingleBytes, + SingleBytes => Slices, + // == end state === + End => End, + }; + + log::trace!("minimize with {:?}", state); + + // Perform the minimization strategy for this state + match *state { + StartTruncate => { + return MinimizeControlFlow::Skip; + } + FindTruncate(low, high) => { + let index = (low + high) / 2; + self.truncate(index); + } + ReplaceConstBytes(index, byte_slice) => { + let repl_len = byte_slice.len(); + if self[index..].len() >= repl_len { + self[index..(index + 
repl_len)].copy_from_slice(byte_slice); + } else { + return MinimizeControlFlow::Skip; + } + } + Replace(index, byte) => { + if let Some(b) = self.get_mut(index) { + if *b != byte { + *b = byte; + } else { + return MinimizeControlFlow::Skip; + } + } else { + return MinimizeControlFlow::Skip; + } + } + Slices => { + let curr_input_len = self.len(); + + let a = rng.gen_range(0..curr_input_len); + let b = rng.gen_range(0..curr_input_len); + let (first, second) = if a < b { (a, b) } else { (b, a) }; + self.splice(first..second, []); + } + MultiBytes => { + let count = rng.gen_range(0u32..32); + for _ in 0..count { + let curr_input_len = self.len(); + if curr_input_len > 1 { + let index = rng.gen_range(0..curr_input_len); + self.remove(index); + } else { + break; + } + } + } + SingleBytes => { + let index = rng.gen_range(0..self.len()); + self.remove(index); + } + End => { + return MinimizeControlFlow::Stop; + } + } + + if current_iteration > (3 * last_successful_iteration) + && matches!(state, Slices | MultiBytes | SingleBytes) + { + log::debug!("At iteration {current_iteration} and no progress since {last_successful_iteration} - giving up"); + *state = End; + MinimizeControlFlow::Stop + } else { + cf + } + } + + #[cfg(feature = "redqueen")] + fn apply_redqueen_rule( + &mut self, + rule: &RedqueenRule, + candidate: &Self::RuleCandidate, + ) -> Option { + let (index, endian) = candidate; + + match rule { + RedqueenRule::Primitive(from, to) => { + // Use the minimum number of bytes to compare values (removing the leading zeros) + let from_min_bytes = from.minimum_bytes(); + let new_size = to.minimum_bytes().max(from.minimum_bytes()); + let bytes: &[u8] = &to.as_bytes()[..new_size]; + + if from_min_bytes == new_size { + // Ensure we can actually fit the rule in the current input + if *index + new_size >= self.len() { + return None; + } + + match endian { + Endian::Little => { + self[*index..*index + new_size].copy_from_slice(&bytes); + } + Endian::Big => { + for (offset, 
byte) in bytes.iter().rev().enumerate() { + self[*index + offset] = *byte; + } + } + }; + } else { + // If the lengths are different, replace the from bytes with the to + // bytes via .splice() + + /* + log::info!( + "Replacing {index:#x} {from:?} {:?} {:x?}", + *index..*index + from_min_bytes, + &bytes + ); + + let end = (*index + 0x10).min(self.len()); + log::info!("BEFORE: {:x?}", &self[*index..end]); + */ + match endian { + Endian::Little => { + self.splice(*index..*index + from_min_bytes, bytes.iter().copied()); + } + Endian::Big => { + self.splice( + *index..*index + from_min_bytes, + bytes.iter().rev().copied(), + ); + } + }; + /* + let end = (*index + 0x10).min(self.len()); + log::info!("AFTER: {:x?}", &self[*index..end]); + */ + } + + Some(format!("{to:x?}_offset_{index:#x}")) + } + RedqueenRule::SingleF32(_from, to) => { + let size = to.len(); + + // Ensure we can actually fit the rule in the current input + if *index + size >= self.len() { + return None; + } + + let bytes = to; + + match endian { + Endian::Little => { + self[*index..*index + size].copy_from_slice(bytes); + } + Endian::Big => { + for (offset, byte) in bytes.iter().rev().enumerate() { + self[*index + offset] = *byte; + } + } + }; + + Some(format!("f32_{to:x?}_offset_{index:#x}")) + } + RedqueenRule::SingleF64(_from, to) => { + let size = to.len(); + + // Ensure we can actually fit the rule in the current input + if *index + size >= self.len() { + return None; + } + + let bytes = to; + + match endian { + Endian::Little => { + self[*index..*index + size].copy_from_slice(bytes); + } + Endian::Big => { + for (offset, byte) in bytes.iter().rev().enumerate() { + self[*index + offset] = *byte; + } + } + }; + + Some(format!("f64_{to:x?}_offset_{index:#x}")) + } + RedqueenRule::SingleF80(_from, to) => { + let size = to.len(); + + // Ensure we can actually fit the rule in the current input + if *index + size >= self.len() { + return None; + } + + let val = 
extended::Extended::from_le_bytes(*to).to_f64(); + + self[*index..*index + size].copy_from_slice(to); + Some(format!("f80_{to:x?}_{val}_offset_{index:#x}")) + } + RedqueenRule::Bytes(from, to) => { + let index: usize = *index; + + let len = if to.len() == from.len() { + // Both from and to are the same size, directly copy the bytes + let size = to.len(); + self[index..index + size].copy_from_slice(to); + + to.len() + } else { + // If the "to" bytes is longer than the "from" needle, splice the "to" bytes + // where the from needle was + let mut new_length = self.len(); + if to.len() > from.len() { + new_length += to.len() - from.len(); + } + if from.len() > to.len() { + new_length -= from.len() - to.len(); + } + + let mut new_self = vec![0_u8; new_length]; + new_self[..index].copy_from_slice(&self[..index]); + new_self[index..index + to.len()].copy_from_slice(to); + new_self[index + to.len()..].copy_from_slice(&self[index + from.len()..]); + *self = new_self; + + new_length + }; + + Some(format!("Bytes_offset_{index:#x}_len_{len:#x}")) + } + } + } + + #[cfg(feature = "redqueen")] + fn entropy_limit(&self) -> usize { + self.len() + } + + #[allow(clippy::cast_possible_truncation, clippy::doc_markdown)] + #[cfg(feature = "redqueen")] + fn increase_entropy(&mut self, rng: &mut Rng, start: usize, end: usize) -> Result<()> { + use rand::Fill; + + // Randomize these bytes + Ok(self[start..end].try_fill(rng)?) + } + + /// Get a list of all of the [`RuleCandidate`]s that the given `rule` can be applied to. 
These
+    /// candidates are then passed to `apply_redqueen_rule` to deterministically search the
+    /// applicable redqueen search space for this input
+    #[allow(
+        unused_variables,
+        clippy::cast_possible_truncation,
+        clippy::absurd_extreme_comparisons,
+        clippy::unnecessary_cast
+    )]
+    #[cfg(feature = "redqueen")]
+    fn get_redqueen_rule_candidates(&self, rule: &RedqueenRule) -> Vec {
+        get_redqueen_rule_candidates_for_vec(self, rule, GetRuleMode::All)
+    }
+
+    #[cfg(feature = "redqueen")]
+    fn has_redqueen_rule_candidates(&self, rule: &RedqueenRule) -> bool {
+        !get_redqueen_rule_candidates_for_vec(self, rule, GetRuleMode::Fast).is_empty()
+    }
+
+    /// Current mutators available for mutation
+    fn mutators() -> &'static [Self::MutatorFunc] {
+        &[
+            mutators::bytes::bit_flip,
+            mutators::bytes::byte_flip,
+            mutators::bytes::byte_inc,
+            mutators::bytes::byte_dec,
+            mutators::bytes::set_random_u8,
+            mutators::bytes::set_random_u16,
+            mutators::bytes::set_random_u32,
+            mutators::bytes::set_random_u64,
+            mutators::bytes::set_input_slice,
+            mutators::bytes::replace_with_interesting,
+            mutators::bytes::overwrite_from_dictionary,
+            mutators::bytes::splice_input,
+            mutators::bytes::splice_corpus,
+            mutators::bytes::byte_delete,
+            mutators::bytes::byte_insert,
+        ]
+    }
+
+    /// Current expensive mutators available for mutation (typically those which allocate)
+    fn expensive_mutators() -> &'static [Self::MutatorFunc] {
+        &[
+            mutators::bytes::expensive::splice_corpus_extend,
+            mutators::bytes::expensive::splice_from_dictionary_extend,
+            mutators::bytes::expensive::remove_slice,
+        ]
+    }
+
+    /// Generate a random `Vec` whose length lies in `min_length..=max_length`
+    fn generate(
+        _corpus: &[Arc>],
+        rng: &mut Rng,
+        _dictionary: &Option>>,
+        min_length: usize,
+        max_length: usize,
+    ) -> InputWithMetadata {
+        debug_assert!(max_length > 1);
+
+        // random length; mostly rounded up to a power of two, but never beyond `max_length`
+        let mut len = rng.gen_range(min_length..max_length);
+        if rng.gen_bool(0.8) {
+            len = len.next_power_of_two().min(max_length);
+        }
+
+        // in 80% of the cases; generate high entropy random input
+        let result = if rng.gen_bool(0.8) {
+            let mut result = vec![0u8; len];
+            rng.fill_bytes(&mut result);
+            result
+        } else {
+            // in the remaining 20% use a low entropy input: one repeated letter `A`..=`Z`
+            let b = 0x41 + rng.gen_range(0..26);
+            vec![b; len]
+        };
+        InputWithMetadata::from_input(result)
+    }
+
+    /// return shannon byte entropy for the bytes slice
+    fn entropy_metric(&self) -> Option {
+        Some(crate::utils::byte_entropy(self))
+    }
+
+    /// just return the length of the current byte buffer
+    fn len(&self) -> Option {
+        Some(self.len())
+    }
+}
+
+/// The mode to get redqueen rule candidates for Vec
+#[cfg(feature = "redqueen")]
+#[derive(Copy, Clone, Eq, PartialEq)]
+enum GetRuleMode {
+    /// Return from the function after the first found candidate
+    Fast,
+
+    /// Return after all candidates are found
+    All,
+}
+
+/// Get the rule candidates for a Vec. If mode is Fast, return the candidates on the first found
+/// candidate.
+#[cfg(feature = "redqueen")]
+fn get_redqueen_rule_candidates_for_vec(
+    input: &Vec,
+    rule: &RedqueenRule,
+    mode: GetRuleMode,
+) -> Vec< as FuzzInput>::RuleCandidate> {
+    let mut candidates = Vec::new();
+
+    let fast = matches!(mode, GetRuleMode::Fast);
+
+    match rule {
+        RedqueenRule::Primitive(from, _to) => {
+            // Use the minimum number of bytes to compare values (removing the leading zeros)
+            let from_min_bytes = from.minimum_bytes();
+            // let to_min_bytes = to.minimum_bytes();
+
+            let size = from_min_bytes;
+            let from_le_bytes = &from.as_bytes()[..size];
+
+            let same_big_endian = from_le_bytes
+                .iter()
+                .zip(from_le_bytes.iter().rev())
+                .all(|(x, y)| *x == *y);
+            // `len + 1 - size` windows fit in the input; include the one ending at the last byte
+            for index in 0..(input.len() + 1).saturating_sub(size) {
+                let curr_window = &input[index..index + size];
+
+                if curr_window == from_le_bytes {
+                    candidates.push((index, Endian::Little));
+                    if fast {
+                        return candidates;
+                    }
+                }
+
+                // Only look for big endian operand redqueen if big != little endians
+                if !same_big_endian {
+                    let from_be_bytes = from_le_bytes.iter().rev();
+
+                    if curr_window.iter().zip(from_be_bytes).all(|(x, y)| *x == *y) {
+                        candidates.push((index, Endian::Big));
+                        if fast {
+                            return candidates;
+                        }
+                    }
+                }
+            }
+        }
+        RedqueenRule::SingleF32(from, _to) => {
+            if input.len() >= from.len() {
+                for i in 0..input.len().saturating_sub(from.len() - 1) {
+                    if &input[i..i + from.len()] == from.as_slice() {
+                        candidates.push((i, Endian::Little));
+                        if fast {
+                            return candidates;
+                        }
+                    }
+                }
+            }
+        }
+        RedqueenRule::SingleF64(from, _to) => {
+            if input.len() >= from.len() {
+                for i in 0..input.len().saturating_sub(from.len() - 1) {
+                    if &input[i..i + from.len()] == from {
+                        candidates.push((i, Endian::Little));
+                        if fast {
+                            return candidates;
+                        }
+                    }
+                }
+            }
+        }
+        RedqueenRule::SingleF80(from, _to) => {
+            if input.len() >= from.len() {
+                for i in 0..input.len().saturating_sub(from.len() - 1) {
+                    if &input[i..i + from.len()] == from {
+                        candidates.push((i, Endian::Little));
+                        if fast {
+                            return candidates;
+                        }
+                    }
+                }
+            }
+        }
+        RedqueenRule::Bytes(from, _to) => {
+            if input.len() >= from.len() {
+                for i in 0..input.len().saturating_sub(from.len() - 1) {
+                    if &input[i..i + from.len()] == from {
+                        candidates.push((i, Endian::Little));
+                        if fast {
+                            return candidates;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    candidates
+}
+
+/// Stages for the minimization process of a byte string (e.g., `Vec`).
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub enum BytesMinimizeState {
+    /// Start a binary search to identify the right length to truncate the testcase to.
+    StartTruncate,
+
+    /// Attempt to truncate to the given size.
+    FindTruncate(usize, usize),
+
+    /// Replace unused sub-slices in the input with a constant slice.
+    ReplaceConstBytes(usize, &'static [u8]),
+
+    /// Replace unused bytes in the input with a constant byte.
+    Replace(usize, u8),
+
+    /// Delete a randomly-selected sub-slice of the input to make the input smaller.
+    Slices,
+
+    /// Randomly select and delete multiple bytes in the input to make the input smaller.
+    MultiBytes,
+
+    /// Delete a single randomly-selected byte in the input to make the input smaller.
+    SingleBytes,
+
+    /// signal immediate stop of minimization.
+ End, +} + +impl MinimizerState for BytesMinimizeState { + /// test if the given state represents a stop state + fn is_stop_state(&self) -> bool { + matches!(self, Self::End) + } +} + +impl Default for BytesMinimizeState { + fn default() -> Self { + Self::End + } +} + +/// Endianness for the redqueen rules +#[derive(Debug)] +pub enum Endian { + /// Little endian + Little, + /// Big endian + Big, +} + +/// A particular type of new coverage +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum CoverageType { + /// Only a new address was found + Address(u64), + + /// A new coverage with rflags and hit count provided by redqueen + #[cfg(feature = "redqueen")] + Redqueen(RedqueenCoverage), +} diff --git a/src/input_types/mod.rs b/src/input_types/mod.rs new file mode 100644 index 0000000..8c56220 --- /dev/null +++ b/src/input_types/mod.rs @@ -0,0 +1,11 @@ +//! Implementations of the [`FuzzInput`] trait for input types: +//! +//! * [`Vec`] - classic bytes only mutators. +//! * [`TextInput`] - Wrapper around `Vec` to provide mutations for text-based formats. +//! + +pub mod bytes; +pub mod text; + +pub use bytes::*; +pub use text::*; diff --git a/src/input_types/text.rs b/src/input_types/text.rs new file mode 100644 index 0000000..653439d --- /dev/null +++ b/src/input_types/text.rs @@ -0,0 +1,305 @@ +//! Implementation of the [`TextInput`], which represents a primarily ascii/text based input. +//! The trait [`FuzzInput`] is implemented for use with snapchange. 
+
+#![allow(missing_docs)]
+
+use crate::fuzz_input::{FuzzInput, InputWithMetadata, MinimizeControlFlow, MinimizerState};
+use crate::mutators;
+use crate::rng::Rng;
+
+use anyhow::Result;
+#[cfg(feature = "redqueen")]
+use rand::seq::SliceRandom;
+use rand::{Rng as _, RngCore};
+#[cfg(feature = "redqueen")]
+use rustc_hash::FxHashSet;
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::sync::Arc;
+
+#[cfg(feature = "redqueen")]
+use crate::cmp_analysis::RedqueenRule;
+
+#[derive(Hash, PartialEq, Eq, Debug, Default, Clone)]
+pub struct TextInput {
+    pub data: Vec,
+    pub(crate) delimiters: Option)>>,
+}
+
+impl From for TextInput {
+    fn from(s: String) -> Self {
+        Self {
+            data: s.into_bytes(),
+            ..Default::default()
+        }
+    }
+}
+
+impl From> for TextInput {
+    fn from(b: Vec) -> Self {
+        Self {
+            data: b,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<&[u8]> for TextInput {
+    fn from(b: &[u8]) -> Self {
+        Self {
+            data: b.to_vec(),
+            ..Default::default()
+        }
+    }
+}
+
+impl<'a> TextInput {
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    pub fn data_mut(&'a mut self) -> &'a mut Vec {
+        &mut self.data
+    }
+
+    pub fn data(&'a self) -> &'a [u8] {
+        &self.data
+    }
+}
+
+impl FuzzInput for TextInput {
+    fn from_bytes(bytes: &[u8]) -> Result {
+        Ok(bytes.into())
+    }
+
+    fn to_bytes(&self, output: &mut Vec) -> Result<()> {
+        output.clear();
+        output.extend_from_slice(&self.data);
+        Ok(())
+    }
+
+    fn mutate(
+        input: &mut Self,
+        corpus: &[Arc>],
+        rng: &mut Rng,
+        dictionary: &Option>>,
+        min_length: usize,
+        max_length: usize,
+        max_mutations: u64,
+        #[cfg(feature = "redqueen")] redqueen_rules: Option<&FxHashSet>,
+    ) -> Vec {
+        assert!(max_length != 0);
+
+        if input.is_empty() {
+            *input = Self::generate(corpus, rng, dictionary, min_length, max_length).input;
+            return vec!["Generate".to_string()];
+        }
+
+        // Get the number of changes to make to the input
+        let num_change = (rng.next_u64() % max_mutations).max(1) as usize;
+
+        // Mutations applied to this input
+        let mut mutations: Vec = Vec::with_capacity(num_change);
+
+        // Perform some number of mutations on the input
+        for _ in 0..num_change {
+            // Special case the redqueen mutation if there are available rules
+            #[cfg(feature = "redqueen")]
+            if let Some(rules) = redqueen_rules {
+                let total_mutators = Self::mutators().len() + Self::expensive_mutators().len();
+
+                if !rules.is_empty() && rng.gen::() % total_mutators == 0 {
+                    // Select one of the redqueen rules
+                    let rule_index = rng.gen::() % rules.len();
+                    let Some(curr_rule) = rules.iter().nth(rule_index) else {
+                        continue;
+                    };
+
+                    // Select one of the possible locations in the input to apply this rule
+                    let candidates = input.get_redqueen_rule_candidates(curr_rule);
+                    if candidates.is_empty() {
+                        /*
+                        log::warn!(
+                            "Found no candidates for this rule: {:#x} {curr_rule:x?}",
+                            input.fuzz_hash()
+                        );
+                        */
+                        continue;
+                    }
+
+                    let curr_candidate = candidates.choose(rng).unwrap();
+
+                    // Apply the redqueen rule to the current input
+                    if let Some(mutation) = input.apply_redqueen_rule(curr_rule, curr_candidate) {
+                        mutations.push(format!("RQ_MUTATE_{mutation}"));
+                    }
+
+                    continue;
+                }
+            }
+
+            // Choose which mutators to use for this mutation. Expensive mutators are
+            // harder to hit since they are a bit more costly. An empty expensive list (as for
+            // `TextInput`) is never chosen: `% curr_mutators.len()` below would divide by zero
+            let curr_mutators = match Self::expensive_mutators() {
+                costly if !costly.is_empty() && rng.next_u64() % max_mutations * 5 == 0 => costly,
+                _ => Self::mutators(),
+            };
+
+            // Select one of the mutators
+            let mutator_index = rng.gen::() % curr_mutators.len();
+            let mutator_func = curr_mutators[mutator_index];
+
+            // Execute the mutator
+            if let Some(mutation) = mutator_func(input, corpus, rng, dictionary) {
+                mutations.push(mutation);
+            }
+        }
+
+        // Ensure the input fits in the maximum length
+        input.data.truncate(max_length);
+
+        // Return the mutation applied
+        mutations
+    }
+
+    /// Current mutators available for mutation
+    fn mutators() -> &'static [Self::MutatorFunc] {
+        &[
+            // basic mutators
+            mutators::text::char_replace,
+            mutators::text::replace_integer,
+            mutators::text::replace_hex_integer,
+            mutators::text::splice_within,
+            mutators::text::havoc_as_bytes,
+            // insert random strings, with the const param, being an upper bound to the number of
+            // inserted bytes. This ensures that we will do small modification much more often.
+            mutators::text::insert_repeated_chars::<4>,
+            mutators::text::insert_repeated_chars::<1024>,
+            mutators::text::insert_random_string::<4>,
+            mutators::text::insert_random_string::<8>,
+            mutators::text::insert_random_string::<1024>,
+            // dictionary-based mutations
+            mutators::text::splice_from_dictionary,
+            mutators::text::insert_from_dictionary,
+            // advanced text-focused insertion operators
+            mutators::text::insert_from_dictionary_separated_by::<'\n'>,
+            mutators::text::insert_from_dictionary_separated_by::<'\t'>,
+            mutators::text::insert_from_dictionary_separated_by::<' '>,
+            mutators::text::insert_from_dictionary_separated_by::<';'>,
+            mutators::text::insert_from_corpus_separated_by::<'\n'>,
+            mutators::text::insert_from_corpus_separated_by::<' '>,
+            mutators::text::insert_from_corpus_separated_by::<'\t'>,
+            mutators::text::insert_from_corpus_separated_by::<';'>,
+            // text-focused mutation operations:
+            // line-focused
+            mutators::text::dup_between_separator::<'\n'>,
+            mutators::text::delete_between_separator::<'\n'>,
+            // word-focused
+            mutators::text::dup_between_separator::<' '>,
+            mutators::text::delete_between_separator::<' '>,
+            mutators::text::dup_between_separator::<'\t'>,
+            mutators::text::delete_between_separator::<'\t'>,
+            // interesting for programming languages:
+            mutators::text::dup_between_separator::<';'>,
+            mutators::text::delete_between_separator::<';'>,
+            mutators::text::dup_between_separator::<','>,
+            mutators::text::delete_between_separator::<','>,
+        ]
+    }
+
+    /// Current expensive mutators available for mutation (typically those which allocate)
+    fn expensive_mutators() -> &'static [Self::MutatorFunc] {
+        &[]
+    }
+
+    fn generate(
+        _corpus: &[Arc>],
+        rng: &mut Rng,
+        _dictionary: &Option>>,
+        _min_length: usize,
+        max_length: usize,
+    ) -> InputWithMetadata {
+        InputWithMetadata::from_input(
+            mutators::text::helpers::random_ascii_string(rng, max_length).into(),
+        )
+    }
+
+    /// return shannon byte entropy for the bytes slice
+    fn entropy_metric(&self) -> Option {
+        Some(crate::utils::byte_entropy(self.data()))
+    }
+
+    /// just return the length of the current byte buffer
+    fn len(&self) -> Option {
+        Some(self.data().len())
+    }
+
+    /// Redqueen implementation mostly wraps the bytes redqueen implementation.
+    /// TODO: implement text transformations, i.e., itoa/atoi.
+
+    #[cfg(feature = "redqueen")]
+    type RuleCandidate = as FuzzInput>::RuleCandidate;
+
+    /// Apply the given [`RedqueenRule`] to the current input using the given candidate
+    /// returning the mutation done
+    #[cfg(feature = "redqueen")]
+    fn apply_redqueen_rule(
+        &mut self,
+        rule: &RedqueenRule,
+        candidate: &Self::RuleCandidate,
+    ) -> Option {
+        let bytes = self.data_mut();
+        bytes.apply_redqueen_rule(rule, candidate)
+    }
+
+    /// Upper bound for the ranges produced during increasing entropy for redqueen
+    #[cfg(feature = "redqueen")]
+    fn entropy_limit(&self) -> usize {
+        self.data.entropy_limit()
+    }
+
+    /// Increase entropy of the input between the given start and end values
+    #[cfg(feature = "redqueen")]
+    fn increase_entropy(&mut self, rng: &mut Rng, start: usize, end: usize) -> Result<()> {
+        let bytes = self.data_mut();
+        bytes.increase_entropy(rng, start, end)
+    }
+
+    /// Get a list of all of the `RuleCandidate`s that the given `rule` can be applied to. These
+    /// candidates are then passed to `apply_redqueen_rule` to deterministically search the
+    /// applicable redqueen search space for this input
+    #[cfg(feature = "redqueen")]
+    fn get_redqueen_rule_candidates(&self, rule: &RedqueenRule) -> Vec {
+        self.data.get_redqueen_rule_candidates(rule)
+    }
+
+    /// Returns true if the given rule can be applied to this input. Used as a fast path instead of
+    /// using get_redqueen_rule_candidates.
+ #[cfg(feature = "redqueen")] + fn has_redqueen_rule_candidates(&self, rule: &RedqueenRule) -> bool { + self.data.has_redqueen_rule_candidates(rule) + } + + /// TODO: create a proper minimizer for the text input type + + type MinState = crate::fuzz_input::NullMinimizerState; + + fn minimize( + &mut self, + _state: &mut Self::MinState, + _current_iteration: u32, + _last_successful_iteration: u32, + _rng: &mut Rng, + ) -> MinimizeControlFlow { + MinimizeControlFlow::Stop + } + + fn init_minimize(&mut self) -> (Self::MinState, MinimizeControlFlow) { + crate::fuzz_input::NullMinimizerState::init() + } +} diff --git a/src/lib.rs b/src/lib.rs index 8b01d31..322b379 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -183,7 +183,6 @@ mod colors; mod commands; mod coverage_analysis; -pub mod expensive_mutators; pub mod feedback; mod filesystem; pub mod fuzz_input; @@ -195,6 +194,8 @@ pub use utils::write_crash_input; pub use fuzz_input::{FuzzInput, InputWithMetadata}; +pub mod input_types; + #[macro_use] mod try_macros; pub mod cmp_analysis; diff --git a/src/mutators.rs b/src/mutators.rs deleted file mode 100644 index 51767e1..0000000 --- a/src/mutators.rs +++ /dev/null @@ -1,502 +0,0 @@ -//! 
Various methods used to mutate inputs -#![allow(clippy::ptr_arg)] - -use crate::rng::Rng; -use rand::Rng as _; - -use crate::fuzz_input::InputWithMetadata; -use std::sync::Arc; - -/// Flip a random bit in the input -pub fn bit_flip( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - // Get the length of the input string - let len = input.len(); - - // If the input is empty, ignore - if len == 0 { - return None; - } - - // Get the byte/bit offset to flip - let byte_offset = rng.gen::() % len; - let bit_offset = rng.gen::() & 7; - - // Flip the random bit - // SAFETY: byte_offset is within the length of the input - unsafe { - *input.get_unchecked_mut(byte_offset) ^= 1 << bit_offset; - } - - // Output mutation - Some(format!( - "BitFlip_offset_{byte_offset:#x}_bit_{bit_offset:#x}" - )) -} - -/// Replace a random byte in the input with a new byte -pub fn byte_flip( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.is_empty() { - return None; - } - - // Get the random offset and byte - let offset = rng.gen::() % input.len(); - let rand_byte = rng.gen::(); - - // Set the new byte - input[offset] = rand_byte; - - // Output mutation - Some(format!("ByteFlip_offset_{offset:#x}_byte_{rand_byte:#x}")) -} - -/// Insert a random byte into the input with a new byte -pub fn byte_insert( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.is_empty() { - return None; - } - - // Get the random offset and byte - let offset = rng.gen::() % input.len(); - let rand_byte = rng.gen::(); - - // Insert the new byte - input.insert(offset, rand_byte); - - // Output mutation - Some(format!("ByteInsert_offset_{offset:#x}_byte_{rand_byte:#x}")) -} - -/// Delete a random byte in the input -pub fn byte_delete( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.is_empty() { - 
return None; - } - - // Get the random offset and byte - let offset = rng.gen::() % input.len(); - - // Insert the new byte - input.remove(offset); - - // Output mutation - Some(format!("ByteDelete_offset_{offset:#x}")) -} - -/// Impl functions to randomly set a given type -macro_rules! set_random { - ($name:ident, $typ:ty) => { - /// Replace a random u8 into the input with a new word - pub fn $name( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, - ) -> Option { - const SIZE: usize = std::mem::size_of::<$typ>(); - - if input.len() <= SIZE { - return None; - } - - // Get the random offset and byte ensuring there is room to fill - let offset = rng.gen::() % (input.len() - SIZE); - let rand_word = rng.gen::<$typ>(); - - // Replace the new word - #[allow(clippy::range_plus_one)] - input[offset..offset + SIZE].copy_from_slice(&rand_word.to_le_bytes()); - - // Output mutation - Some(format!( - "{}_offset_{offset:#x}_data_{rand_word:#x}", - stringify!($name) - )) - } - }; -} - -set_random!(set_random_u8, u8); -set_random!(set_random_u16, u16); -set_random!(set_random_u32, u32); -set_random!(set_random_u64, u64); - -/// Copy a random slice from the corpus into the input -pub fn splice_corpus( - input: &mut Vec, - corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.len() < 4 || corpus.is_empty() { - return None; - } - - // Pre-compute random numbers to avoid borrowing from the &mut fuzzer - let rand_num1 = rng.gen::(); - let rand_num2 = rng.gen::(); - let rand_num3 = rng.gen::(); - let rand_num4 = rng.gen::(); - - // Get the input which the comes will come from from the corpus - let splice_from = &corpus[rand_num1 % corpus.len()]; - - let max_splice_len = std::cmp::min(splice_from.input.len(), input.len()); - - // Assume the length will be larger than 4 bytes - if max_splice_len < 4 || input.len() < 4 || splice_from.input.len() < 4 { - return None; - } - - let splice_len = rand_num2 % max_splice_len; - 
let splice_offset = rand_num3 % (splice_from.input.len() - splice_len); - let input_offset = rand_num4 % (input.len() - splice_len); - - // Splice the found - input[input_offset..input_offset + splice_len] - .copy_from_slice(&splice_from.input[splice_offset..splice_offset + splice_len]); - - // Output mutation - Some(format!("SpliceCorpus_offset_{input_offset:#x}")) -} - -/// Increment a random byte -pub fn byte_inc( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.is_empty() { - return None; - } - - // Get the random offset and byte - let offset = rng.gen::() % input.len(); - - // Set the new byte - input[offset] = input[offset].wrapping_add(1); - - // Output mutation - Some(format!("ByteInc_offset_{offset:#x}")) -} - -/// Decrement a random byte -pub fn byte_dec( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.is_empty() { - return None; - } - - // Get the random offset and byte - let offset = rng.gen::() % input.len(); - - // Set the new byte - input[offset] = input[offset].wrapping_sub(1); - - // Output mutation - Some(format!("ByteDec_offset_{offset:#x}")) -} - -/// Copy a random slice from the current input into itself -pub fn splice_input( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.len() < 4 { - return None; - } - - // Pre-compute random numbers to avoid borrowing from the &mut fuzzer - #[allow(clippy::cast_possible_wrap)] - let src = (rng.gen::() % input.len()) as isize; - - #[allow(clippy::cast_possible_wrap)] - let dst = (rng.gen::() % input.len()) as isize; - - // Get the larger of the two positions - let largest = std::cmp::max(src, dst); - - // Get the maximum slice that is not out of bounds - let max_len = input.len() - usize::try_from(largest).ok()?; - - // Randomly choose a length of slice to copy that is in bounds - let len = rng.gen::() % max_len; - - // Copy the 
slice internally. These buffers could overlap - // SAFETY: src and dst are within the bounds of input - unsafe { - std::ptr::copy( - input.as_mut_ptr().offset(src), - input.as_mut_ptr().offset(dst), - len, - ); - } - - // Output mutation - Some(format!( - "SpliceInput_srcoffset_{src:#x}_dstoffset_{dst:#x}_len_{len:#x}" - )) -} - -/// Interesting `u8` values to insert into a test input -const INTERESTING_U8: [u8; 10] = [ - u8::MAX, - u8::MAX - 1, - u8::MAX - 2, - u8::MAX - 3, - u8::MAX - 4, - u8::MIN, - u8::MIN + 1, - u8::MIN + 2, - u8::MIN + 3, - u8::MIN + 4, -]; - -/// Interesting `u16` values to insert into a test input -const INTERESTING_U16: [u16; 20] = [ - (u8::MAX) as u16, - (u8::MAX - 1) as u16, - (u8::MAX - 2) as u16, - (u8::MAX - 3) as u16, - (u8::MAX - 4) as u16, - (u8::MIN) as u16, - (u8::MIN + 1) as u16, - (u8::MIN + 2) as u16, - (u8::MIN + 3) as u16, - (u8::MIN + 4) as u16, - u16::MAX, - u16::MAX - 1, - u16::MAX - 2, - u16::MAX - 3, - u16::MAX - 4, - u16::MIN, - u16::MIN + 1, - u16::MIN + 2, - u16::MIN + 3, - u16::MIN + 4, -]; - -/// Interesting `u32` values to insert into a test input -const INTERESTING_U32: [u32; 30] = [ - (u8::MAX) as u32, - (u8::MAX - 1) as u32, - (u8::MAX - 2) as u32, - (u8::MAX - 3) as u32, - (u8::MAX - 4) as u32, - (u8::MIN) as u32, - (u8::MIN + 1) as u32, - (u8::MIN + 2) as u32, - (u8::MIN + 3) as u32, - (u8::MIN + 4) as u32, - (u16::MAX) as u32, - (u16::MAX - 1) as u32, - (u16::MAX - 2) as u32, - (u16::MAX - 3) as u32, - (u16::MAX - 4) as u32, - (u16::MIN) as u32, - (u16::MIN + 1) as u32, - (u16::MIN + 2) as u32, - (u16::MIN + 3) as u32, - (u16::MIN + 4) as u32, - u32::MAX, - u32::MAX - 1, - u32::MAX - 2, - u32::MAX - 3, - u32::MAX - 4, - u32::MIN, - u32::MIN + 1, - u32::MIN + 2, - u32::MIN + 3, - u32::MIN + 4, -]; - -/// Interesting `u64` values to insert into a test input -const INTERESTING_U64: [u64; 40] = [ - (u8::MAX) as u64, - (u8::MAX - 1) as u64, - (u8::MAX - 2) as u64, - (u8::MAX - 3) as u64, - (u8::MAX - 4) as u64, - 
(u8::MIN) as u64, - (u8::MIN + 1) as u64, - (u8::MIN + 2) as u64, - (u8::MIN + 3) as u64, - (u8::MIN + 4) as u64, - (u16::MAX) as u64, - (u16::MAX - 1) as u64, - (u16::MAX - 2) as u64, - (u16::MAX - 3) as u64, - (u16::MAX - 4) as u64, - (u16::MIN) as u64, - (u16::MIN + 1) as u64, - (u16::MIN + 2) as u64, - (u16::MIN + 3) as u64, - (u16::MIN + 4) as u64, - (u32::MAX) as u64, - (u32::MAX - 1) as u64, - (u32::MAX - 2) as u64, - (u32::MAX - 3) as u64, - (u32::MAX - 4) as u64, - (u32::MIN) as u64, - (u32::MIN + 1) as u64, - (u32::MIN + 2) as u64, - (u32::MIN + 3) as u64, - (u32::MIN + 4) as u64, - u64::MAX, - u64::MAX - 1, - u64::MAX - 2, - u64::MAX - 3, - u64::MAX - 4, - u64::MIN, - u64::MIN + 1, - u64::MIN + 2, - u64::MIN + 3, - u64::MIN + 4, -]; - -/// Replace bytes in the input with interesting values that trigger common bugs such as -/// off by one -pub fn replace_with_interesting( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.len() < 9 { - return None; - } - - // Randomly choose which size of number to create (u8, u16, u32, u64) - let size = match rng.gen::() % 4 { - 0 => 1, - 1 => 2, - 2 => 4, - 3 => 8, - _ => unreachable!(), - }; - - // Get the offset to replace with a random interesting value ensuring the random - // number can fit in the input - let offset = rng.gen::() % (input.len() - size); - - // Get the random value to replace in the input - match size { - 1 => { - let val = INTERESTING_U8[rng.gen::() % INTERESTING_U8.len()]; - input[offset..offset + size].copy_from_slice(&val.to_le_bytes()); - Some(format!("InterestingVal_{offset:#x}_val_{val:#x}")) - } - 2 => { - let val = INTERESTING_U16[rng.gen::() % INTERESTING_U16.len()]; - input[offset..offset + size].copy_from_slice(&val.to_le_bytes()); - Some(format!("InterestingVal_{offset:#x}_val_{val:#x}")) - } - 4 => { - let val = INTERESTING_U32[rng.gen::() % INTERESTING_U32.len()]; - input[offset..offset + 
size].copy_from_slice(&val.to_le_bytes()); - Some(format!("InterestingVal_{offset:#x}_val_{val:#x}")) - } - 8 => { - let val = INTERESTING_U64[rng.gen::() % INTERESTING_U64.len()]; - input[offset..offset + size].copy_from_slice(&val.to_le_bytes()); - Some(format!("InterestingVal_{offset:#x}_val_{val:#x}")) - } - _ => unreachable!(), - } -} - -/// Sets a random slice in the input to a random byte (ala memset) -pub fn set_input_slice( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - _dictionary: &Option>>, -) -> Option { - if input.len() < 4 { - return None; - } - - // Pre-compute random numbers to avoid borrowing from the &mut fuzzer - #[allow(clippy::cast_possible_wrap)] - let dst = (rng.gen::() % input.len()) as isize; - - // Get the maximum slice that is not out of bounds - let max_len = input.len() - usize::try_from(dst).ok()?; - - // Randomly choose a length of slice to copy that is in bounds - let len = rng.gen::() % max_len; - - // Copy the slice internally - // SAFETY: dst offset is within the bounds of input - #[allow(clippy::cast_possible_truncation)] - unsafe { - let val = rng.gen::() as u8; - std::ptr::write_bytes(input.as_mut_ptr().offset(dst), val, len); - Some(format!( - "SetInputSlice_offset_{dst:#x}_len_{len:#x}_val_{val:#x}" - )) - } -} - -/// Insert an element from the dictionary into the input -pub fn overwrite_from_dictionary( - input: &mut Vec, - _corpus: &[Arc>>], - rng: &mut Rng, - dictionary: &Option>>, -) -> Option { - match dictionary { - None => None, - Some(dict) => { - // Pre-compute random numbers to avoid borrowing from the &mut fuzzer - let rand_num1 = rng.gen::(); - let rand_num2 = rng.gen::(); - - // Get the dictionary element to insert - let element = &dict[rand_num1 % dict.len()]; - - let element_len = element.len(); - let input_offset = rand_num2 % (input.len().saturating_sub(element_len) + 1); - let needed_len = input_offset + element_len; - - // The current input isn't large enough to hold the found element. 
Resize the input. - if input.len() < needed_len { - input.resize(needed_len + 1, 0); - } - - // Splice the found - input[input_offset..input_offset + element_len].copy_from_slice(element); - - Some(format!( - "InsertFromDictionary_offset_{input_offset:#x}_{element:x?}" - )) - } - } -} diff --git a/src/expensive_mutators.rs b/src/mutators/bytes/expensive.rs similarity index 97% rename from src/expensive_mutators.rs rename to src/mutators/bytes/expensive.rs index cab4501..ff812eb 100644 --- a/src/expensive_mutators.rs +++ b/src/mutators/bytes/expensive.rs @@ -10,7 +10,7 @@ use crate::fuzz_input::InputWithMetadata; use crate::rng::Rng; /// Insert a random slice from the corpus into the `input`, expanding the `input` -pub(crate) fn splice_corpus_extend( +pub fn splice_corpus_extend( input: &mut Vec, corpus: &[Arc>>], rng: &mut Rng, @@ -53,7 +53,7 @@ pub(crate) fn splice_corpus_extend( } /// Insert a random dictionary entry into the `input`, potentially expanding the `input`. -pub(crate) fn splice_from_dictionary_extend( +pub fn splice_from_dictionary_extend( input: &mut Vec, _corpus: &[Arc>>], rng: &mut Rng, diff --git a/src/mutators/bytes/helpers.rs b/src/mutators/bytes/helpers.rs new file mode 100644 index 0000000..2b521f8 --- /dev/null +++ b/src/mutators/bytes/helpers.rs @@ -0,0 +1,560 @@ +use rand::seq::SliceRandom; +use rand::Rng; + +/// flip a random bit in the input +#[inline] +pub fn bit_flip>(input: &mut T, rng: &mut impl rand::Rng) -> Option<(usize, u64)> { + let input = input.as_mut(); + let len = input.len(); + // If the input is empty, ignore + if len == 0 { + return None; + } + + // Get the byte/bit offset to flip + let byte_offset = rng.gen_range(0..len); + let bit_offset = rng.gen::() & 7; + + // Flip the random bit + // SAFETY: byte_offset is within the length of the input + unsafe { + *input.get_unchecked_mut(byte_offset) ^= 1 << bit_offset; + } + + Some((byte_offset, bit_offset)) +} + +/// replace a random byte in the input +#[inline] +pub fn 
byte_flip>(input: &mut T, rng: &mut impl rand::Rng) -> Option<(usize, u8)> { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + + // Get the random offset and byte + let len = input.len(); + let offset = rng.gen_range(0..len); + let rand_byte = rng.gen::(); + + // Set the new byte + input[offset] = rand_byte; + + Some((offset, rand_byte)) +} + +/// Insert a random byte into the input with a new byte +#[inline] +pub fn byte_insert(input: &mut Vec, rng: &mut impl rand::Rng) -> Option<(usize, u8)> { + if input.is_empty() { + return None; + } + + // Get the random offset and byte + let len = input.len(); + let offset = rng.gen_range(0..len); + let rand_byte = rng.gen::(); + + // Insert the new byte + input.insert(offset, rand_byte); + + // Output mutation + Some((offset, rand_byte)) +} + +/// Delete a random byte in the input +#[inline] +pub fn byte_delete(input: &mut Vec, rng: &mut impl rand::Rng) -> Option { + if input.is_empty() { + return None; + } + + // Get the random offset and byte + let len = input.len(); + let offset = rng.gen_range(0..len); + // Insert the new byte + input.remove(offset); + Some(offset) +} + +/// Increment a random byte +pub fn inc>(input: &mut T, rng: &mut impl rand::Rng) -> Option { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + + // Get the random offset and byte + let offset = rng.gen_range(0..input.len()); + + // Set the new byte + input[offset] = input[offset].wrapping_add(1); + + Some(offset) +} + +/// Decrement a random byte +pub fn dec>(input: &mut T, rng: &mut impl rand::Rng) -> Option { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + + // Get the random offset and byte + let offset = rng.gen_range(0..input.len()); + + // Set the new byte + input[offset] = input[offset].wrapping_sub(1); + + Some(offset) +} + +/// Impl functions to randomly set a given type +macro_rules! 
set_random { + ($name:ident, $typ:ty) => { + /// Replace a random u8 into the input with a new word + pub fn $name(input: &mut Vec, rng: &mut impl rand::Rng) -> Option<(usize, $typ)> { + const SIZE: usize = std::mem::size_of::<$typ>(); + + if input.len() <= SIZE { + return None; + } + + // Get the random offset and byte ensuring there is room to fill + let last = input.len() - SIZE; + let offset = rng.gen_range(0..last); + let rand_word = rng.gen::<$typ>(); + + // Replace the new word + #[allow(clippy::range_plus_one)] + input[offset..offset + SIZE].copy_from_slice(&rand_word.to_le_bytes()); + + Some((offset, rand_word)) + } + }; +} + +set_random!(set_random_u8, u8); +set_random!(set_random_u16, u16); +set_random!(set_random_u32, u32); +set_random!(set_random_u64, u64); + +/// For an input with length `len` choose a random integer size: 1, 2, 4, 8 +fn choose_integer_size(len: usize, rng: &mut impl Rng) -> usize { + // the hot path will never loop, only for small inputs we do another loop iteration to identify + // a smaller size. 
+ loop { + let mut i = 4; + let size = match rng.gen_range(0..i) { + 0 => 1, + 1 => 2, + 2 => 4, + 3 => 8, + _ => unreachable!(), + }; + if size <= len { + return size; + } else { + i -= 1; + } + if i == 0 { + return 0; + } + } +} + +/// Replace bytes in the input with interesting values that trigger common bugs such as +/// off by one +pub fn set_random_word>( + input: &mut T, + rng: &mut impl rand::Rng, +) -> Option<(usize, u8, u64)> { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + + // Randomly choose which size of number to create (u8, u16, u32, u64) + let size = choose_integer_size(input.len(), rng); + if size == 0 { + return None; + } + + // Get the offset to replace with a random interesting value ensuring the random + // number can fit in the input + let offset = rng.gen_range(0..=(input.len() - size)); + + // Get the random value to replace in the input + let val = match size { + 1 => { + let val: u8 = rng.gen(); + input[offset] = val; + val as u64 + } + 2 => { + let val: u16 = rng.gen(); + input[offset..offset + size].copy_from_slice(&val.to_le_bytes()); + val as u64 + } + 4 => { + let val: u32 = rng.gen(); + input[offset..offset + size].copy_from_slice(&val.to_le_bytes()); + val as u64 + } + 8 => { + let val: u64 = rng.gen(); + input[offset..offset + size].copy_from_slice(&val.to_le_bytes()); + val as u64 + } + _ => unreachable!(), + }; + + Some((offset, size as u8, val as u64)) +} + +/// Interesting `u8` values to insert into a test input +pub const INTERESTING_U8: [u8; 10] = [ + u8::MAX, + u8::MAX - 1, + u8::MAX - 2, + u8::MAX - 3, + u8::MAX - 4, + u8::MIN, + u8::MIN + 1, + u8::MIN + 2, + u8::MIN + 3, + u8::MIN + 4, +]; + +/// Interesting `u16` values to insert into a test input +pub const INTERESTING_U16: [u16; 20] = [ + (u8::MAX) as u16, + (u8::MAX - 1) as u16, + (u8::MAX - 2) as u16, + (u8::MAX - 3) as u16, + (u8::MAX - 4) as u16, + (u8::MIN) as u16, + (u8::MIN + 1) as u16, + (u8::MIN + 2) as u16, + (u8::MIN + 3) as u16, 
+ (u8::MIN + 4) as u16, + u16::MAX, + u16::MAX - 1, + u16::MAX - 2, + u16::MAX - 3, + u16::MAX - 4, + u16::MIN, + u16::MIN + 1, + u16::MIN + 2, + u16::MIN + 3, + u16::MIN + 4, +]; + +/// Interesting `u32` values to insert into a test input +pub const INTERESTING_U32: [u32; 30] = [ + (u8::MAX) as u32, + (u8::MAX - 1) as u32, + (u8::MAX - 2) as u32, + (u8::MAX - 3) as u32, + (u8::MAX - 4) as u32, + (u8::MIN) as u32, + (u8::MIN + 1) as u32, + (u8::MIN + 2) as u32, + (u8::MIN + 3) as u32, + (u8::MIN + 4) as u32, + (u16::MAX) as u32, + (u16::MAX - 1) as u32, + (u16::MAX - 2) as u32, + (u16::MAX - 3) as u32, + (u16::MAX - 4) as u32, + (u16::MIN) as u32, + (u16::MIN + 1) as u32, + (u16::MIN + 2) as u32, + (u16::MIN + 3) as u32, + (u16::MIN + 4) as u32, + u32::MAX, + u32::MAX - 1, + u32::MAX - 2, + u32::MAX - 3, + u32::MAX - 4, + u32::MIN, + u32::MIN + 1, + u32::MIN + 2, + u32::MIN + 3, + u32::MIN + 4, +]; + +/// Interesting `u64` values to insert into a test input +pub const INTERESTING_U64: [u64; 40] = [ + (u8::MAX) as u64, + (u8::MAX - 1) as u64, + (u8::MAX - 2) as u64, + (u8::MAX - 3) as u64, + (u8::MAX - 4) as u64, + (u8::MIN) as u64, + (u8::MIN + 1) as u64, + (u8::MIN + 2) as u64, + (u8::MIN + 3) as u64, + (u8::MIN + 4) as u64, + (u16::MAX) as u64, + (u16::MAX - 1) as u64, + (u16::MAX - 2) as u64, + (u16::MAX - 3) as u64, + (u16::MAX - 4) as u64, + (u16::MIN) as u64, + (u16::MIN + 1) as u64, + (u16::MIN + 2) as u64, + (u16::MIN + 3) as u64, + (u16::MIN + 4) as u64, + (u32::MAX) as u64, + (u32::MAX - 1) as u64, + (u32::MAX - 2) as u64, + (u32::MAX - 3) as u64, + (u32::MAX - 4) as u64, + (u32::MIN) as u64, + (u32::MIN + 1) as u64, + (u32::MIN + 2) as u64, + (u32::MIN + 3) as u64, + (u32::MIN + 4) as u64, + u64::MAX, + u64::MAX - 1, + u64::MAX - 2, + u64::MAX - 3, + u64::MAX - 4, + u64::MIN, + u64::MIN + 1, + u64::MIN + 2, + u64::MIN + 3, + u64::MIN + 4, +]; + +/// Replace bytes in the input with interesting values that trigger common bugs such as +/// off by one +pub 
fn replace_with_interesting_integer>( + input: &mut T, + rng: &mut impl rand::Rng, +) -> Option<(usize, u8, u64)> { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + + // Randomly choose which size of number to create (u8, u16, u32, u64) + let size = choose_integer_size(input.len(), rng); + if size == 0 { + return None; + } + + // select endianess at random + let big_endian = rng.gen_bool(0.5); + + // Get the offset to replace with a random interesting value ensuring the random + // number can fit in the input + let offset = rng.gen_range(0..=(input.len() - size)); + + // Get the random value to replace in the input + let val = match size { + 1 => { + let val = *INTERESTING_U8.as_slice().choose(rng).unwrap(); + input[offset] = val; + val as u64 + } + 2 => { + let val = *INTERESTING_U16.as_slice().choose(rng).unwrap(); + input[offset..offset + size].copy_from_slice( + &(if big_endian { + val.to_be_bytes() + } else { + val.to_le_bytes() + }), + ); + val as u64 + } + 4 => { + let val = *INTERESTING_U32.as_slice().choose(rng).unwrap(); + input[offset..offset + size].copy_from_slice( + &(if big_endian { + val.to_be_bytes() + } else { + val.to_le_bytes() + }), + ); + val as u64 + } + 8 => { + let val = *INTERESTING_U64.as_slice().choose(rng).unwrap(); + input[offset..offset + size].copy_from_slice( + &(if big_endian { + val.to_be_bytes() + } else { + val.to_le_bytes() + }), + ); + val as u64 + } + _ => unreachable!(), + }; + + Some((offset, size as u8, val as u64)) +} + +/// Insert interesting values that trigger common bugs such as off by one. This always extends +/// an input's size. 
+pub fn insert_interesting_integer( + input: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option<(usize, u8, u64)> { + // Randomly choose which size of number to create (u8, u16, u32, u64) + let size = choose_integer_size(usize::MAX, rng); + if size == 0 { + return None; + } + + // select endianess at random + let big_endian = rng.gen_bool(0.5); + + // Get the offset to replace with a random interesting value ensuring the random + // number can fit in the input + let offset = rng.gen_range(0..=input.len()); + + // Get the random value to replace in the input + let val = match size { + 1 => { + let val = *INTERESTING_U8.as_slice().choose(rng).unwrap(); + input.insert(offset, val); + val as u64 + } + 2 => { + let val = *INTERESTING_U16.as_slice().choose(rng).unwrap(); + crate::utils::vec::fast_insert_at( + input, + offset, + &(if big_endian { + val.to_be_bytes() + } else { + val.to_le_bytes() + }), + ); + val as u64 + } + 4 => { + let val = *INTERESTING_U32.as_slice().choose(rng).unwrap(); + crate::utils::vec::fast_insert_at( + input, + offset, + &(if big_endian { + val.to_be_bytes() + } else { + val.to_le_bytes() + }), + ); + val as u64 + } + 8 => { + let val = *INTERESTING_U64.as_slice().choose(rng).unwrap(); + crate::utils::vec::fast_insert_at( + input, + offset, + &(if big_endian { + val.to_be_bytes() + } else { + val.to_le_bytes() + }), + ); + val as u64 + } + _ => unreachable!(), + }; + + Some((offset, size as u8, val as u64)) +} + +/// Sets a random slice in the input to a random byte (ala memset) +pub fn set_slice>( + input: &mut T, + rng: &mut impl rand::Rng, +) -> Option<(isize, usize, u8)> { + let input = input.as_mut(); + if input.len() < 4 { + return None; + } + + // Pre-compute random numbers to avoid borrowing from the &mut fuzzer + #[allow(clippy::cast_possible_wrap)] + let dst = rng.gen_range(0isize..(input.len() as isize)); + + // Get the maximum slice that is not out of bounds + let max_len = input.len() - usize::try_from(dst).ok()?; + + // Randomly 
choose a length of slice to copy that is in bounds + let len = rng.gen_range(0..max_len); + let val: u8 = rng.gen(); + + // Copy the slice internally + // SAFETY: dst offset is within the bounds of input + unsafe { + std::ptr::write_bytes(input.as_mut_ptr().offset(dst), val, len); + } + + Some((dst, len, val)) +} + +/// Overwrite data at random offset with a value from the dictionary. +pub fn overwrite_from_dictionary>( + input: &mut T, + rng: &mut impl rand::Rng, + dictionary: &[Vec], +) -> Option<(usize, usize)> { + if dictionary.is_empty() { + return None; + } + let input = input.as_mut(); + let dict_index = rng.gen_range(0..dictionary.len()); + // Get the dictionary element to insert + let element = &dictionary[dict_index]; + let element_len = element.len(); + + if element_len > input.len() { + return None; + } + if element_len == input.len() { + input.copy_from_slice(element); + return Some((0, element_len)); + } + + let input_offset = rng.gen_range(0..(input.len() - element_len)); + // Splice the dictionary entry + input[input_offset..input_offset + element_len].copy_from_slice(element); + + Some((input_offset, dict_index)) +} + +/// helper to splice data within a vector. +#[inline] +pub fn splice_within>( + input: &mut T, + rng: &mut impl rand::Rng, +) -> Option<(isize, isize, usize)> { + crate::mutators::helpers::splice_within(input, rng) +} + +/// Copy a random sub-slice from `src` into a random subslice of `dst`. +/// This will potentially grow or shrink the destination vector. +#[inline] +pub fn splice_bytes_extend( + dst: &mut Vec, + src: &[u8], + rng: &mut impl rand::Rng, +) -> Option<(std::ops::Range, std::ops::Range)> { + crate::mutators::helpers::splice_extend(dst, src, rng) +} + +/// Copy sub-slice from another byte slice into the current one. 
+#[inline] +pub fn splice_other_inplace, S: AsRef<[u8]>>( + input: &mut T, + other: &S, + rng: &mut impl rand::Rng, +) -> Option<(usize, usize, usize)> { + crate::mutators::helpers::splice_other_inplace(input, other, rng) +} diff --git a/src/mutators/bytes/mod.rs b/src/mutators/bytes/mod.rs new file mode 100644 index 0000000..a015d56 --- /dev/null +++ b/src/mutators/bytes/mod.rs @@ -0,0 +1,242 @@ +//! Various methods used to mutate inputs +#![allow(missing_docs)] +#![allow(clippy::ptr_arg)] + +use rand::Rng as _; +use std::sync::Arc; + +use crate::fuzz_input::InputWithMetadata; +use crate::rng::Rng; + +pub mod helpers; +pub mod expensive; + +/// Flip a random bit in the input +pub fn bit_flip( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::bit_flip(input, rng) { + Some((byte_offset, bit_offset)) => { + // Output mutation + Some(format!( + "BitFlip_offset_{byte_offset:#x}_bit_{bit_offset:#x}" + )) + } + None => None, + } +} + +/// Replace a random byte in the input with a new byte +pub fn byte_flip( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::byte_flip(input, rng) { + Some((offset, rand_byte)) => { + Some(format!("ByteFlip_offset_{offset:#x}_byte_{rand_byte:#x}")) + } + None => None, + } +} + +/// Insert a random byte into the input with a new byte +pub fn byte_insert( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::byte_insert(input, rng) { + Some((offset, rand_byte)) => { + Some(format!("ByteInsert_offset_{offset:#x}_byte_{rand_byte:#x}")) + } + _ => None, + } +} + +/// Delete a random byte in the input +pub fn byte_delete( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::byte_delete(input, rng) { + Some(offset) => Some(format!("ByteDelete_offset_{offset:#x}")), + _ => None, + } +} + +/// 
Increment a random byte +pub fn byte_inc( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::inc(input, rng) { + Some(offset) => Some(format!("ByteInc_offset_{offset:#x}")), + _ => None, + } +} + +/// Decrement a random byte +pub fn byte_dec( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::dec(input, rng) { + Some(offset) => Some(format!("ByteDec_offset_{offset:#x}")), + _ => None, + } +} + +/// Impl functions to randomly set a given type +macro_rules! set_random { + ($name:ident, $typ:ty) => { + /// Replace a integer at random offset into the input with a new random word. + pub fn $name( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, + ) -> Option { + match helpers::$name(input, rng) { + Some((offset, rand_word)) => + // Output mutation + { + Some(format!( + "{}_offset_{offset:#x}_data_{rand_word:#x}", + stringify!($name) + )) + } + _ => None, + } + } + }; +} + +set_random!(set_random_u8, u8); +set_random!(set_random_u16, u16); +set_random!(set_random_u32, u32); +set_random!(set_random_u64, u64); + +/// Copy a random slice from the corpus into the input +pub fn splice_corpus( + input: &mut Vec, + corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if input.len() < 4 || corpus.is_empty() { + return None; + } + + // Pre-compute random numbers to avoid borrowing from the &mut fuzzer + let rand_num1 = rng.gen::(); + let rand_num2 = rng.gen::(); + let rand_num3 = rng.gen::(); + let rand_num4 = rng.gen::(); + + // Get the input which the comes will come from from the corpus + let splice_from = &corpus[rand_num1 % corpus.len()]; + + let max_splice_len = std::cmp::min(splice_from.input.len(), input.len()); + + // Assume the length will be larger than 4 bytes + if max_splice_len < 4 || input.len() < 4 || splice_from.input.len() < 4 { + return None; + } + + let splice_len = rand_num2 
% max_splice_len; + let splice_offset = rand_num3 % (splice_from.input.len() - splice_len); + let input_offset = rand_num4 % (input.len() - splice_len); + + // Splice the found + input[input_offset..input_offset + splice_len] + .copy_from_slice(&splice_from.input[splice_offset..splice_offset + splice_len]); + + // Output mutation + Some(format!("SpliceCorpus_offset_{input_offset:#x}")) +} + +/// Copy a random slice from the current input into itself +pub fn splice_input( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if input.len() < 4 { + return None; + } + + let (src, dst, len) = helpers::splice_within(input, rng)?; + + // Output mutation + Some(format!( + "SpliceInput_srcoffset_{src:#x}_dstoffset_{dst:#x}_len_{len:#x}" + )) +} + +/// Replace bytes in the input with interesting values that trigger common bugs such as +/// off by one +pub fn replace_with_interesting( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + match helpers::replace_with_interesting_integer(input, rng) { + Some((offset, size, val)) => { + let bits = size / 8; + Some(format!("InterestingVal_{offset:#x}_u{bits}_val_{val:#x}")) + } + None => None, + } +} + +/// Sets a random slice in the input to a random byte (ala memset) +pub fn set_input_slice( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if input.len() < 4 { + return None; + } + + match helpers::set_slice(input, rng) { + Some((dst, len, val)) => Some(format!( + "SetInputSlice_offset_{dst:#x}_len_{len:#x}_val_{val:#x}" + )), + _ => None, + } +} + +/// Insert an element from the dictionary into the input +pub fn overwrite_from_dictionary( + input: &mut Vec, + _corpus: &[Arc>>], + rng: &mut Rng, + dictionary: &Option>>, +) -> Option { + match dictionary { + None => None, + Some(dict) => { + let (input_offset, element) = helpers::overwrite_from_dictionary(input, rng, dict)?; + + 
Some(format!( + "OverwriteFromDictionary_offset_{input_offset:#x}_{element:x?}" + )) + } + } +} diff --git a/src/mutators/havoc.rs b/src/mutators/havoc.rs new file mode 100644 index 0000000..ae06843 --- /dev/null +++ b/src/mutators/havoc.rs @@ -0,0 +1,723 @@ +//! Several helper functions that can be utilized by custom mutators to mutate raw byte or text +//! strings using snapchange's default mutation operations. + +use rand::seq::SliceRandom; + +use crate::mutators::bytes; +use crate::mutators::helpers::{splice_extend, splice_other_inplace, splice_within}; +use crate::mutators::numbers::{self, FuzzNum}; +use crate::mutators::text; + +/// Mutate a byte buffer using up to `max_mutations` mutation operators. The input is +/// potentially resized during mutation (leading to longer mutation time). +pub fn mutate_vec( + input: &mut Vec, + dictionary: Option<&Vec>>, + mut splice_with: Option<&[u8]>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> { + if max_mutations == 0 { + return None; + } + if input.is_empty() { + return None; + } + let num_mutations = (rng.gen_range(1..=max_mutations)) as u32; + let mut mutated = 0_u32; + let mut tries = 0_u32; + let mut mutation_log = vec![]; + while mutated < num_mutations { + tries += 1; + if tries >= (max_mutations as u32) * 3 { + // input is probably not suitable for mutation (e.g., to small) - bail out. 
+ log::debug!( + "attempted {} mutations, only {} succeeded, goal {}", + tries, + mutated, + num_mutations + ); + break; + } + let choice = rng.gen_range(0_u32..=10); + let log_str = match choice { + 0 => { + if mutated > 0 { + break; + } else { + continue; + } + } + 1..=5 => { + match mutate_inplace( + input, + dictionary, + splice_with, + max_mutations + .saturating_sub(mutated as usize) + .saturating_sub(1), + rng, + ) { + Some(log) => { + mutated += log.len() as u32; + mutation_log.extend(log.into_iter()); + continue; + } + None => continue, + } + } + 6 => { + if let Some((offset, rand_byte)) = bytes::helpers::byte_insert(input, rng) { + format!("ByteInsert_offset_{offset:#x}_byte_{rand_byte:#x}") + } else { + continue; + } + } + 7 => { + if let Some(offset) = bytes::helpers::byte_delete(input, rng) { + format!("ByteDelete_offset_{offset:#x}") + } else { + continue; + } + } + 8 => { + if let Some((offset, size, _val)) = + bytes::helpers::insert_interesting_integer(input, rng) + { + let bits = size / 8; + format!("InsertInteresting_u{bits}_offset_{offset}") + } else { + continue; + } + } + 9 => match splice_with.take() { + // we use take here to splice only once + Some(other) => { + if other.is_empty() || input.is_empty() { + continue; + } + if let Some((dstr, srcr)) = splice_extend(input, other, rng) { + format!( + "SpliceBytesExtend_into_{}_{}_from_{}_{}", + dstr.start, dstr.end, srcr.start, srcr.end + ) + } else { + continue; + } + } + _ => continue, + }, + 10 => match dictionary { + Some(dictionary) => { + if dictionary.is_empty() { + continue; + } + let dict_idx = rng.gen_range(0..dictionary.len()); + let other = &dictionary[dict_idx]; + if other.is_empty() { + continue; + } + if input.is_empty() { + input.extend_from_slice(other); + format!( + "SpliceDictionaryExtend_into_{}_{}_from_{}_{}_{}", + 0, + 0, + dict_idx, + 0, + other.len() + ) + } else { + if let Some((dstr, srcr)) = + bytes::helpers::splice_bytes_extend(input, other, rng) + { + format!( + 
"SpliceDictionaryExtend_into_{}_{}_from_{}_{}_{}", + dstr.start, dstr.end, dict_idx, srcr.start, srcr.end + ) + } else { + continue; + } + } + } + None => continue, + }, + _ => unreachable!(), + }; + + mutation_log.push(log_str); + mutated += 1; + } + + if mutation_log.is_empty() { + None + } else { + Some(mutation_log) + } +} + +/// Mutate a byte buffer using up to `max_mutations` mutation operators. The input is never +/// resized. +pub fn mutate_inplace>( + input: &mut T, + dictionary: Option<&Vec>>, + mut splice_with: Option<&[u8]>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> { + if max_mutations == 0 { + return None; + } + let mut input = input.as_mut(); + if input.is_empty() { + return None; + } + let num_mutations = (rng.gen_range(1..=max_mutations)) as u32; + let mut mutated = 0_u32; + let mut tries = 0_u32; + let mut mutation_log = vec![]; + while mutated < num_mutations { + tries += 1; + if tries >= (max_mutations as u32) * 3 { + // input is probably not suitable for mutation (e.g., to small) - bail out. 
+ log::debug!( + "attempted {} mutations, only {} succeeded, goal {}", + tries, + mutated, + num_mutations + ); + break; + } + let choice = rng.gen_range(0_u32..=14); + let log_str = match choice { + 0 => { + if mutated > 0 { + break; + } else { + continue; + } + } + 1 | 2 => match bytes::helpers::bit_flip(&mut input, rng) { + Some((byte_offset, bit_offset)) => { + format!("BitFlip_offset_{byte_offset:#x}_bit_{bit_offset:#x}") + } + None => continue, + }, + 3 | 4 => match bytes::helpers::byte_flip(&mut input, rng) { + Some((offset, rand_byte)) => { + format!("ByteFlip_offset_{offset:#x}_byte_{rand_byte:#x}") + } + None => continue, + }, + 5 => match bytes::helpers::inc(&mut input, rng) { + Some(offset) => format!("ByteInc_offset_{offset:#x}"), + _ => continue, + }, + 6 => match bytes::helpers::dec(&mut input, rng) { + Some(offset) => format!("ByteDec_offset_{offset:#x}"), + _ => continue, + }, + 7 => match bytes::helpers::set_random_word(&mut input, rng) { + Some((offset, size, val)) => { + let bits = size / 8; + format!("SetRandom_u{bits}_{offset:#x}_val_{val:#x}") + } + _ => continue, + }, + 8 => match bytes::helpers::set_slice(&mut input, rng) { + Some((offset, size, val)) => { + format!("SetSlice_{offset:#x}_len_{size}_val_{val:#x}") + } + _ => continue, + }, + 9 | 10 => match bytes::helpers::replace_with_interesting_integer(&mut input, rng) { + Some((offset, size, val)) => { + let bits = size / 8; + format!("InterestingVal_{offset:#x}_u{bits}_val_{val:#x}") + } + None => continue, + }, + 11 | 12 => match dictionary { + Some(dict) => { + match bytes::helpers::overwrite_from_dictionary(&mut input, rng, dict) { + Some((input_offset, element)) => { + format!("OverwriteFromDictionary_offset_{input_offset:#x}_{element:x?}") + } + None => continue, + } + } + None => continue, + }, + 13 => { + if input.len() > 8 { + if let Some((src, dst, len)) = splice_within(&mut input, rng) { + format!("SpliceWithin_srcoffset_{src:#x}_dstoffset_{dst:#x}_len_{len:#x}") + } else { + 
continue; + } + } else { + continue; + } + } + 14 => { + match splice_with.take() { + // we use take here to splice only once + Some(other) => match splice_other_inplace(&mut input, &other, rng) { + Some((input_offset, other_offset, len)) => { + format!("SpliceOther_at_{input_offset}_from_{other_offset}_len_{len}") + } + None => continue, + }, + _ => continue, + } + } + _ => unreachable!(), + }; + mutation_log.push(log_str); + mutated += 1; + } + + if mutation_log.is_empty() { + None + } else { + Some(mutation_log) + } +} + +/// Mutate a string using up to `max_mutations` mutation operators. The input is never +/// resized. +pub fn mutate_text_vec( + input: &mut Vec, + dictionary: Option<&Vec>>, + splice_with: Option<&[u8]>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> { + if max_mutations == 0 { + return None; + } + if input.is_empty() { + return None; + } + let num_mutations = (rng.gen_range(1..=max_mutations)) as u32; + let mut mutated = 0_u32; + let mut tries = 0_u32; + let mut mutation_log = vec![]; + while mutated < num_mutations { + tries += 1; + if tries >= (max_mutations as u32) * 3 { + // input is probably not suitable for mutation (e.g., to small) - bail out. 
+ log::debug!( + "attempted {} mutations, only {} succeeded, goal {}", + tries, + mutated, + num_mutations + ); + break; + } + let choice = rng.gen_range(0_u32..=24); + let log_str = match choice { + 0 => { + if mutated > 0 { + break; + } else { + continue; + } + } + 1..6 => { + match mutate_inplace( + input, + dictionary, + splice_with, + max_mutations + .saturating_sub(mutated as usize) + .saturating_sub(2), + rng, + ) { + Some(log) => { + mutated += log.len() as u32; + mutation_log.extend(log.into_iter()); + continue; + } + None => continue, + } + } + 6..8 => { + match mutate_vec( + input, + dictionary, + splice_with, + max_mutations + .saturating_sub(mutated as usize) + .saturating_sub(2), + rng, + ) { + Some(log) => { + mutated += log.len() as u32; + mutation_log.extend(log.into_iter()); + continue; + } + None => continue, + } + } + 8 => { + if rng.gen_bool(0.8) { + match text::helpers::replace_integer_with_interesting(input, rng) { + Some(range) => { + let start = range.start; + let end = range.end; + format!("ReplaceIntegerWithInteresting_at_{start}_{end}") + } + None => continue, + } + } else { + match text::helpers::replace_hex_integer_with_interesting(input, rng) { + Some(range) => { + let start = range.start; + let end = range.end; + format!("ReplaceHexIntegerWithInteresting_at_{start}_{end}") + } + None => continue, + } + } + } + 9 => { + if rng.gen_bool(0.8) { + match text::helpers::replace_integer_with_rand(input, rng) { + Some(range) => { + let start = range.start; + let end = range.end; + format!("ReplaceIntegerWithRand_at_{start}_{end}") + } + None => continue, + } + } else { + match text::helpers::replace_hex_integer_with_rand(input, rng) { + Some(range) => { + let start = range.start; + let end = range.end; + format!("ReplaceHexIntegerWithRand_at_{start}_{end}") + } + None => continue, + } + } + } + 10 | 11 | 12 => match text::helpers::char_replace(input, rng) { + Some((offset, val)) => { + format!("CharReplace_at_{offset}_with_{val:x}") + } + 
None => continue, + }, + 13 => match text::helpers::insert_random_string::<4>(input, rng) { + Some((offset, len)) => { + format!("InsertRandomString_offset_{offset}_len_{len}") + } + None => continue, + }, + 14 => match text::helpers::insert_random_string::<8>(input, rng) { + Some((offset, len)) => { + format!("InsertRandomString_offset_{offset}_len_{len}") + } + None => continue, + }, + 15 => match text::helpers::insert_random_string::<128>(input, rng) { + Some((offset, len)) => { + format!("InsertRandomString_offset_{offset}_len_{len}") + } + None => continue, + }, + 16 => match text::helpers::insert_repeated_chars::<4>(input, rng) { + Some((offset, len, val)) => { + format!("InsertRepeatedChars_offset_{offset}_len_{len}_val_{val:x}") + } + None => continue, + }, + 17 => match text::helpers::insert_repeated_chars::<1024>(input, rng) { + Some((offset, len, val)) => { + format!("InsertRepeatedChars_offset_{offset}_len_{len}_val_{val:x}") + } + + None => continue, + }, + 18 | 19 | 20 | 21 => match dictionary { + Some(dictionary) => { + if dictionary.is_empty() { + continue; + } + let dict_idx = rng.gen_range(0..dictionary.len()); + let other = &dictionary[dict_idx]; + // validity check + if other.is_empty() || other.as_ptr() == input.as_ptr() { + continue; + } + if input.is_empty() { + input.extend_from_slice(other); + format!( + "SpliceDictionaryExtend_into_{}_{}_from_{}_{}_{}", + 0, + 0, + dict_idx, + 0, + other.len() + ) + } else { + if let Some((dstr, srcr)) = splice_extend(input, other, rng) { + format!( + "SpliceDictionaryExtend_into_{}_{}_from_{}_{}_{}", + dstr.start, dstr.end, dict_idx, srcr.start, srcr.end + ) + } else { + continue; + } + } + } + None => continue, + }, + 22 | 23 | 24 => { + if let Some((src, dst, len)) = splice_within(input, rng) { + format!("SpliceWithin_srcoffset_{src:#x}_dstoffset_{dst:#x}_len_{len:#x}") + } else { + continue; + } + } + _ => unreachable!(), + }; + + mutation_log.push(log_str); + mutated += 1; + } + + if 
mutation_log.is_empty() { + None + } else { + Some(mutation_log) + } +} + +/// Mutate a string using up to `max_mutations` mutation operators. The input is never +/// resized. +pub fn mutate_string( + input: &mut String, + dictionary: Option<&Vec>>, + splice_with: Option<&str>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> { + if max_mutations == 0 { + return None; + } + // SAFETY: this is safe, as we will not use the original string. instead we replace it with + // the String returned by `from_utf8_lossy` below. + let mut vec = unsafe { input.as_mut_vec() }; + let log = mutate_text_vec( + &mut vec, + dictionary, + splice_with.map(|v| v.as_bytes()), + max_mutations, + rng, + ); + *input = String::from_utf8_lossy(&vec).to_string(); + log +} + +/// Mutate a primitive integer number +pub fn mutate_number( + input: &mut T, + dictionary: Option<&Vec>>, + mut splice_with: Option<&T>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> +where + rand::distributions::Standard: rand::distributions::Distribution, +{ + if max_mutations == 0 { + return None; + } + let mut mutation_log: Vec = vec![]; + + let num_mutations = (rng.gen_range(1..=max_mutations)) as u32; + let mut mutated = 0_u32; + let mut tries = 0_u32; + + while mutated < num_mutations { + tries += 1; + if tries >= (max_mutations as u32) * 3 { + // input is probably not suitable for mutation (e.g., to small) - bail out. 
+ log::debug!( + "attempted {} mutations, only {} succeeded, goal {}", + tries, + mutated, + num_mutations + ); + break; + } + if let Some(log_str) = + numbers::helpers::mutate_integer_once(input, dictionary, &mut splice_with, rng) + { + mutation_log.push(log_str); + mutated += 1; + } + } + + if mutation_log.is_empty() { + None + } else { + Some(mutation_log) + } +} + +/// mutate an array/slice of numbers +pub fn mutate_number_array( + input: &mut [T], + dictionary: Option<&Vec>>, + splice_with: Option<&[T]>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> +where + rand::distributions::Standard: rand::distributions::Distribution, +{ + if max_mutations == 0 { + return None; + } + if input.is_empty() { + return None; + } + let mut mutation_log: Vec = vec![]; + + let num_mutations = (rng.gen_range(1..=max_mutations)) as u32; + let mut mutated = 0_u32; + let mut tries = 0_u32; + let mut splice_with = splice_with.map_or(None, |s| s.choose(rng)); + + while mutated < num_mutations { + tries += 1; + if tries >= (max_mutations as u32) * 3 { + // input is probably not suitable for mutation (e.g., to small) - bail out. + log::debug!( + "attempted {} mutations, only {} succeeded, goal {}", + tries, + mutated, + num_mutations + ); + break; + } + + if let Some(target) = input.choose_mut(rng) { + if let Some(log_str) = + numbers::helpers::mutate_integer_once(target, dictionary, &mut splice_with, rng) + { + mutation_log.push(log_str); + mutated += 1; + } + } + } + + if mutation_log.is_empty() { + None + } else { + Some(mutation_log) + } +} + +/// mutate a `Vec` where `T` is a primitive integer number. 
+pub fn mutate_number_vec( + input: &mut Vec, + dictionary: Option<&Vec>>, + mut splice_with: Option<&[T]>, + max_mutations: usize, + rng: &mut impl rand::Rng, +) -> Option> +where + rand::distributions::Standard: rand::distributions::Distribution, +{ + if max_mutations == 0 { + return None; + } + if input.is_empty() { + return None; + } + + let num_mutations = (rng.gen_range(1..=max_mutations)) as u32; + let mut mutated = 0_u32; + let mut tries = 0_u32; + let mut mutation_log = vec![]; + while mutated < num_mutations { + tries += 1; + if tries >= (max_mutations as u32) * 3 { + // input is probably not suitable for mutation (e.g., to small) - bail out. + log::debug!( + "attempted {} mutations, only {} succeeded, goal {}", + tries, + mutated, + num_mutations + ); + break; + } + + let choice = rng.gen_range(0_u32..10); + let log_str = match choice { + 0 => match splice_with.take() { + Some(other) => { + if let Some((dstr, srcr)) = splice_extend(input, other, rng) { + format!( + "SpliceBytesExtend_into_{}_{}_from_{}_{}", + dstr.start, dstr.end, srcr.start, srcr.end + ) + } else { + continue; + } + } + None => continue, + }, + 1 => { + match splice_with.take() { + Some(other) => { + if let Some((input_offset, other_offset, length)) = + splice_other_inplace(input, &other, rng) + { + format!("SpliceInplace_offset_{input_offset}_other_{other_offset}_len_{length}") + } else { + continue; + } + } + None => continue, + } + } + 2 => { + if let Some((src, dst, len)) = splice_within(input, rng) { + format!("SpliceWithin_srcoffset_{src:#x}_dstoffset_{dst:#x}_len_{len:#x}") + } else { + continue; + } + } + _ => { + if let Some(target) = input.choose_mut(rng) { + let mut splice_with = splice_with.map_or(None, |s| s.choose(rng)); + if let Some(log_str) = numbers::helpers::mutate_integer_once( + target, + dictionary, + &mut splice_with, + rng, + ) { + log_str + } else { + continue; + } + } else { + continue; + } + } + }; + + mutation_log.push(log_str); + mutated += 1; + } + + if 
mutation_log.is_empty() { + None + } else { + Some(mutation_log) + } +} diff --git a/src/mutators/helpers.rs b/src/mutators/helpers.rs new file mode 100644 index 0000000..314790f --- /dev/null +++ b/src/mutators/helpers.rs @@ -0,0 +1,141 @@ +//! Generic helper functions for mutation. Mostly splicing. + +/// helper to splice data within a vector. +#[inline] +pub fn splice_within>( + input: &mut T, + rng: &mut impl rand::Rng, +) -> Option<(isize, isize, usize)> { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + + let src = rng.gen_range(0..input.len()) as isize; + let dst = rng.gen_range(0..input.len()) as isize; + + // Get the larger of the two positions + let largest = std::cmp::max(src, dst); + + // Get the maximum slice that is not out of bounds + let max_len: usize = input.len() - (largest as usize); + + // Randomly choose a length of slice to copy that is in bounds + let len = rng.gen_range(0..max_len); + + // Copy the slice internally. These buffers could overlap + // SAFETY: src and dst are within the bounds of input + unsafe { + std::ptr::copy( + input.as_ptr().offset(src), + input.as_mut_ptr().offset(dst), + len, + ); + } + + Some((src, dst, len)) +} + +/// Copy a random sub-slice from `src` into a random subslice of `dst`. +/// This will potentially grow or shrink the destination vector. 
+#[inline] +pub fn splice_extend( + dst: &mut Vec, + src: &[C], + rng: &mut impl rand::Rng, +) -> Option<(std::ops::Range, std::ops::Range)> { + if src.is_empty() { + return None; + } + + let src_start = rng.gen_range(0..src.len()); + let src_end = rng.gen_range(src_start..=src.len()); + if dst.is_empty() { + dst.extend_from_slice(&src[src_start..src_end]); + return Some((0..0, src_start..src_end)); + } + + let dst_start = rng.gen_range(0..dst.len()); + let dst_end = rng.gen_range(dst_start..=dst.len()); + + crate::utils::vec::splice_into(dst, dst_start..dst_end, &src[src_start..src_end]); + Some(((dst_start..dst_end), (src_start..src_end))) +} + +/// Copy a random sub-slice from `src` into a random subslice of `dst`. +/// This will potentially grow or shrink the destination vector. +/// This will call clone on every element. Use [`splice_extend`] if your type is `Copy` for better +/// performance. +#[inline] +pub fn splice_clone_extend( + dst: &mut Vec, + src: &[C], + rng: &mut impl rand::Rng, +) -> Option<(std::ops::Range, std::ops::Range)> { + if src.is_empty() { + return None; + } + + let src_start = rng.gen_range(0..src.len()); + let src_end = rng.gen_range(src_start..=src.len()); + if dst.is_empty() { + dst.extend( + src.iter() + .skip(src_start) + .take(src_end - src_start) + .cloned(), + ); + return Some((0..0, src_start..src_end)); + } + + let dst_start = rng.gen_range(0..dst.len()); + let dst_end = rng.gen_range(dst_start..=dst.len()); + + dst.splice( + dst_start..dst_end, + src.iter() + .skip(src_start) + .take(src_end - src_start) + .cloned(), + ); + Some(((dst_start..dst_end), (src_start..src_end))) +} + +/// Copy sub-slice from another slice into the current one. +/// +/// # returns +/// +/// `Some((input_offset, other_offset, length))` or `None` if not applicable. 
+#[inline] +pub fn splice_other_inplace, S: AsRef<[C]>>( + input: &mut T, + other: &S, + rng: &mut impl rand::Rng, +) -> Option<(usize, usize, usize)> { + let input = input.as_mut(); + let other = other.as_ref(); + if other.is_empty() || input.is_empty() || input.len() < 8 { + return None; + } + + let other_start = rng.gen_range(0..other.len()); // at least 0..1 -> no panic + let other_end = rng.gen_range(other_start..=other.len()); // at least 1..=1 -> no panic + + // skip splicing another small input + if (other_end - other_start) < 4 { + return None; + } + let splice_from = &other[other_start..other_end]; + if splice_from.len() >= input.len() { + return None; + } + + let splice_len = splice_from.len(); + let input_offset = rng.gen_range(0_usize..(input.len() - splice_len)); + + // Splice the found + input[input_offset..(input_offset + splice_len)].copy_from_slice(splice_from); + + // Output mutation + Some((input_offset, other_start, splice_len)) +} diff --git a/src/mutators/mod.rs b/src/mutators/mod.rs new file mode 100644 index 0000000..3a9bee8 --- /dev/null +++ b/src/mutators/mod.rs @@ -0,0 +1,7 @@ +//! Implementations of several common mutator functions + +pub mod bytes; +pub mod havoc; +pub mod text; +pub mod numbers; +pub mod helpers; diff --git a/src/mutators/numbers.rs b/src/mutators/numbers.rs new file mode 100644 index 0000000..c933db3 --- /dev/null +++ b/src/mutators/numbers.rs @@ -0,0 +1,261 @@ +//! mutation functions for numbers and arrays of numbers. 
+ +use num_traits::{ + FromPrimitive, Num, PrimInt, WrappingAdd, WrappingMul, WrappingNeg, WrappingShl, WrappingShr, + WrappingSub, +}; + +/// Trait used to summarize needed supertraits for fuzzing primitive numbers +pub trait FuzzNum: + Copy + + Clone + + Sized + + Num + + WrappingAdd + + WrappingSub + + WrappingNeg + + WrappingMul + + WrappingShl + + WrappingShr + + PrimInt + + FromPrimitive +{ + /// create a number for a byteslice using little-endian byteorder + /// + /// ```rust + /// # use snapchange::mutators::numbers::FuzzNum; + /// + /// assert_eq!(0x12345678u32, u32::from_le_byteslice(&[0x78, 0x56, 0x34, 0x12])); + /// assert_eq!(0x12345678u32, u32::from_le_byteslice(&[0x78, 0x56, 0x34, 0x12, 0x00, 0x00])); + /// assert_eq!(0x12345678u64, u64::from_le_byteslice(&[0x78, 0x56, 0x34, 0x12, 0x00, 0x00])); + /// assert_eq!(0xff_u8, u8::from_le_byteslice(&[0xff, 0x00, 0x00])); + /// assert_eq!(0_u8, u8::from_le_byteslice(&[])); + /// assert_eq!(-1_i8, i8::from_le_byteslice(&[0xff, 0x00, 0x00])); + /// assert_eq!(-1_i16, i16::from_le_byteslice(&[0xff, 0xff])); + /// ``` + fn from_le_byteslice>(bytes: B) -> Self { + let mut res = Self::zero(); + let byte_count = res.count_zeros() / 8; + for (byte, byte_index) in bytes.as_ref().iter().copied().zip(0..byte_count) { + let shift_by = byte_index * 8; + res = res | Self::from_u8(byte).unwrap().wrapping_shl(shift_by) + } + res + } + + /// create a number for a byteslice using big-endian byteorder + /// + /// ```rust + /// # use snapchange::mutators::numbers::FuzzNum; + /// + /// assert_eq!(0x12345678u32, u32::from_be_byteslice(&[0x12, 0x34, 0x56, 0x78]), "u32 basic"); + /// assert_eq!(0x12345678u32, u32::from_be_byteslice(&[0x12, 0x34, 0x56, 0x78, 0x00, 0x00]), "u32 excess bytes"); + /// assert_eq!(0x12345678u64, u64::from_be_byteslice(&[0x00, 0x00, 0x12, 0x34, 0x56, 0x78]), "u64 lacking bytes"); + /// assert_eq!(-1_i16, i16::from_be_byteslice(&[0xff, 0xff]), "i16"); + /// assert_eq!(-1_i32, 
i32::from_be_byteslice(&[0xff, 0xff, 0xff, 0xff, 0x00]), "i32 excess bytes"); + /// ``` + fn from_be_byteslice>(bytes: B) -> Self { + let mut res = Self::zero(); + let bytes = bytes.as_ref(); + let byte_count = res.count_zeros() / 8; + let until = std::cmp::min(byte_count as usize, bytes.len()); + for (byte, byte_index) in bytes[0..until] + .iter() + .copied() + .zip((0..until).into_iter().rev()) + { + let shift_by = byte_index * 8; + res = res | Self::from_u8(byte).unwrap().wrapping_shl(shift_by as u32) + } + res + } +} + +impl FuzzNum for u8 { + fn from_le_byteslice>(bytes: B) -> Self { + let bytes = bytes.as_ref(); + if bytes.is_empty() { + 0u8 + } else { + bytes[0] + } + } + + fn from_be_byteslice>(bytes: B) -> Self { + Self::from_le_byteslice(bytes) + } +} +impl FuzzNum for u16 {} +impl FuzzNum for u32 {} +impl FuzzNum for u64 {} +impl FuzzNum for i8 { + fn from_le_byteslice>(bytes: B) -> Self { + let bytes = bytes.as_ref(); + if bytes.is_empty() { + 0i8 + } else { + bytes[0] as Self + } + } + + fn from_be_byteslice>(bytes: B) -> Self { + Self::from_le_byteslice(bytes) + } +} +impl FuzzNum for i16 {} +impl FuzzNum for i32 {} +impl FuzzNum for i64 {} + +/// helpers for the mutation functions +pub mod helpers { + use super::*; + use crate::mutators::bytes::helpers::{ + INTERESTING_U16, INTERESTING_U32, INTERESTING_U64, INTERESTING_U8, + }; + use rand::seq::{IteratorRandom, SliceRandom}; + + /// mutate a primitive integer number once + pub fn mutate_integer_once( + input: &mut T, + dictionary: Option<&Vec>>, + splice_with: &mut Option<&T>, + rng: &mut impl rand::Rng, + ) -> Option + where + rand::distributions::Standard: rand::distributions::Distribution, + { + let bytelen: usize = std::mem::size_of_val(input); + let bitlen: usize = bytelen * 8; + let choice = rng.gen_range(0..=15); + match choice { + 0 => { + let bit_offset = bitlen - 1; + *input = *input ^ (T::one() << bit_offset); + Some("NumBitFlip".to_string()) + } + 1 => { + *input = 
input.wrapping_add(&T::one()); + Some("NumAddOne".to_string()) + } + 2 => { + *input = input.wrapping_sub(&T::one()); + Some("NumSubOne".to_string()) + } + 3 => { + *input = input.wrapping_neg(); + Some("NumNeg".to_string()) + } + 4 => { + *input = input.wrapping_shl(1); + Some("NumShlOne".to_string()) + } + 5 => { + *input = input.wrapping_shr(1); + Some("NumShrOne".to_string()) + } + 6 => { + match bitlen { + 8 => *input = T::from_u8(*INTERESTING_U8.choose(rng).unwrap())?, + 16 => *input = T::from_u16(*INTERESTING_U16.choose(rng).unwrap())?, + 32 => *input = T::from_u32(*INTERESTING_U32.choose(rng).unwrap())?, + 64 => *input = T::from_u64(*INTERESTING_U64.choose(rng).unwrap())?, + _ => return None, + }; + Some("NumReplaceWithInteresting".to_string()) + } + 7 => { + match bitlen { + 8 => *input = *input ^ T::from_u8(*INTERESTING_U8.choose(rng).unwrap())?, + 16 => *input = *input ^ T::from_u16(*INTERESTING_U16.choose(rng).unwrap())?, + 32 => *input = *input ^ T::from_u32(*INTERESTING_U32.choose(rng).unwrap())?, + 64 => *input = *input ^ T::from_u64(*INTERESTING_U64.choose(rng).unwrap())?, + _ => return None, + }; + Some("NumXorWithInteresting".to_string()) + } + 8 => { + match bitlen { + 8 => { + *input = + input.wrapping_add(&(T::from_u8(*INTERESTING_U8.choose(rng).unwrap())?)) + } + 16 => { + *input = input + .wrapping_add(&(T::from_u16(*INTERESTING_U16.choose(rng).unwrap())?)) + } + 32 => { + *input = input + .wrapping_add(&(T::from_u32(*INTERESTING_U32.choose(rng).unwrap())?)) + } + 64 => { + *input = input + .wrapping_add(&(T::from_u64(*INTERESTING_U64.choose(rng).unwrap())?)) + } + _ => return None, + }; + Some("NumAddInteresting".to_string()) + } + 9 => { + let other: T = rng.gen(); + *input = input.wrapping_add(&other); + Some("NumAddRand".to_string()) + } + 10 => { + let other: T = rng.gen(); + *input = *input ^ other; + Some("NumXorRand".to_string()) + } + 11 => match splice_with.take() { + Some(&other) => { + *input = input.wrapping_add(&other); + 
Some("NumAddSplice".to_string()) + } + None => None, + }, + 12 => match splice_with.take() { + Some(&other) => { + *input = (*input) ^ other; + Some("NumXorSplice".to_string()) + } + None => None, + }, + 13 | 14 | 15 => match dictionary { + Some(dictionary) => { + if dictionary.is_empty() { + return None; + } + let other = dictionary + .iter() + .filter(|entry| entry.len() <= bytelen) + .choose(rng)?; + let mut other_bytes = vec![0u8; bytelen]; + let other: T = if rng.gen_bool(0.5) { + let start = bytelen - other.len(); + other_bytes[start..].copy_from_slice(&other); + T::from_be_byteslice(&other_bytes) + } else { + other_bytes[0..other.len()].copy_from_slice(&other); + T::from_le_byteslice(&other_bytes) + }; + match choice { + 13 => { + *input = other; + Some("NumReplDict".to_string()) + } + 14 => { + *input = input.wrapping_add(&other); + Some("NumAddDict".to_string()) + } + 15 => { + *input = *input ^ other; + Some("NumXorDict".to_string()) + } + _ => unreachable!(), + } + } + None => None, + }, + _ => unreachable!(), + } + } +} diff --git a/src/mutators/text/helpers.rs b/src/mutators/text/helpers.rs new file mode 100644 index 0000000..664d981 --- /dev/null +++ b/src/mutators/text/helpers.rs @@ -0,0 +1,464 @@ +use lazy_static::lazy_static; +use rand::distributions::{Alphanumeric, DistString}; +use rand::prelude::Distribution; +use rand::seq::{IteratorRandom, SliceRandom}; +use regex::bytes::Regex; + +use crate::mutators::bytes::helpers::INTERESTING_U64; +use crate::utils; + +lazy_static! 
{ + pub static ref INTERESTING_INTEGERS: Vec = { + INTERESTING_U64 + .iter() + .copied() + .map(|i| format!("{}", i)) + .collect() + }; + pub static ref INTERESTING_HEX_INTEGERS: Vec = { + INTERESTING_U64 + .iter() + .copied() + .map(|i| format!("{:#x}", i)) + .collect() + }; + pub static ref INTEGER_REGEX: Regex = Regex::new(r"[^\d][\d]+[^\d]").unwrap(); + pub static ref HEX_INTEGER_REGEX: Regex = + Regex::new(r"[^\da-fA-F][\da-fA-F][^\da-fA-F]").unwrap(); +} + +#[derive(Hash, PartialEq, Debug, Copy, Clone)] +pub enum DelimiterDirection { + Forward, + Backward, +} + +pub fn other_delimiter(delim: u8) -> Option<(u8, DelimiterDirection)> { + match delim { + b'<' => Some((b'>', DelimiterDirection::Forward)), + b'>' => Some((b'<', DelimiterDirection::Backward)), + b'(' => Some((b')', DelimiterDirection::Forward)), + b')' => Some((b'(', DelimiterDirection::Backward)), + b'{' => Some((b'}', DelimiterDirection::Forward)), + b'}' => Some((b'{', DelimiterDirection::Backward)), + b'[' => Some((b']', DelimiterDirection::Forward)), + b']' => Some((b'[', DelimiterDirection::Backward)), + b'"' => Some((b'"', DelimiterDirection::Forward)), + b'\'' => Some((b'\'', DelimiterDirection::Forward)), + _ => None, + } +} + +// pub fn pseudo_parse(data: &[u8]) -> + +pub fn get_delimited_block( + _data: &[u8], + _rng: &mut impl rand::Rng, +) -> Option> { + todo!(); + + // None +} + +pub fn get_delimited_block_quickly( + _data: &[u8], + _rng: &mut impl rand::Rng, +) -> Option> { + todo!(); + // other.iter().copied(). + // None +} + +/// Generate a random ascii string with the exact length. +pub fn random_ascii_string_exact(rng: &mut impl rand::Rng, max_length: usize) -> String { + Alphanumeric.sample_string(rng, max_length) +} + +/// Generate a random ascii string with a length up to max_length. 
+pub fn random_ascii_string(rng: &mut impl rand::Rng, max_length: usize) -> String { + assert!(max_length > 1); + let start = if max_length > 5 { 5 } else { 1 }; + let size = rng.gen_range(start..max_length); + Alphanumeric.sample_string(rng, size) +} + +/// Identify an integer in the text input and replace it with the given replacement bytes. +pub fn replace_integer_with( + data: &mut Vec, + repl: &[u8], + rng: &mut impl rand::Rng, +) -> Option> { + if let Some(irange) = INTEGER_REGEX + .find_iter(&data) + .choose(rng) + .map(|m| (m.start() + 1)..m.end()) + { + utils::vec::splice_into(data, irange.clone(), repl); + Some(irange) + } else { + None + } +} + +/// Identify a hex integer in the text input and replace it with the given replacement bytes. +pub fn replace_hex_integer_with( + data: &mut Vec, + repl: &[u8], + rng: &mut impl rand::Rng, +) -> Option> { + if let Some(irange) = HEX_INTEGER_REGEX + .find_iter(&data) + .choose(rng) + .map(|m| m.range()) + { + utils::vec::splice_into(data, irange.clone(), repl); + Some(irange) + } else { + None + } +} + +/// Identify an integer and replace it with a random u64. +pub fn replace_integer_with_rand( + data: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option> { + let i: u64 = rng.gen(); + let repl = format!("{}", i).into_bytes(); + replace_integer_with(data, &repl, rng) +} + +/// Identify a hex integer and replace it with a random u64. +pub fn replace_hex_integer_with_rand( + data: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option> { + let i: u64 = rng.gen(); + let repl = format!("{:#x}", i).into_bytes(); + replace_integer_with(data, &repl, rng) +} + +/// Identify an integer and replace it with an interesting integer value. +/// See [`INTERESTING_U64`] for the set of values. 
+pub fn replace_integer_with_interesting( + data: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option> { + let repl = INTERESTING_INTEGERS.choose(rng).unwrap().as_bytes(); + replace_integer_with(data, repl, rng) +} + +/// Identify a hex integer and replace it with an interesting integer value. +/// See [`INTERESTING_U64`] for the set of values. +pub fn replace_hex_integer_with_interesting( + data: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option> { + let repl = INTERESTING_HEX_INTEGERS.choose(rng).unwrap().as_bytes(); + replace_integer_with(data, repl, rng) +} + +/// Replace a random char in the input - more likely with another ascii value. +pub fn char_replace>( + input: &mut T, + rng: &mut impl rand::Rng, +) -> Option<(usize, u8)> { + let input = input.as_mut(); + if input.is_empty() { + return None; + } + let idx = rng.gen_range(0..input.len()); + let ascii = rng.gen_bool(0.8); + let r: u8 = if ascii { + Alphanumeric.sample(rng) + } else { + rng.gen() + }; + input[idx] = r; + Some((idx, r)) +} + +/// Insert a random ascii string at random offset. +/// returns `(offset, inserted_len)`. +pub fn insert_random_string( + input: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option<(usize, usize)> { + if input.is_empty() { + return None; + } + let s = random_ascii_string(rng, N); + let idx = rng.gen_range(0..=input.len()); + utils::vec::fast_insert_at(input, idx, s.as_bytes()); + Some((idx, s.len())) +} + +/// Insert up to N random ascii chars at a random offset. +pub fn insert_repeated_chars( + input: &mut Vec, + rng: &mut impl rand::Rng, +) -> Option<(usize, u8, usize)> { + if input.is_empty() { + return None; + } + + let count = rng.gen_range(0..N); + let c: u8 = Alphanumeric.sample(rng); + let data = [c; N]; + let idx = rng.gen_range(0..=input.len()); + utils::vec::fast_insert_at(input, idx, &data[..count]); + Some((idx, c, count)) +} + +/// Insert data after a separator and add another separator. 
+/// +/// ```rust,ignore +/// let mut v = b"asdf; asdf"; +/// insert_separated(v, ';', "XXXX", rng); +/// assert_eq!(v, b"asdf;XXXX; asdf"); +/// ``` +pub fn insert_separated( + input: &mut Vec, + sep: T, + other: &[u8], + rng: &mut impl rand::Rng, +) -> Option +where + T: TryInto, + >::Error: std::fmt::Debug, +{ + let sep: u8 = sep.try_into().unwrap(); + input.reserve(other.len() + 2); + + if input.is_empty() { + input.push(sep); + input.extend_from_slice(other); + input.push(sep); + return Some(0); + } + + // select a random occurence of the separator char or otherwise append. + if let Some(index) = input + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .choose(rng) + { + // insert after the seperator and append another separator. + let sep = [sep]; + utils::vec::fast_insert_two_at(input, index + 1, other, &sep); + Some(index) + } else { + // insert (separator + other + separator) into the input + let l = input.len(); + // or append separator + other at the end of the testcase + input.push(sep); + input.extend_from_slice(other); + input.push(sep); + Some(l) + } +} + +/// Insert data after a separator. +/// +/// ```rust,ignore +/// let mut v = b"asdf; asdf"; +/// insert_separated(v, ';', "XXXX", rng); +/// assert_eq!(v, b"asdf;XXXX asdf"); +/// +/// let mut v = b"var asdf = \"asdf\";"; +/// insert_at_separator(v, '"', "XXXX", rng); +/// assert!(&v == b"var asdf = \"XXXXasdf\";" || &v == b"var asdf = \"asdf\"XXXX;"); +/// ``` +pub fn insert_after_separator( + input: &mut Vec, + sep: T, + other: &[u8], + rng: &mut impl rand::Rng, +) -> Option +where + T: TryInto, + >::Error: std::fmt::Debug, +{ + let sep: u8 = sep.try_into().unwrap(); + input.reserve(other.len() + 1); + + if input.is_empty() { + input.push(sep); + input.extend_from_slice(other); + return Some(0); + } + + // select a random occurence of the separator char or otherwise append. 
+ if let Some(index) = input + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .choose(rng) + { + // insert after the seperator + utils::vec::fast_insert_at(input, index + 1, other); + Some(index) + } else { + // or append separator + other at the end of the testcase + let l = input.len(); + input.push(sep); + input.extend_from_slice(other); + Some(l) + } +} + +/// Insert data before a separator. +/// +/// ```rust,ignore +/// let mut v = b"asdf; asdf"; +/// insert_separated(v, ';', "XXXX", rng); +/// assert_eq!(v, b"asdfXXXX; asdf"); +/// +/// let mut v = b"var asdf = \"asdf\";"; +/// insert_at_separator(v, '"', "XXXX", rng); +/// assert!(&v == b"var asdf = XXXX\"asdf\";" || &v == b"var asdf = \"asdfXXXX\";"); +/// ``` +pub fn insert_before_separator( + input: &mut Vec, + sep: T, + other: &[u8], + rng: &mut impl rand::Rng, +) -> Option +where + T: TryInto, + >::Error: std::fmt::Debug, +{ + let sep: u8 = sep.try_into().unwrap(); + input.reserve(other.len() + 1); + + if input.is_empty() { + input.push(sep); + input.extend_from_slice(other); + return Some(0); + } + + // select a random occurence of the separator char or otherwise append. + if let Some(index) = input + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .choose(rng) + { + // insert after the seperator + utils::vec::fast_insert_at(input, index, other); + Some(index) + } else { + // or just append + let l = input.len(); + input.extend_from_slice(other); + input.push(sep); + Some(l) + } +} + +/// Delete data between two separators +/// Returns range of deleted data. 
+#[inline] +pub fn delete_between_separator( + input: &mut Vec, + sep: T, + rng: &mut impl rand::Rng, +) -> Option<(usize, usize)> +where + T: TryInto, + >::Error: std::fmt::Debug, +{ + let sep: u8 = sep.try_into().unwrap(); + if input.is_empty() { + return None; + } + + let sep_idx: Vec = input + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .collect(); + if sep_idx.is_empty() { + return None; + } + let start = rng.gen_range(0_usize..sep_idx.len()); + let start_offset = sep_idx[start]; + let end_offset = if let Some(o) = sep_idx.get(start + 1) { + *o + } else { + input.len() + }; + + input.splice(start_offset..end_offset, []); + + Some((start_offset, end_offset)) +} + +/// Duplicate data between two separators +#[inline] +pub fn dup_between_separator( + input: &mut Vec, + sep: T, + rng: &mut impl rand::Rng, +) -> Option<(usize, usize)> +where + T: TryInto, + >::Error: std::fmt::Debug, +{ + let sep: u8 = sep.try_into().unwrap(); + if input.is_empty() { + return None; + } + + let sep_idx: Vec = input + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .collect(); + if sep_idx.is_empty() { + return None; + } + let start = rng.gen_range(0_usize..sep_idx.len()); + let start_offset = sep_idx[start]; + let end_offset = if let Some(o) = sep_idx.get(start + 1) { + *o + } else { + input.push(sep); + input.len() + }; + + // let src = &input[start_offset..end_offset]; + // input.splice(end_offset..end_offset, src.iter().copied()); + crate::utils::vec::insert_from_within(input, end_offset, start_offset..end_offset); + + Some((start_offset, end_offset)) +} + +/// Insert from dictionary at a given separator. +/// returns `(dict_index, offset_in_input)`. 
+pub fn insert_from_dictionary_at_const_separator( + input: &mut Vec, + rng: &mut impl rand::Rng, + dictionary: Option<&Vec>>, +) -> Option<(usize, usize)> { + if let Some(dictionary) = dictionary { + if !dictionary.is_empty() { + let sep: u8 = C.try_into().unwrap(); + // select another corpus item + let dict_idx = rng.gen_range(0..dictionary.len()); + let other = &dictionary[dict_idx]; + + let offset = insert_after_separator(input, sep, other, rng)?; + + return Some((dict_idx, offset)); + } + } + + None +} diff --git a/src/mutators/text/mod.rs b/src/mutators/text/mod.rs new file mode 100644 index 0000000..e4ea0f7 --- /dev/null +++ b/src/mutators/text/mod.rs @@ -0,0 +1,273 @@ +//! Text mutators. + +#![allow(missing_docs)] +#![allow(dead_code)] + +use std::sync::Arc; + +use rand::seq::SliceRandom; +use rand::Rng as _; + +use crate::fuzz_input::InputWithMetadata; +use crate::input_types::TextInput; +use crate::rng::Rng; +use crate::utils; + +pub mod helpers; + +/// Replace a random byte in the input. +pub fn char_replace( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + let (idx, r) = helpers::char_replace(&mut input.data, rng)?; + Some(format!("ByteReplace_offset_{idx}_val_{r:x}")) +} + +/// Insert a random string. +pub fn insert_random_string( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + assert!(!input.is_empty()); + let s = helpers::random_ascii_string(rng, N); + let idx = rng.gen_range(0..=input.len()); + utils::vec::fast_insert_at(input.data_mut(), idx, s.as_bytes()); + Some(format!("InsertRandom_offset_{}_len_{}", idx, s.len())) +} + +/// Insert repeated a randomly chosen repeated char (up to N times). 
+pub fn insert_repeated_chars( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + let (offset, c, len) = helpers::insert_repeated_chars::(input.data_mut(), rng)?; + Some(format!("InsertRepeated_{c:x}_len_{len}_offset_{offset}")) +} + +/// Insert another corpus entry as a whole into the current input at a given separator. +pub fn insert_from_corpus_separated_by( + input: &mut TextInput, + corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if corpus.len() < 2 { + return None; + } + // select another corpus item != the current one - we require that there are at least two corpus + // entries so we know this doesn't loop endlessly. + let other = loop { + let other = corpus.choose(rng).unwrap(); + if other.data().as_ptr() != input.data().as_ptr() { + break other; + } + }; + + let index = helpers::insert_separated(input.data_mut(), C, &other.data(), rng)?; + + Some(format!("InsertFromCorpus_at_{index}")) +} + +/// Insert from dictionary at a given separator. +pub fn insert_from_dictionary_separated_by( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + dictionary: &Option>>, +) -> Option { + if let Some((dict_idx, offset)) = helpers::insert_from_dictionary_at_const_separator::( + input.data_mut(), + rng, + dictionary.as_ref(), + ) { + let sep: u8 = C.try_into().unwrap(); + return Some(format!( + "InsertFromDictAtSep_{sep:x}_dict_{dict_idx}_offset_{offset}" + )); + } + + None +} + +/// Insert random dictionary entry into the text input. 
+pub fn insert_from_dictionary( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + dictionary: &Option>>, +) -> Option { + if let Some(dictionary) = dictionary { + if !dictionary.is_empty() { + let dict_idx = rng.gen_range(0..dictionary.len()); + let entry = &dictionary[dict_idx]; + let index = rng.gen_range(0..input.len()); + utils::vec::fast_insert_at(input.data_mut(), index, &entry[..]); + // TODO: add mutation log + return Some(format!("InsertFromDict_{dict_idx}_offset_{index}")); + } + } + None +} + +/// Splice random dictionary entry into the text input. +pub fn splice_from_dictionary( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + dictionary: &Option>>, +) -> Option { + if let Some(dictionary) = dictionary { + if !dictionary.is_empty() { + let dict_idx = rng.gen_range(0..dictionary.len()); + let entry = &dictionary[dict_idx]; + let (dstr, srcr) = + crate::mutators::bytes::helpers::splice_bytes_extend(input.data_mut(), entry, rng)?; + return Some(format!( + "SpliceFromDictionaryExtend_into_{}_{}_from_{}_{}_{}", + dstr.start, dstr.end, dict_idx, srcr.start, srcr.end + )); + } + } + None +} + +/// Identify and replace a integer in the text input with an interesting or random integer. +pub fn replace_integer( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if rng.gen_bool(0.75) { + helpers::replace_integer_with_interesting(input.data_mut(), rng); + Some("ReplaceIntegerWithInteresting".to_string()) + } else { + helpers::replace_integer_with_rand(input.data_mut(), rng); + Some("ReplaceIntegerWithRand".to_string()) + } +} + +/// Identify and replace a hex integer in the text input with an interesting or random integer. 
+pub fn replace_hex_integer( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if rng.gen_bool(0.75) { + helpers::replace_hex_integer_with_interesting(input.data_mut(), rng); + Some("ReplaceHexIntegerWithInteresting".to_string()) + } else { + helpers::replace_hex_integer_with_rand(input.data_mut(), rng); + Some("ReplaceHexIntegerWithRand".to_string()) + } +} + +/// Copy random data inside of the input to another place in the input. +pub fn splice_within( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + let (src, dst, len) = crate::mutators::bytes::helpers::splice_within(input.data_mut(), rng)?; + Some(format!( + "SpliceWithin_srcoffset_{src:#x}_dstoffset_{dst:#x}_len_{len:#x}" + )) +} + +/// duplicate content found between a separator +pub fn dup_between_separator( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + let (start, end) = helpers::dup_between_separator(input.data_mut(), C, rng)?; + let sep: u8 = C.try_into().unwrap(); + Some(format!("DupBetween_{sep:x}_{start}_{end}")) +} + +/// remove content between a separator +pub fn delete_between_separator( + input: &mut TextInput, + _corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + let (start, end) = helpers::dup_between_separator(input.data_mut(), C, rng)?; + let sep: u8 = C.try_into().unwrap(); + Some(format!("DeleteBetween_{sep:x}_{start}_{end}")) +} + +/// Treat text as bytes input and apply a bunch of mutations like it were a binary format. +pub fn havoc_as_bytes( + input: &mut TextInput, + corpus: &[Arc>], + rng: &mut Rng, + dictionary: &Option>>, +) -> Option { + let max_mutations = 16; + // find a second testcase except for the current one. 
+ let splice_with = if corpus.len() >= 2 { + loop { + let other = corpus.choose(rng).unwrap(); + if other.data().as_ptr() != input.data.as_ptr() { + break Some(other.data()); + } + } + } else { + None + }; + let res = crate::mutators::havoc::mutate_vec( + input.data_mut(), + dictionary.as_ref(), + splice_with, + max_mutations, + rng, + )?; + let mut log = "HavocAsBytes_".to_string(); + for mutation in res.into_iter() { + log.push_str(mutation.as_str()); + log.push('_'); + } + Some(log) +} + +// pub fn mutate_delimited_data( +// input: &mut String, +// _corpus: &[String], +// rng: &mut Rng, +// _dictionary: &Option>>, +// ) -> Option { +// let params = input +// .char_indices() +// .filter_map(|(i, c)| { +// if let Some((delim, direction)) = other_delimiter(c) { +// Some((i, c, delim, direction)) +// } else { +// None +// } +// }) +// .choose(rng); +// if let Some((index, first, second, direction)) = params { +// // let's find the second delimiter +// let (start_idx, end_idx) = if direction == DelimiterDirection::Forward { +// unimplemented!(); +// } else { +// unimplemented!(); +// }; +// +// // TODO: add mutation log +// None +// } else { +// None +// } +// } diff --git a/src/utils.rs b/src/utils.rs index c8c966c..bb2fe85 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -238,9 +238,9 @@ pub mod vec { } } - /// Overwrite a sub-slice from `&dst[dst_range]` with a subslice `&src[src_range]`. The size of the subslices must not be - /// identical. If `&src[src_range]` is larger than `&dst[dst_range]`, then `dst` is grown. - /// If `&src[src_range]` is smaller than `&dst[dst_range]`. This helper function is useful, if both + /// Overwrite a sub-slice from `&dst[dst_range]` with a subslice `&src[src_range]`. The size of the subslices can be different. + /// If `&src[src_range]` is larger than `&dst[dst_range]`, then `dst` is grown. + /// If `&src[src_range]` is smaller than `&dst[dst_range]` then `dst` is shrunk. 
This helper function is useful, if both /// sub-slice ranges are generated randomly while fuzzing and you do not want to worry about what /// Vec operation to use. /// @@ -371,6 +371,104 @@ pub mod vec { dst.set_len(new_len as usize); } } + + /// Insert new data at a given index, with the data being another sub-slice of the `dst` vec. + /// + /// This boils down to a: + /// + /// * Optional: vec growth + /// * `memmove` + /// * `memcpy` + /// + /// # Panics: + /// + /// * if `index` is out of bounds + /// * if `src_range` starts after it ends (both bounds are first clamped to `dst.len()`) + /// * if `index` is within `src_range` + /// + /// # Examples: + /// + /// ```rust + /// # use snapchange::utils::vec::insert_from_within; + /// + /// let mut v = b"abcd".to_vec(); + /// insert_from_within(&mut v, 0, 2..4); + /// assert_eq!(&v, b"cdabcd", "basic test"); + /// + /// let mut v = b"abcd".to_vec(); + /// insert_from_within(&mut v, 1, 2..4); + /// assert_eq!(&v, b"acdbcd", "basic test 2"); + /// + /// let mut v = b"abcd".to_vec(); + /// insert_from_within(&mut v, 0, 2..); + /// assert_eq!(&v, b"cdabcd", "unbounded range"); + /// + /// let mut v = b"abcd".to_vec(); + /// insert_from_within(&mut v, 0, 0..4); + /// assert_eq!(&v, b"abcdabcd", "extend front"); + /// + /// let mut v = b"abcd".to_vec(); + /// insert_from_within(&mut v, 4, 0..4); + /// assert_eq!(&v, b"abcdabcd", "append"); + /// + /// let mut v = b"abcd".to_vec(); + /// insert_from_within(&mut v, 2, 0..0); + /// assert_eq!(&v, b"abcd", "empty range"); + /// ``` + pub fn insert_from_within<T: Copy, R: std::ops::RangeBounds<usize>>( + dst: &mut Vec<T>, + index: usize, + src_range: R, + ) { + // check for index OOB + assert!(index <= dst.len()); + // Deal with generic RangeBounds. + let src_start = match src_range.start_bound() { + std::ops::Bound::Unbounded => 0, + std::ops::Bound::Included(t) => *t, + // e.g., `(Bound::Excluded(a), Bound::Unbounded)` also implements `RangeBounds`. 
+ std::ops::Bound::Excluded(t) => t.saturating_add(1), + } + .min(dst.len()); + let src_end = match src_range.end_bound() { + std::ops::Bound::Unbounded => dst.len(), + std::ops::Bound::Excluded(t) => *t, + std::ops::Bound::Included(t) => t.saturating_add(1), + } + .min(dst.len()); + if src_start == src_end { // no-op + return; + } + assert!(src_start < src_end); + assert!(index <= src_start || index >= src_end); + + let copy_len = src_end - src_start; + let src_start = if index <= src_start { + // the source lies behind `index` and is shifted back by the memmove below + src_start + copy_len + } else { + src_start + }; + + // make sure there is enough + dst.reserve(copy_len); + let old_len = dst.len(); + let new_len = dst.len() + copy_len; + + // SAFETY: `reserve` guarantees capacity for `new_len` elements; both copies stay within it. + unsafe { + // move data back to make space. + let dst_ptr = dst.as_mut_ptr().add(index + copy_len); + let src_ptr = dst.as_ptr().add(index); + std::ptr::copy(src_ptr, dst_ptr, old_len - index); + // copy the data within; the (shifted) source never overlaps the gap at `index`. 
+ let dst_ptr = dst.as_mut_ptr().add(index); + let src_ptr = dst.as_ptr().add(src_start); + std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, copy_len); + // set the Vec's length + dst.set_len(new_len); + } + } } /// Prints a hexdump representation of the given `data` assuming the data starts at @@ -379,11 +477,11 @@ pub mod vec { /// Example: /// /// ```rust -/// #use snapchange::utils::hexdump; -/// hexdump([0x41, 0x42, 0x43, 0x44], 0xdead0000) +/// # use snapchange::utils::hexdump; +/// hexdump(&[0x41, 0x42, 0x43, 0x44], 0xdead0000); /// ```` /// Output: -/// ```non-rust +/// ```non-rust,ignore /// 0xdead0000: 41 42 43 44 | ABCD /// ``` /// @@ -579,7 +677,7 @@ pub enum Error { /// /// Examples: /// -/// ``` +/// ```non-rust,ignore /// deadbeef /// 0xdeadbeef /// main From b7a76e8fc992879815126fe5c20e9026256797ca Mon Sep 17 00:00:00 2001 From: Michael Rodler Date: Tue, 6 Feb 2024 22:12:32 +0000 Subject: [PATCH 2/6] Added example 08 - a mujs fuzzing harness using the new `TextInput` data. Fixed some issues with the text-based mutator code along the way. * The new example has two artificial vulnerabilities that can be triggered using the right arguments to a JS function. * Added some tweaks to the text-based mutations. * In order to avoid breakpoints on longjump and similar functions, we need to properly detach gdb. This requires a pattern of ``` __asm("int3"); sleep(1); __asm("vmcall"); ``` and also the `SNAPSHOT_GDB_MODE="detach"` definition. To allow gdbsnapshot.py to do its thing, but then also to detach gdb before the snapshot is triggered. 
--- docker/utils/build.sh | 2 + examples/08_textinput_mujs/.dockerignore | 1 + examples/08_textinput_mujs/.gitignore | 23 +++ .../.solutions/magic_crash.js | 2 + .../.solutions/party_crash.js | 1 + examples/08_textinput_mujs/Cargo.toml | 25 +++ examples/08_textinput_mujs/Dockerfile | 25 +++ examples/08_textinput_mujs/Makefile | 64 ++++++ examples/08_textinput_mujs/README.md | 5 + examples/08_textinput_mujs/build.rs | 30 +++ examples/08_textinput_mujs/dict.txt | 125 ++++++++++++ examples/08_textinput_mujs/harness/.gitignore | 5 + examples/08_textinput_mujs/harness/Makefile | 17 ++ examples/08_textinput_mujs/harness/main.c | 184 ++++++++++++++++++ examples/08_textinput_mujs/input/magic.js | 2 + examples/08_textinput_mujs/input/party.js | 3 + examples/08_textinput_mujs/make_dict.py | 18 ++ examples/08_textinput_mujs/src/fuzzer.rs | 57 ++++++ examples/08_textinput_mujs/src/main.rs | 8 + examples/08_textinput_mujs/test.sh | 26 +++ src/commands/fuzz.rs | 8 + src/fuzzvm.rs | 4 +- src/input_types/text.rs | 32 +-- src/mutators/text/helpers.rs | 3 +- src/mutators/text/mod.rs | 15 +- 25 files changed, 663 insertions(+), 22 deletions(-) create mode 120000 examples/08_textinput_mujs/.dockerignore create mode 100644 examples/08_textinput_mujs/.gitignore create mode 100644 examples/08_textinput_mujs/.solutions/magic_crash.js create mode 100644 examples/08_textinput_mujs/.solutions/party_crash.js create mode 100644 examples/08_textinput_mujs/Cargo.toml create mode 100644 examples/08_textinput_mujs/Dockerfile create mode 100644 examples/08_textinput_mujs/Makefile create mode 100644 examples/08_textinput_mujs/README.md create mode 100644 examples/08_textinput_mujs/build.rs create mode 100644 examples/08_textinput_mujs/dict.txt create mode 100644 examples/08_textinput_mujs/harness/.gitignore create mode 100644 examples/08_textinput_mujs/harness/Makefile create mode 100644 examples/08_textinput_mujs/harness/main.c create mode 100644 examples/08_textinput_mujs/input/magic.js create 
mode 100644 examples/08_textinput_mujs/input/party.js create mode 100644 examples/08_textinput_mujs/make_dict.py create mode 100644 examples/08_textinput_mujs/src/fuzzer.rs create mode 100644 examples/08_textinput_mujs/src/main.rs create mode 100755 examples/08_textinput_mujs/test.sh diff --git a/docker/utils/build.sh b/docker/utils/build.sh index 717c192..8185639 100755 --- a/docker/utils/build.sh +++ b/docker/utils/build.sh @@ -315,6 +315,8 @@ echo "" >> $RC_LOCAL echo "echo [+] waiting for processes to finish" >> $RC_LOCAL echo "wait" >> $RC_LOCAL +echo "sleep 5" >> $RC_LOCAL +echo "wait" >> $RC_LOCAL # Add a newline echo "" >> $RC_LOCAL diff --git a/examples/08_textinput_mujs/.dockerignore b/examples/08_textinput_mujs/.dockerignore new file mode 120000 index 0000000..3e4e48b --- /dev/null +++ b/examples/08_textinput_mujs/.dockerignore @@ -0,0 +1 @@ +.gitignore \ No newline at end of file diff --git a/examples/08_textinput_mujs/.gitignore b/examples/08_textinput_mujs/.gitignore new file mode 100644 index 0000000..dd0cd0d --- /dev/null +++ b/examples/08_textinput_mujs/.gitignore @@ -0,0 +1,23 @@ +# snapshot-specific +snapshot/ +snapshot_image +src/constants.rs +dict/ + +# rust build artifacts +debug/ +target/ +Cargo.lock +**/*.rs.bk +*.pdb + +# built harness binaries +harness/mujs_harness +harness/mujs-* + +# other misc data +perf.data* +flamegraph.svg +strace.log +fuzz*.log +*.log diff --git a/examples/08_textinput_mujs/.solutions/magic_crash.js b/examples/08_textinput_mujs/.solutions/magic_crash.js new file mode 100644 index 0000000..7daca57 --- /dev/null +++ b/examples/08_textinput_mujs/.solutions/magic_crash.js @@ -0,0 +1,2 @@ +var r = magic(-10000000); +console.log(r); diff --git a/examples/08_textinput_mujs/.solutions/party_crash.js b/examples/08_textinput_mujs/.solutions/party_crash.js new file mode 100644 index 0000000..4d0b340 --- /dev/null +++ b/examples/08_textinput_mujs/.solutions/party_crash.js @@ -0,0 +1 @@ +party(1, 1, 1, 40); diff --git 
a/examples/08_textinput_mujs/Cargo.toml b/examples/08_textinput_mujs/Cargo.toml new file mode 100644 index 0000000..903ba82 --- /dev/null +++ b/examples/08_textinput_mujs/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "mujs_fuzzer" +version = "0.1.0" +edition = "2021" +exclude = ["qemu_snapshot", "snapshot"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +snapchange = { path = "../../" } +log = "0.4" + +[build-dependencies] +regex = "1" + +[features] +default = [] +redqueen = ["snapchange/redqueen"] + +[profile.release] +panic = "abort" +lto = true +codegen-units = 1 +opt-level = 3 +debug = true diff --git a/examples/08_textinput_mujs/Dockerfile b/examples/08_textinput_mujs/Dockerfile new file mode 100644 index 0000000..cc92391 --- /dev/null +++ b/examples/08_textinput_mujs/Dockerfile @@ -0,0 +1,25 @@ +# build environment -- feel free to adapt to your needs. + +FROM alpine:edge as base + +RUN apk --no-cache upgrade \ + && apk add --no-cache --initramfs-diskless-boot \ + python3 gdb musl-dbg \ + curl tar \ + build-base perf \ + clang lld compiler-rt + +COPY ./harness/ /opt/ +RUN ls -alR /opt/ +RUN make -C /opt/ + +#### switch to snapchange container #### +FROM ghcr.io/awslabs/snapchange + +# copy whole root filesystem from build environment to this container layer +# SNAPSHOT_INPUT is the location that the snapshotting script will create. 
+COPY --from=base / "$SNAPSHOT_INPUT" + +# set the path to the target within the root filesystem of the build environment +ENV SNAPSHOT_ENTRYPOINT="/opt/mujs_harness" +ENV SNAPSHOT_GDB_MODE="detach" diff --git a/examples/08_textinput_mujs/Makefile b/examples/08_textinput_mujs/Makefile new file mode 100644 index 0000000..d5df9e4 --- /dev/null +++ b/examples/08_textinput_mujs/Makefile @@ -0,0 +1,64 @@ +DOCKER ?= docker +FUZZ_CORES ?= /2 + +DOCKER_IMAGE_NAME ?= snapchange_example8 + +all: base_images test + +base_images: + # Build the base snapchange image used for snapshotting + $(MAKE) -C ../../docker + +snapshot_image: Dockerfile harness/main.c harness/Makefile +ifeq ($(VERBOSE),1) + $(DOCKER) build -t $(DOCKER_IMAGE_NAME):snapshot . -f $< +endif + $(DOCKER) build -q -t $(DOCKER_IMAGE_NAME):snapshot . -f $< > $@ + +snapshot: snapshot_image +ifeq ($(VERBOSE),1) + $(DOCKER) run --rm -i \ + -v $(shell realpath -m ./snapshot):/snapshot \ + -e SNAPSHOT_IMGTYPE=initramfs \ + $(shell cat $<) +else + $(DOCKER) run --rm -i \ + -v $(shell realpath -m ./snapshot):/snapshot \ + -e SNAPSHOT_IMGTYPE=initramfs \ + $(shell cat $<) \ + >/dev/null 2>&1 +endif + cd snapshot; if ! test -L input; then rm -rf input || true; ln -s ../input; fi + cd snapshot; if ! 
test -L dict; then rm -rf dict || true; ln -s ../dict/; fi + +dict: dict.txt + -mkdir -p dict + python make_dict.py $< + echo "magic" > dict/magic + echo "party" > dict/party + +fuzzer: dict + cargo build -r + +fuzz: snapshot + cargo run -r -- -p ./snapshot fuzz -c $(FUZZ_CORES) + +fuzz-%: snapshot + cargo run -r -- -p ./snapshot fuzz -c $(FUZZ_CORES) --stop-after-time $(shell echo $@ | sed 's/fuzz-//g')m +# .PHONY: fuzz-1 fuzz-2 fuzz-3 fuzz-4 fuzz-5 + +test: snapshot fuzzer reset + ./test.sh + +reset: snapshot + cd snapshot && ./reset.sh + +clean: clean-docker + -$(RM) -rf snapshot target + +clean-docker: + -$(DOCKER) rmi `cat ./snapshot_image` + -$(DOCKER) rmi $(DOCKER_IMAGE_NAME):snapshot + -$(RM) snapshot_image + +.PHONY: fuzzer all base_images test reset fuzz diff --git a/examples/08_textinput_mujs/README.md b/examples/08_textinput_mujs/README.md new file mode 100644 index 0000000..84122ed --- /dev/null +++ b/examples/08_textinput_mujs/README.md @@ -0,0 +1,5 @@ +# Tutorial 8 - Using the text input type + +This fuzzer uses the `TextInput` type to introduce more useful mutations for +common text-based formats such as programming or markup languages. 
+ diff --git a/examples/08_textinput_mujs/build.rs b/examples/08_textinput_mujs/build.rs new file mode 100644 index 0000000..a8659cf --- /dev/null +++ b/examples/08_textinput_mujs/build.rs @@ -0,0 +1,30 @@ +use regex::Regex; +use std::fs; +use std::fs::File; +use std::io::Write; + +fn main() { + println!("cargo:rerun-if-changed=snapshot/fuzzvm.qemuregs"); + println!("cargo:rerun-if-changed=snapshot/vm.log"); + + let qemuregs = fs::read_to_string("./snapshot/fuzzvm.qemuregs").unwrap(); + let mut w = File::create("src/constants.rs").unwrap(); + + writeln!(w, "#![allow(unused)]").unwrap(); + + let re = Regex::new(r"CR3=([0-9A-Fa-f]+)").unwrap(); + let captures = re.captures(&qemuregs).unwrap(); + let cr3 = &captures.get(1).unwrap().as_str(); + writeln!(w, "pub const CR3: u64 = 0x{};", cr3).unwrap(); + + let re = Regex::new(r"RIP=([0-9A-Fa-f]+)").unwrap(); + let captures = re.captures(&qemuregs).unwrap(); + let rip = &captures.get(1).unwrap().as_str(); + writeln!(w, "pub const RIP: u64 = 0x{};", rip).unwrap(); + + let vmlog = fs::read_to_string("./snapshot/vm.log").unwrap(); + let re = Regex::new(r"SNAPSHOT buffer: (0x[0-9A-Fa-f]+)").unwrap(); + let captures = re.captures(&vmlog).unwrap(); + let input_addr = &captures.get(1).unwrap().as_str(); + writeln!(w, "pub const INPUT: u64 = {};", input_addr).unwrap(); +} diff --git a/examples/08_textinput_mujs/dict.txt b/examples/08_textinput_mujs/dict.txt new file mode 100644 index 0000000..2551fbe --- /dev/null +++ b/examples/08_textinput_mujs/dict.txt @@ -0,0 +1,125 @@ +abstract +arguments +await +boolean +break +byte +case +catch +char +class +const +continue +debugger +default +delete +do +double +else +enum +eval +export +extends +false +final +finally +float +for +function +goto +if +implements +import +in +instanceof +int +interface +let +long +native +new +null +package +private +protected +public +return +short +static +super +switch +synchronized +this +throw +throws +transient +true +try +typeof +var +void +volatile 
+while +with +yield + +Array +Date +eval +function +hasOwnProperty +Infinity +isFinite +isNaN +isPrototypeOf +length +Math +NaN +name +Number +Object +prototype +String +toString +undefined +valueOf + ++ +- +* +** +/ +% +++ +-- +== +=== +< +> +>= +<= += ++= +-= +*= +/= +%= +**= +<<= +>>= +& +&= +; + +() +[0] +[1] + +Math.pow + +0.0 +1e10 +-1 +-254 +-255 + diff --git a/examples/08_textinput_mujs/harness/.gitignore b/examples/08_textinput_mujs/harness/.gitignore new file mode 100644 index 0000000..ba525b9 --- /dev/null +++ b/examples/08_textinput_mujs/harness/.gitignore @@ -0,0 +1,5 @@ +mujs-1.3.4.tar.gz +mujs-1.3.4 +mujs_harness +.cache +compile_commands.json diff --git a/examples/08_textinput_mujs/harness/Makefile b/examples/08_textinput_mujs/harness/Makefile new file mode 100644 index 0000000..633fbb9 --- /dev/null +++ b/examples/08_textinput_mujs/harness/Makefile @@ -0,0 +1,17 @@ +ifeq ($(origin CC),default) +CC = clang +endif +# CFLAGS ?= -O3 -ggdb -fuse-ld=lld -fsanitize=address -fno-omit-frame-pointer -flto +CFLAGS ?= -O3 -ggdb -fuse-ld=lld -fno-omit-frame-pointer -flto + +mujs_harness: main.c mujs-1.3.4 Makefile + $(CC) $(CFLAGS) -o $@ $< ./mujs-1.3.4/one.c -I./mujs-1.3.4/ -lm + +mujs_harness.asan: main.c mujs-1.3.4 Makefile + $(CC) $(CFLAGS) -fsanitize=address -o $@ $< ./mujs-1.3.4/one.c -I./mujs-1.3.4/ -lm + +clean: + -$(RM) mujs_harness + +mujs-1.3.4: + curl -q https://mujs.com/downloads/mujs-1.3.4.tar.gz | tar xz diff --git a/examples/08_textinput_mujs/harness/main.c b/examples/08_textinput_mujs/harness/main.c new file mode 100644 index 0000000..2c09c94 --- /dev/null +++ b/examples/08_textinput_mujs/harness/main.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include + +#include "mujs.h" + +static void jsB_gc(js_State *J) { + int report = js_toboolean(J, 1); + js_gc(J, report); + js_pushundefined(J); +} + +static void jsB_print(js_State *J) { + int i, top = js_gettop(J); + for (i = 1; i < top; ++i) { + const char *s = 
js_tostring(J, i); + if (i > 1) + putchar(' '); + fputs(s, stdout); + } + putchar('\n'); + js_pushundefined(J); +} + +int64_t magic_values[] = {0xdeadbeef, 0xcafecafe, 0x42424242, 0x41414141}; + +static void magic(js_State *J) { + if (js_isnumber(J, 1)) { + double idx = js_tonumber(J, 1); + if (idx < 4) { + js_pushnumber(J, (double)magic_values[(int)idx]); + return; + } + } + js_pushnumber(J, 0); +} + +int party_counter = 0; + +static void party(js_State *J) { + int i, top = js_gettop(J); + for (i = 1; i < top; ++i) { + if (js_isnumber(J, i)) { + int val = (int)js_tonumber(J, i); + party_counter += val; + if (party_counter > 1000) { + party_counter = 0; + } + if (party_counter > 42) { + abort(); + } + } else { + goto err; + } + } + +good: + js_pushboolean(J, 1); + return; + +err: + js_pushboolean(J, 0); +} + +static void jsB_quit(js_State *J) { exit(js_tonumber(J, 1)); } + +static void jsB_repr(js_State *J) { js_repr(J, 1); } + +static const char *require_js = + "function require(name) {\n" + "var cache = require.cache;\n" + "if (name in cache) return cache[name];\n" + "var exports = {};\n" + "cache[name] = exports;\n" + "Function('exports', read(name+'.js'))(exports);\n" + "return exports;\n" + "}\n" + "require.cache = Object.create(null);\n"; + +static const char *stacktrace_js = + "Error.prototype.toString = function() {\n" + "var s = this.name;\n" + "if ('message' in this) s += ': ' + this.message;\n" + "if ('stackTrace' in this) s += this.stackTrace;\n" + "return s;\n" + "};\n"; + +static const char *console_js = + "var console = { log: print, debug: print, warn: print, error: print };"; + +static char *read_stdin(void) { + int n = 0; + int t = 512; + char *s = NULL; + + for (;;) { + char *ss = realloc(s, t); + if (!ss) { + free(s); + fprintf(stderr, "cannot allocate storage for stdin contents\n"); + return NULL; + } + s = ss; + n += fread(s + n, 1, t - n - 1, stdin); + if (n < t - 1) + break; + t *= 2; + } + + if (ferror(stdin)) { + free(s); + fprintf(stderr, 
"error reading stdin\n"); + return NULL; + } + + s[n] = 0; + return s; +} + +int main(int argc, char **argv) { + char *input = NULL; + js_State *J = NULL; + int status = 0; + int i = 0; + int c = 0; + + J = js_newstate(NULL, NULL, 0); + if (!J) { + fprintf(stderr, "Could not initialize MuJS.\n"); + exit(-1); + } + + js_newcfunction(J, jsB_gc, "gc", 0); + js_setglobal(J, "gc"); + + js_newcfunction(J, jsB_print, "print", 0); + js_setglobal(J, "print"); + + js_newcfunction(J, jsB_repr, "repr", 0); + js_setglobal(J, "repr"); + + js_dostring(J, stacktrace_js); + js_dostring(J, console_js); + + // intentially vulnerable function here to test the fuzzer. + js_newcfunction(J, magic, "magic", 0); + js_setglobal(J, "magic"); + js_newcfunction(J, party, "party", 0); + js_setglobal(J, "party"); + + if (getenv("SNAPCHANGE_SNAPSHOT") != NULL) { + input = malloc(0x4000); + memset(input, 0, 0x4000); + printf("SNAPSHOT buffer: %p\n", input); + + // Ensure the stdout has been flushed + fflush(stdout); + fflush(stderr); + + // Snapshot taken here + __asm("int3"); + sleep(1); + __asm("vmcall"); + + } else { + input = read_stdin(); + } + + // JS exec + if (input == NULL) { + status = 1; + } else { + status = js_dostring(J, input); + } + free(input); + + js_gc(J, 0); + js_freestate(J); + + exit(status); +} diff --git a/examples/08_textinput_mujs/input/magic.js b/examples/08_textinput_mujs/input/magic.js new file mode 100644 index 0000000..a5d17bf --- /dev/null +++ b/examples/08_textinput_mujs/input/magic.js @@ -0,0 +1,2 @@ +var r = magic(0); +console.log(r); diff --git a/examples/08_textinput_mujs/input/party.js b/examples/08_textinput_mujs/input/party.js new file mode 100644 index 0000000..2fc8be1 --- /dev/null +++ b/examples/08_textinput_mujs/input/party.js @@ -0,0 +1,3 @@ +party(1); +var r = party(5, 10); +console.log(r); diff --git a/examples/08_textinput_mujs/make_dict.py b/examples/08_textinput_mujs/make_dict.py new file mode 100644 index 0000000..ba6c016 --- /dev/null +++ 
b/examples/08_textinput_mujs/make_dict.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +import string +import sys + +entries = set() + +for line in open(sys.argv[1]).readlines(): + line = line.strip() + entries.add(line) + + +for line in entries:  # write one dictionary file per unique entry + fname = hex(hash(line))[2:] + "_" + if all(c in string.ascii_letters for c in line): + fname += line + with open("./dict/" + fname, "w") as f: + f.write(line) diff --git a/examples/08_textinput_mujs/src/fuzzer.rs b/examples/08_textinput_mujs/src/fuzzer.rs new file mode 100644 index 0000000..136f68d --- /dev/null +++ b/examples/08_textinput_mujs/src/fuzzer.rs @@ -0,0 +1,57 @@ +//! Fuzzer template + +#![allow(clippy::missing_docs_in_private_items)] +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::cast_lossless)] + +use crate::constants; +use snapchange::{input_types::TextInput, prelude::*}; + +const CR3: Cr3 = Cr3(constants::CR3); + +#[derive(Default, Clone, Debug)] +pub struct JSTextFuzzer {} + +impl Fuzzer for JSTextFuzzer { + type Input = TextInput; + const START_ADDRESS: u64 = constants::RIP; + const MAX_INPUT_LENGTH: usize = 0x4000; + const MAX_MUTATIONS: u64 = 16; + + fn set_input( + &mut self, + input: &InputWithMetadata, + fuzzvm: &mut FuzzVm, + ) -> Result<()> { + // truncate to the maximum input length. + let ilen = std::cmp::min(input.data().len(), Self::MAX_INPUT_LENGTH - 1); + let data = &input.data()[..ilen]; + fuzzvm.write_bytes_dirty(VirtAddr(constants::INPUT), CR3, data)?; + // and we make sure that we have a zero terminator. + fuzzvm.write_dirty(VirtAddr(constants::INPUT + ilen as u64), CR3, 0u8)?; + Ok(()) + } + + fn init_snapshot(&mut self, fuzzvm: &mut FuzzVm) -> Result<()> { + // disable printing code by placing immediate returns at the relevant functions. + // this is faster than using breakpoints, since it doesn't require a hypercall. 
+ for sym in &[ + "ld-musl-x86_64.so.1!puts", + "ld-musl-x86_64.so.1!fputs", + "ld-musl-x86_64.so.1!fprintf", + "ld-musl-x86_64.so.1!printf", + "ld-musl-x86_64.so.1!putchar", + ] { + if fuzzvm + .patch_bytes_permanent(AddressLookup::SymbolOffset(sym, 0), &[0xc3]) + .is_ok() + { + log::warn!("inserting immediate ret at sym {}", sym); + } else { + log::warn!("fail to set ret at sym {}", sym); + } + } + + Ok(()) + } +} diff --git a/examples/08_textinput_mujs/src/main.rs b/examples/08_textinput_mujs/src/main.rs new file mode 100644 index 0000000..36ee4dd --- /dev/null +++ b/examples/08_textinput_mujs/src/main.rs @@ -0,0 +1,8 @@ +use snapchange::prelude::*; + +mod fuzzer; +mod constants; + +fn main() -> anyhow::Result<()> { + snapchange_main::() +} diff --git a/examples/08_textinput_mujs/test.sh b/examples/08_textinput_mujs/test.sh new file mode 100755 index 0000000..62d1cb3 --- /dev/null +++ b/examples/08_textinput_mujs/test.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +source ../test.include.sh + +setup_build + +start_fuzzing + +# Check if the fuzzer found a crash +SEGV_FILE="$(find ./snapshot/crashes/*SIGSEGV* -type f | head -n 1)" +RAISE_FILE="$(find ./snapshot/crashes/*SIGABRT* -type f | head -n 1)" +if [[ -z "$SEGV_FILE" ]]; then + err "failed to identify the crash that leads to SIGSEGV" +fi +if [[ -z "$RAISE_FILE" ]]; then + err "failed to identify the crash that leads to SIGABRT" +fi + +CORPUS_FILE="$(find ./snapshot/current_corpus/ -type f | tail -n 1)" + +if [[ -z "$CORPUS_FILE" ]]; then + err "failed to identify a corpus file" +fi + +log_success "fuzz" + diff --git a/src/commands/fuzz.rs b/src/commands/fuzz.rs index ccf9c19..e81a440 100644 --- a/src/commands/fuzz.rs +++ b/src/commands/fuzz.rs @@ -281,6 +281,14 @@ pub(crate) fn run( curr_clean_snapshot.write_bytes(*addr, cr3, &[0xcc])?; covbp_bytes.insert(*addr, orig_byte); count += 1; + + if orig_byte == 0xcc { + log::error!( + "coverage breakpoint on top of breakpoint at {:#x} cr3 {:#x}", + addr.0, + cr3.0 + 
); + } } } } diff --git a/src/fuzzvm.rs b/src/fuzzvm.rs index b15a7a3..3627e6c 100644 --- a/src/fuzzvm.rs +++ b/src/fuzzvm.rs @@ -3921,7 +3921,9 @@ impl<'a, FUZZER: Fuzzer> FuzzVm<'a, FUZZER> { // we have hit a new coverage address. Return a CoverageBreakpoint exit. if let Some(orig_byte) = cov_bps.get(&virt_addr) { - assert!(*orig_byte != 0xcc); + if *orig_byte == 0xcc { + assert!(*orig_byte != 0xcc, "breakpoint-on-breakpoint @ {:#x}", virt_addr.0); + } // This breakpoint is a coverage breakpoint. Restore the VM // memory and the global clean memory of this breakpoint so no // other VM has to cover this breakpoint either diff --git a/src/input_types/text.rs b/src/input_types/text.rs index 653439d..14ee484 100644 --- a/src/input_types/text.rs +++ b/src/input_types/text.rs @@ -8,7 +8,6 @@ use crate::mutators; use crate::rng::Rng; use anyhow::Result; -#[cfg(feature = "redqueen")] use rand::seq::SliceRandom; use rand::{Rng as _, RngCore}; #[cfg(feature = "redqueen")] @@ -100,13 +99,14 @@ impl FuzzInput for TextInput { } // Get the number of changes to make to the input - let num_change = (rng.next_u64() % max_mutations).max(1) as usize; + let num_change: usize = rng.gen_range(1..=max_mutations).try_into().unwrap(); // Mutations applied to this input let mut mutations: Vec = Vec::with_capacity(num_change); // Perform some number of mutations on the input - for _ in 0..num_change { + let mut changed = 0; + while changed < num_change { // Special case the redqueen mutation if there are available rules #[cfg(feature = "redqueen")] if let Some(rules) = redqueen_rules { @@ -144,19 +144,21 @@ impl FuzzInput for TextInput { // Choose which mutators to use for this mutation. 
Expensive mutators are // harder to hit since they are a bit more costly - let curr_mutators = if rng.next_u64() % max_mutations * 5 == 0 { + let curr_mutators = if !Self::expensive_mutators().is_empty() + && (rng.next_u64() % max_mutations * 5 == 0) + { Self::expensive_mutators() } else { Self::mutators() }; // Select one of the mutators - let mutator_index = rng.gen::() % curr_mutators.len(); - let mutator_func = curr_mutators[mutator_index]; + let mutator_func = curr_mutators.choose(rng).unwrap(); // Execute the mutator if let Some(mutation) = mutator_func(input, corpus, rng, dictionary) { mutations.push(mutation); + changed += 1; } } @@ -179,10 +181,7 @@ impl FuzzInput for TextInput { // insert random strings, with the const param, being an upper bound to the number of // inserted bytes. This ensures that we will do small modification much more often. mutators::text::insert_repeated_chars::<4>, - mutators::text::insert_repeated_chars::<1024>, mutators::text::insert_random_string::<4>, - mutators::text::insert_random_string::<8>, - mutators::text::insert_random_string::<1024>, // dictionary-based mutations mutators::text::splice_from_dictionary, mutators::text::insert_from_dictionary, @@ -191,10 +190,6 @@ impl FuzzInput for TextInput { mutators::text::insert_from_dictionary_separated_by::<'\t'>, mutators::text::insert_from_dictionary_separated_by::<' '>, mutators::text::insert_from_dictionary_separated_by::<';'>, - mutators::text::insert_from_corpus_separated_by::<'\n'>, - mutators::text::insert_from_corpus_separated_by::<' '>, - mutators::text::insert_from_corpus_separated_by::<'\t'>, - mutators::text::insert_from_corpus_separated_by::<';'>, // text-focused mutation operations: // line-focused mutators::text::dup_between_separator::<'\n'>, @@ -214,7 +209,16 @@ impl FuzzInput for TextInput { /// Current expensive mutators available for mutation (typically those which allocate) fn expensive_mutators() -> &'static [Self::MutatorFunc] { - &[] + &[ + // potentially 
longer random strings + mutators::text::insert_repeated_chars::<1024>, + mutators::text::insert_random_string::<1024>, + // insert whole testcases from the corpus + mutators::text::insert_from_corpus_separated_by::<'\n'>, + mutators::text::insert_from_corpus_separated_by::<' '>, + mutators::text::insert_from_corpus_separated_by::<'\t'>, + mutators::text::insert_from_corpus_separated_by::<';'>, + ] } fn generate( diff --git a/src/mutators/text/helpers.rs b/src/mutators/text/helpers.rs index 664d981..0fc11f7 100644 --- a/src/mutators/text/helpers.rs +++ b/src/mutators/text/helpers.rs @@ -12,7 +12,8 @@ lazy_static! { INTERESTING_U64 .iter() .copied() - .map(|i| format!("{}", i)) + .map(|i| [format!("{i}"), format!("-{i}")]) + .flatten() .collect() }; pub static ref INTERESTING_HEX_INTEGERS: Vec = { diff --git a/src/mutators/text/mod.rs b/src/mutators/text/mod.rs index e4ea0f7..aa8d427 100644 --- a/src/mutators/text/mod.rs +++ b/src/mutators/text/mod.rs @@ -33,11 +33,15 @@ pub fn insert_random_string( rng: &mut Rng, _dictionary: &Option>>, ) -> Option { - assert!(!input.is_empty()); let s = helpers::random_ascii_string(rng, N); - let idx = rng.gen_range(0..=input.len()); - utils::vec::fast_insert_at(input.data_mut(), idx, s.as_bytes()); - Some(format!("InsertRandom_offset_{}_len_{}", idx, s.len())) + if input.is_empty() { + input.data_mut().extend_from_slice(s.as_bytes()); + Some(format!("InsertRandom_offset_0_len_{}", s.len())) + } else { + let idx = rng.gen_range(0..=input.len()); + utils::vec::fast_insert_at(input.data_mut(), idx, s.as_bytes()); + Some(format!("InsertRandom_offset_{}_len_{}", idx, s.len())) + } } /// Insert repeated a randomly chosen repeated char (up to N times). 
@@ -107,9 +111,8 @@ pub fn insert_from_dictionary( if !dictionary.is_empty() { let dict_idx = rng.gen_range(0..dictionary.len()); let entry = &dictionary[dict_idx]; - let index = rng.gen_range(0..input.len()); + let index = rng.gen_range(0..=input.len()); utils::vec::fast_insert_at(input.data_mut(), index, &entry[..]); - // TODO: add mutation log return Some(format!("InsertFromDict_{dict_idx}_offset_{index}")); } } From 02139f883e8023a9baea39906cc3b9653834ca42 Mon Sep 17 00:00:00 2001 From: Michael Rodler Date: Wed, 10 Apr 2024 13:11:26 +0200 Subject: [PATCH 3/6] tweaked the text based mutators; introduced a proper splicing mutation that can e.g., splice multiple lines of text from another corpus entry --- examples/08_textinput_mujs/harness/main.c | 2 +- src/input_types/text.rs | 30 ++++--- src/lib.rs | 2 +- src/mutators/text/helpers.rs | 97 ++++++++++++++++++++++- src/mutators/text/mod.rs | 26 +++++- 5 files changed, 141 insertions(+), 16 deletions(-) diff --git a/examples/08_textinput_mujs/harness/main.c b/examples/08_textinput_mujs/harness/main.c index 2c09c94..c65a661 100644 --- a/examples/08_textinput_mujs/harness/main.c +++ b/examples/08_textinput_mujs/harness/main.c @@ -46,7 +46,7 @@ static void party(js_State *J) { if (js_isnumber(J, i)) { int val = (int)js_tonumber(J, i); party_counter += val; - if (party_counter > 1000) { + if (party_counter > 43) { party_counter = 0; } if (party_counter > 42) { diff --git a/src/input_types/text.rs b/src/input_types/text.rs index 14ee484..223bec4 100644 --- a/src/input_types/text.rs +++ b/src/input_types/text.rs @@ -142,8 +142,7 @@ impl FuzzInput for TextInput { } } - // Choose which mutators to use for this mutation. Expensive mutators are - // harder to hit since they are a bit more costly + // Choose which mutators to use for this mutation. 
let curr_mutators = if !Self::expensive_mutators().is_empty() && (rng.next_u64() % max_mutations * 5 == 0) { @@ -185,29 +184,36 @@ impl FuzzInput for TextInput { // dictionary-based mutations mutators::text::splice_from_dictionary, mutators::text::insert_from_dictionary, - // advanced text-focused insertion operators - mutators::text::insert_from_dictionary_separated_by::<'\n'>, - mutators::text::insert_from_dictionary_separated_by::<'\t'>, - mutators::text::insert_from_dictionary_separated_by::<' '>, - mutators::text::insert_from_dictionary_separated_by::<';'>, + // splice sublices from corpus // text-focused mutation operations: // line-focused mutators::text::dup_between_separator::<'\n'>, mutators::text::delete_between_separator::<'\n'>, - // word-focused + mutators::text::insert_from_dictionary_after::<'\n'>, + mutators::text::splice_from_corpus_separated_by::<'\n'>, + // word-focused with ' ' and '\t' mutators::text::dup_between_separator::<' '>, - mutators::text::delete_between_separator::<' '>, mutators::text::dup_between_separator::<'\t'>, + mutators::text::delete_between_separator::<' '>, mutators::text::delete_between_separator::<'\t'>, + mutators::text::splice_from_corpus_separated_by::<' '>, + mutators::text::splice_from_corpus_separated_by::<'\t'>, + mutators::text::insert_from_dictionary_after::<' '>, + mutators::text::insert_from_dictionary_after::<'\t'>, // interesting for programming languages: mutators::text::dup_between_separator::<';'>, mutators::text::delete_between_separator::<';'>, - mutators::text::dup_between_separator::<','>, - mutators::text::delete_between_separator::<','>, + mutators::text::splice_from_corpus_separated_by::<';'>, + mutators::text::insert_from_dictionary_after::<';'>, + mutators::text::dup_between_separator::<';'>, + mutators::text::delete_between_separator::<';'>, + mutators::text::splice_from_corpus_separated_by::<','>, + mutators::text::insert_from_dictionary_after::<','>, ] } - /// Current expensive mutators 
available for mutation (typically those which allocate) + /// mutators that are called less often. For `TextInput` these are not necessarily more + /// expensive, but not as likely to trigger progress. fn expensive_mutators() -> &'static [Self::MutatorFunc] { &[ // potentially longer random strings diff --git a/src/lib.rs b/src/lib.rs index 322b379..32bd8f4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,7 +97,7 @@ #![feature(trait_alias)] #![feature(thread_id_value)] #![feature(map_try_insert)] -#![feature(stdsimd)] +// #![feature(stdsimd)] #![feature(avx512_target_feature)] #![feature(core_intrinsics)] #![feature(associated_type_defaults)] diff --git a/src/mutators/text/helpers.rs b/src/mutators/text/helpers.rs index 0fc11f7..4ce9416 100644 --- a/src/mutators/text/helpers.rs +++ b/src/mutators/text/helpers.rs @@ -231,6 +231,7 @@ where let sep: u8 = sep.try_into().unwrap(); input.reserve(other.len() + 2); + // if the target input is empty, just insert everything and add a separator before and after. if input.is_empty() { input.push(sep); input.extend_from_slice(other); @@ -261,6 +262,88 @@ where } } +/// Splice data between a separator. 
+/// +/// ```rust,ignore +/// let mut v = b"asdf; asdf".to_vec(); +/// splice_separated(&mut v, ';', "XXXX; AAAA", rng); +/// assert_eq!(v, b"XXXX; asdf"); +/// ``` +pub fn splice_separated( + input: &mut Vec, + sep: T, + other: &[u8], + rng: &mut impl rand::Rng, +) -> Option<(usize, usize)> +where + T: TryInto, + >::Error: std::fmt::Debug, +{ + let sep: u8 = sep.try_into().unwrap(); + + // find separator in the other slice + let other_seps: Vec = other + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .collect(); + let input_seps: Vec = input + .iter() + .copied() + .enumerate() + .filter_map(|(i, c)| if c == sep { Some(i) } else { None }) + .collect(); + + // now attempt choose a random sublice that is enclosed with the separator + let (other_data, other_start) = if other_seps.is_empty() { + // if there is no separator splice the whole other + (other, 0) + } else { + // choose a random subslice separated by a separator + let sep_idx = rng.gen_range(0..other_seps.len()); + let (from, to) = if sep_idx > 0 { + if rng.gen_bool(0.7) { + // with higher prob choose a "single line" + (other_seps[sep_idx - 1] + 1, other_seps[sep_idx]) + } else { + // with lower prob choose a subslice spanning "multiple lines" + let start = other_seps[..sep_idx].choose(rng).unwrap().clone(); + (start, other_seps[sep_idx]) + } + } else { + (0, other_seps[sep_idx]) + }; + (&other[from..to], from) + }; + + if input_seps.is_empty() { + input.clear(); + input.extend_from_slice(other_data); + Some((0, other_start)) + } else { + // choose a random subslice separated by a separator + let sep_idx = rng.gen_range(0..input_seps.len()); + let (from, to) = if sep_idx > 0 { + if rng.gen_bool(0.8) { + // with higher prob choose a "single line" + (input_seps[sep_idx - 1] + 1, input_seps[sep_idx]) + } else { + // with lower prob choose a subslice spanning "multiple lines" + let start = input_seps[..sep_idx].choose(rng).unwrap().clone(); + (start, 
input_seps[sep_idx]) + } + } else { + (0, input_seps[sep_idx]) + }; + + // splice + utils::vec::splice_into(input, from..to, other_data); + + Some((from, other_start)) + } +} + /// Insert data after a separator. /// /// ```rust,ignore @@ -363,6 +446,12 @@ where /// Delete data between two separators /// Returns range of deleted data. +/// +/// ```rust,ignore +/// let mut v = b"asdf\nbsdf\n".to_vec(); +/// delete_between_separator(&mut v, b'\n', rng); +/// assert!(v == b"asdf\n" || v == b"bsdf\n"); +/// ``` #[inline] pub fn delete_between_separator( input: &mut Vec, @@ -400,7 +489,13 @@ where Some((start_offset, end_offset)) } -/// Duplicate data between two separators +/// Duplicate data between two separators. +/// +/// ```rust,ignore +/// let mut v = b"asdf\nbsdf\n".to_vec(); +/// dup_between_separator(&mut v, b'\n', rng); +/// assert!(v == b"asdf\nasdf\nbsdf\n" || v == b"asdf\nbsdf\nbsdf\n"); +/// ``` #[inline] pub fn dup_between_separator( input: &mut Vec, diff --git a/src/mutators/text/mod.rs b/src/mutators/text/mod.rs index aa8d427..972f551 100644 --- a/src/mutators/text/mod.rs +++ b/src/mutators/text/mod.rs @@ -79,8 +79,32 @@ pub fn insert_from_corpus_separated_by( Some(format!("InsertFromCorpus_at_{index}")) } +/// Splice from another corpus entry into the current input at a given separator. +pub fn splice_from_corpus_separated_by( + input: &mut TextInput, + corpus: &[Arc>], + rng: &mut Rng, + _dictionary: &Option>>, +) -> Option { + if corpus.len() < 2 { + return None; + } + // select another corpus item != the current one - we require that there are at least two corpus + // entries so we know this doesn't loop endlessly. 
+ let other = loop { + let other = corpus.choose(rng).unwrap(); + if other.data().as_ptr() != input.data().as_ptr() { + break other; + } + }; + + let (input_index, _other_index) = helpers::splice_separated(input.data_mut(), C, &other.data(), rng)?; + + Some(format!("SpliceFromCorpus_at_{input_index}")) +} + /// Insert from dictionary at a given separator. -pub fn insert_from_dictionary_separated_by( +pub fn insert_from_dictionary_after( input: &mut TextInput, _corpus: &[Arc>], rng: &mut Rng, From ac8096d2c118b7fd6667dff2172a23fc5989896b Mon Sep 17 00:00:00 2001 From: Michael Rodler Date: Thu, 11 Apr 2024 11:18:30 +0000 Subject: [PATCH 4/6] Updated interesting integer const arrays to avoid duplicates and added more interesting values. * Previously those arrays contains `0, 1, 2, 3, 4` repeatedly, because they include `u8::MIN` and `u16::MIN`, which both is just `0`. * Now they also include the `::MAX` values for the signed integer types, which together with the unsigned `::MAX` covers the whole range of signed types too. 
--- src/mutators/bytes/helpers.rs | 210 ++++++++++++++++++---------------- 1 file changed, 111 insertions(+), 99 deletions(-) diff --git a/src/mutators/bytes/helpers.rs b/src/mutators/bytes/helpers.rs index 2b521f8..d695748 100644 --- a/src/mutators/bytes/helpers.rs +++ b/src/mutators/bytes/helpers.rs @@ -213,119 +213,131 @@ pub fn set_random_word>( } /// Interesting `u8` values to insert into a test input -pub const INTERESTING_U8: [u8; 10] = [ - u8::MAX, - u8::MAX - 1, - u8::MAX - 2, - u8::MAX - 3, +pub const INTERESTING_U8: [u8; 13] = [ + 0, + 1, + 16, + 32, + 64, + 100, + i8::MAX as u8 - 1, + i8::MAX as u8, + i8::MAX as u8 + 1, + u8::MAX - 8, u8::MAX - 4, - u8::MIN, - u8::MIN + 1, - u8::MIN + 2, - u8::MIN + 3, - u8::MIN + 4, + u8::MAX - 1, + u8::MAX, ]; /// Interesting `u16` values to insert into a test input -pub const INTERESTING_U16: [u16; 20] = [ - (u8::MAX) as u16, - (u8::MAX - 1) as u16, - (u8::MAX - 2) as u16, - (u8::MAX - 3) as u16, - (u8::MAX - 4) as u16, - (u8::MIN) as u16, - (u8::MIN + 1) as u16, - (u8::MIN + 2) as u16, - (u8::MIN + 3) as u16, - (u8::MIN + 4) as u16, - u16::MAX, - u16::MAX - 1, - u16::MAX - 2, - u16::MAX - 3, +pub const INTERESTING_U16: [u16; 23] = [ + // u8 + 0, + 1, + 16, + 32, + 64, + 100, + i8::MAX as u16 - 1, + i8::MAX as u16, + i8::MAX as u16 + 1, + u8::MAX as u16 - 8, + u8::MAX as u16 - 4, + u8::MAX as u16 - 1, + u8::MAX as u16, + // u16 + 1000, + 1024, + 4096, + i16::MAX as u16 - 1, + i16::MAX as u16, + i16::MAX as u16 + 1, + u16::MAX - 8, u16::MAX - 4, - u16::MIN, - u16::MIN + 1, - u16::MIN + 2, - u16::MIN + 3, - u16::MIN + 4, + u16::MAX - 1, + u16::MAX, ]; /// Interesting `u32` values to insert into a test input pub const INTERESTING_U32: [u32; 30] = [ - (u8::MAX) as u32, - (u8::MAX - 1) as u32, - (u8::MAX - 2) as u32, - (u8::MAX - 3) as u32, - (u8::MAX - 4) as u32, - (u8::MIN) as u32, - (u8::MIN + 1) as u32, - (u8::MIN + 2) as u32, - (u8::MIN + 3) as u32, - (u8::MIN + 4) as u32, - (u16::MAX) as u32, - (u16::MAX - 1) as 
u32, - (u16::MAX - 2) as u32, - (u16::MAX - 3) as u32, - (u16::MAX - 4) as u32, - (u16::MIN) as u32, - (u16::MIN + 1) as u32, - (u16::MIN + 2) as u32, - (u16::MIN + 3) as u32, - (u16::MIN + 4) as u32, - u32::MAX, - u32::MAX - 1, - u32::MAX - 2, - u32::MAX - 3, + // u8 + 0, + 1, + 16, + 32, + 64, + 100, + i8::MAX as u32 - 1, + i8::MAX as u32, + i8::MAX as u32 + 1, + u8::MAX as u32 - 8, + u8::MAX as u32 - 4, + u8::MAX as u32 - 1, + u8::MAX as u32, + // u16 + 1000, + 1024, + 4096, + i16::MAX as u32 - 1, + i16::MAX as u32, + i16::MAX as u32 + 1, + u16::MAX as u32 - 8, + u16::MAX as u32 - 4, + u16::MAX as u32 - 1, + u16::MAX as u32, + // u32 + i32::MAX as u32 - 1, + i32::MAX as u32, + i32::MAX as u32 + 1, + u32::MAX - 8, u32::MAX - 4, - u32::MIN, - u32::MIN + 1, - u32::MIN + 2, - u32::MIN + 3, - u32::MIN + 4, + u32::MAX - 1, + u32::MAX, ]; /// Interesting `u64` values to insert into a test input -pub const INTERESTING_U64: [u64; 40] = [ - (u8::MAX) as u64, - (u8::MAX - 1) as u64, - (u8::MAX - 2) as u64, - (u8::MAX - 3) as u64, - (u8::MAX - 4) as u64, - (u8::MIN) as u64, - (u8::MIN + 1) as u64, - (u8::MIN + 2) as u64, - (u8::MIN + 3) as u64, - (u8::MIN + 4) as u64, - (u16::MAX) as u64, - (u16::MAX - 1) as u64, - (u16::MAX - 2) as u64, - (u16::MAX - 3) as u64, - (u16::MAX - 4) as u64, - (u16::MIN) as u64, - (u16::MIN + 1) as u64, - (u16::MIN + 2) as u64, - (u16::MIN + 3) as u64, - (u16::MIN + 4) as u64, - (u32::MAX) as u64, - (u32::MAX - 1) as u64, - (u32::MAX - 2) as u64, - (u32::MAX - 3) as u64, - (u32::MAX - 4) as u64, - (u32::MIN) as u64, - (u32::MIN + 1) as u64, - (u32::MIN + 2) as u64, - (u32::MIN + 3) as u64, - (u32::MIN + 4) as u64, - u64::MAX, - u64::MAX - 1, - u64::MAX - 2, - u64::MAX - 3, +pub const INTERESTING_U64: [u64; 37] = [ + // u8 + 0, + 1, + 16, + 32, + 64, + 100, + i8::MAX as u64 - 1, + i8::MAX as u64, + i8::MAX as u64 + 1, + u8::MAX as u64 - 8, + u8::MAX as u64 - 4, + u8::MAX as u64 - 1, + u8::MAX as u64, + // u16 + 1000, + 1024, + 4096, + i16::MAX as 
u64 - 1, + i16::MAX as u64, + i16::MAX as u64 + 1, + u16::MAX as u64 - 8, + u16::MAX as u64 - 4, + u16::MAX as u64 - 1, + u16::MAX as u64, + // u32 + i32::MAX as u64 - 1, + i32::MAX as u64, + i32::MAX as u64 + 1, + u32::MAX as u64 - 8, + u32::MAX as u64 - 4, + u32::MAX as u64 - 1, + u32::MAX as u64, + // u64 + i64::MAX as u64 - 1, + i64::MAX as u64, + i64::MAX as u64 + 1, + u64::MAX - 8, u64::MAX - 4, - u64::MIN, - u64::MIN + 1, - u64::MIN + 2, - u64::MIN + 3, - u64::MIN + 4, + u64::MAX - 1, + u64::MAX, ]; /// Replace bytes in the input with interesting values that trigger common bugs such as From df2ff5453ed76b2d47a1852e566fa0a8b3da58c9 Mon Sep 17 00:00:00 2001 From: Michael Rodler Date: Thu, 11 Apr 2024 11:21:32 +0000 Subject: [PATCH 5/6] Updated doctests of the mutation helper functions for text-based mutation. + some fixes and a `MockRng` type useful for tests. --- src/mutators/text/helpers.rs | 150 ++++++++++++++++++++++++++--------- src/utils.rs | 62 ++++++++++++++- 2 files changed, 171 insertions(+), 41 deletions(-) diff --git a/src/mutators/text/helpers.rs b/src/mutators/text/helpers.rs index 4ce9416..0301fda 100644 --- a/src/mutators/text/helpers.rs +++ b/src/mutators/text/helpers.rs @@ -24,8 +24,7 @@ lazy_static! { .collect() }; pub static ref INTEGER_REGEX: Regex = Regex::new(r"[^\d][\d]+[^\d]").unwrap(); - pub static ref HEX_INTEGER_REGEX: Regex = - Regex::new(r"[^\da-fA-F][\da-fA-F][^\da-fA-F]").unwrap(); + pub static ref HEX_INTEGER_REGEX: Regex = Regex::new(r"[^\d]0x[\da-fA-F]+[^\da-fA-F]").unwrap(); } #[derive(Hash, PartialEq, Debug, Copy, Clone)] @@ -84,6 +83,15 @@ pub fn random_ascii_string(rng: &mut impl rand::Rng, max_length: usize) -> Strin } /// Identify an integer in the text input and replace it with the given replacement bytes. 
+/// +/// ```rust +/// # let mut rngi = snapchange::utils::MockRng::default(); +/// # let mut rng = &mut rngi; +/// # use snapchange::mutators::text::helpers::replace_integer_with; +/// let mut buf = b"strncpy(d, s, 10);".to_vec(); +/// replace_integer_with(&mut buf, b"42", rng).unwrap(); +/// assert_eq!(buf, b"strncpy(d, s, 42);"); +/// ``` pub fn replace_integer_with( data: &mut Vec, repl: &[u8], @@ -92,7 +100,7 @@ pub fn replace_integer_with( if let Some(irange) = INTEGER_REGEX .find_iter(&data) .choose(rng) - .map(|m| (m.start() + 1)..m.end()) + .map(|m| (m.start() + 1)..(m.end() - 1)) { utils::vec::splice_into(data, irange.clone(), repl); Some(irange) @@ -102,6 +110,15 @@ pub fn replace_integer_with( } /// Identify a hex integer in the text input and replace it with the given replacement bytes. +/// +/// ```rust +/// # let mut rngi = snapchange::utils::MockRng::new(&[1, 1, 1]); +/// # let mut rng = &mut rngi; +/// # use snapchange::mutators::text::helpers::replace_hex_integer_with; +/// let mut buf = b"strncpy(d, s, 0x10);".to_vec(); +/// replace_hex_integer_with(&mut buf, b"111111", rng).unwrap(); +/// assert_eq!(String::from_utf8_lossy(&buf), "strncpy(d, s, 0x111111);"); +/// ``` pub fn replace_hex_integer_with( data: &mut Vec, repl: &[u8], @@ -110,7 +127,7 @@ pub fn replace_hex_integer_with( if let Some(irange) = HEX_INTEGER_REGEX .find_iter(&data) .choose(rng) - .map(|m| m.range()) + .map(|m| (m.start() + 3)..(m.end() - 1)) { utils::vec::splice_into(data, irange.clone(), repl); Some(irange) @@ -120,6 +137,15 @@ pub fn replace_hex_integer_with( } /// Identify an integer and replace it with a random u64. 
+/// +/// ```rust +/// # let mut rngi = snapchange::utils::MockRng::new(&[1337]); +/// # let mut rng = &mut rngi; +/// # use snapchange::mutators::text::helpers::replace_integer_with_rand; +/// let mut buf = b"strncpy(d, s, 10);".to_vec(); +/// replace_integer_with_rand(&mut buf, rng); +/// assert_eq!(String::from_utf8_lossy(&buf), "strncpy(d, s, 1337);"); +/// ``` pub fn replace_integer_with_rand( data: &mut Vec, rng: &mut impl rand::Rng, @@ -195,16 +221,22 @@ pub fn insert_random_string( } /// Insert up to N random ascii chars at a random offset. +/// +/// ```rust +/// # let mut rngi = snapchange::utils::MockRng::new(&[0, 0, 10, 10, (1 << (32 - 6))]); +/// # let mut rng = &mut rngi; +/// # use snapchange::mutators::text::helpers::*; +/// let mut v = b"1234".to_vec(); +/// insert_repeated_chars::<4>(&mut v, rng).unwrap(); +/// assert_eq!(String::from_utf8_lossy(&v), "B1234"); +/// ``` pub fn insert_repeated_chars( input: &mut Vec, rng: &mut impl rand::Rng, ) -> Option<(usize, u8, usize)> { - if input.is_empty() { - return None; - } - - let count = rng.gen_range(0..N); + debug_assert!(N > 1); let c: u8 = Alphanumeric.sample(rng); + let count = rng.gen_range(1..=N); let data = [c; N]; let idx = rng.gen_range(0..=input.len()); utils::vec::fast_insert_at(input, idx, &data[..count]); @@ -213,9 +245,12 @@ pub fn insert_repeated_chars( /// Insert data after a separator and add another separator. /// -/// ```rust,ignore -/// let mut v = b"asdf; asdf"; -/// insert_separated(v, ';', "XXXX", rng); +/// ```rust +/// # let mut rngi = snapchange::utils::MockRng::new(&[]); +/// # let mut rng = &mut rngi; +/// # use snapchange::mutators::text::helpers::*; +/// let mut v = b"asdf; asdf".to_vec(); +/// insert_separated(&mut v, ';', b"XXXX", rng).unwrap(); /// assert_eq!(v, b"asdf;XXXX; asdf"); /// ``` pub fn insert_separated( @@ -264,9 +299,12 @@ where /// Splice data between a separator. 
/// -/// ```rust,ignore +/// ```rust +/// # let mut rngi = snapchange::utils::MockRng::new(&[]); +/// # let mut rng = &mut rngi; +/// # use snapchange::mutators::text::helpers::*; /// let mut v = b"asdf; asdf".to_vec(); -/// splice_separated(&mut v, ';', "XXXX; AAAA", rng); +/// splice_separated(&mut v, ';', b"XXXX; AAAA", rng); /// assert_eq!(v, b"XXXX; asdf"); /// ``` pub fn splice_separated( @@ -346,14 +384,19 @@ where /// Insert data after a separator. /// -/// ```rust,ignore -/// let mut v = b"asdf; asdf"; -/// insert_separated(v, ';', "XXXX", rng); -/// assert_eq!(v, b"asdf;XXXX asdf"); -/// -/// let mut v = b"var asdf = \"asdf\";"; -/// insert_at_separator(v, '"', "XXXX", rng); -/// assert!(&v == b"var asdf = \"XXXXasdf\";" || &v == b"var asdf = \"asdf\"XXXX;"); +/// ```rust +/// # use snapchange::mutators::text::helpers::*; +/// # let mut rngi = snapchange::utils::MockRng::new(&[0]); +/// # let mut rng = &mut rngi; +/// # // let mut v = b"var asdf = \"asdf\";".to_vec(); +/// # // insert_after_separator(&mut v, '"', b"XXXX", rng); +/// # // assert_eq!(String::from_utf8_lossy(&v), "var asdf = \"XXXXasdf\";", "first \" was chosen by rng"); +/// # // or +/// # let mut rngi = snapchange::utils::MockRng::new(&[1]); +/// # let mut rng = &mut rngi; +/// let mut v = b"var asdf = \"asdf\";".to_vec(); +/// insert_after_separator(&mut v, '"', b"XXXX", rng); +/// assert_eq!(String::from_utf8_lossy(&v), "var asdf = \"asdf\"XXXX;", "second \" was chosen by rng"); /// ``` pub fn insert_after_separator( input: &mut Vec, @@ -396,14 +439,28 @@ where /// Insert data before a separator. /// -/// ```rust,ignore -/// let mut v = b"asdf; asdf"; -/// insert_separated(v, ';', "XXXX", rng); -/// assert_eq!(v, b"asdfXXXX; asdf"); +/// ```rust +/// # use snapchange::mutators::text::helpers::*; +/// # let mut rngi = snapchange::utils::MockRng::new(&[0xffff, 0xffff, 0xffff, 0xff]); +/// # let mut rng = &mut rngi; +/// # // TODO: fix this part of the doctest? 
+/// # let mut v = b"var asdf = \"asdf\";".to_vec(); +/// # insert_before_separator(&mut v, '"', b"XXXX", rng).unwrap(); +/// # // assert_eq!(String::from_utf8_lossy(&v), "var asdf = XXXX\"asdf\";", "first \" chosen"); +/// // or +/// # let mut rngi = snapchange::utils::MockRng::new(&[0]); +/// # let mut rng = &mut rngi; +/// let mut v = b"var asdf = \"asdf\";".to_vec(); +/// insert_before_separator(&mut v, '"', b"XXXX", rng).unwrap(); +/// assert_eq!(String::from_utf8_lossy(&v), "var asdf = \"asdfXXXX\";", "second \" chosen"); /// -/// let mut v = b"var asdf = \"asdf\";"; -/// insert_at_separator(v, '"', "XXXX", rng); -/// assert!(&v == b"var asdf = XXXX\"asdf\";" || &v == b"var asdf = \"asdfXXXX\";"); +/// +/// # let mut rngi = snapchange::utils::MockRng::new(&[0]); +/// # let mut rng = &mut rngi; +/// // in case the target buf is empty, we insert. +/// let mut v = b"".to_vec(); +/// insert_before_separator(&mut v, '"', b"XXXX", rng).unwrap(); +/// assert_eq!(String::from_utf8_lossy(&v), "XXXX\""); /// ``` pub fn insert_before_separator( input: &mut Vec, @@ -419,8 +476,8 @@ where input.reserve(other.len() + 1); if input.is_empty() { - input.push(sep); input.extend_from_slice(other); + input.push(sep); return Some(0); } @@ -447,10 +504,13 @@ where /// Delete data between two separators /// Returns range of deleted data. /// -/// ```rust,ignore +/// ```rust +/// # use snapchange::mutators::text::helpers::*; +/// # let mut rngi = snapchange::utils::MockRng::new(&[]); +/// # let mut rng = &mut rngi; /// let mut v = b"asdf\nbsdf\n".to_vec(); /// delete_between_separator(&mut v, b'\n', rng); -/// assert!(v == b"asdf\n" || v == b"bsdf\n"); +/// assert_eq!(String::from_utf8_lossy(&v), "asdf\n"); /// ``` #[inline] pub fn delete_between_separator( @@ -489,12 +549,20 @@ where Some((start_offset, end_offset)) } -/// Duplicate data between two separators. +/// Duplicate data between two separators, e.g., can be used to duplicate a line or a word. 
/// -/// ```rust,ignore +/// ```rust +/// # use snapchange::mutators::text::helpers::*; +/// # let mut rngi = snapchange::utils::MockRng::default(); +/// # let mut rng = &mut rngi; +/// // duplicate a line /// let mut v = b"asdf\nbsdf\n".to_vec(); /// dup_between_separator(&mut v, b'\n', rng); -/// assert!(v == b"asdf\nasdf\nbsdf\n" || v == b"asdf\nbsdf\nbsdf\n"); +/// assert_eq!(v, b"asdf\nbsdf\nbsdf\n"); +/// // duplicate a word +/// let mut v = b"asdf bsdf".to_vec(); +/// dup_between_separator(&mut v, b' ', rng); +/// assert_eq!(v, b"asdf bsdf bsdf"); /// ``` #[inline] pub fn dup_between_separator( @@ -525,13 +593,21 @@ where let end_offset = if let Some(o) = sep_idx.get(start + 1) { *o } else { - input.push(sep); input.len() }; // let src = &input[start_offset..end_offset]; // input.splice(end_offset..end_offset, src.iter().copied()); - crate::utils::vec::insert_from_within(input, end_offset, start_offset..end_offset); + if rng.gen_bool(0.8) { + // duplicate once with high prob + crate::utils::vec::insert_from_within(input, end_offset, start_offset..end_offset); + } else { + // duplicate a couple of times + let times: usize = rng.gen_range(2..16); + for _ in 0..times { + crate::utils::vec::insert_from_within(input, end_offset, start_offset..end_offset); + } + } Some((start_offset, end_offset)) } diff --git a/src/utils.rs b/src/utils.rs index bb2fe85..35fde18 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -385,7 +385,7 @@ pub mod vec { /// * if `index` is out of bounds /// * if `src_range` is out of bounds /// * if `index` is within `src_range` - /// + /// /// # Examples: /// /// ```rust @@ -436,12 +436,13 @@ pub mod vec { std::ops::Bound::Included(t) => *t + 1, } .min(dst.len()); - if src_start == src_end { // no-op + if src_start == src_end { + // no-op return; } assert!(src_start < src_end); assert!(index <= src_start || index >= src_end); - + let copy_len = src_end - src_start; let src_start = if index <= src_start { // adjust start offset @@ -458,7 +459,10 
@@ pub mod vec { // SAFETY: we reserved enough space to stay within bounds. unsafe { // move data back to make space. - let dst_ptr = dst.as_mut_ptr().offset(index as isize).offset(copy_len as isize); + let dst_ptr = dst + .as_mut_ptr() + .offset(index as isize) + .offset(copy_len as isize); let src_ptr = dst.as_ptr().offset(index as isize); std::ptr::copy(src_ptr, dst_ptr, old_len - index); // copy the data within. @@ -888,3 +892,53 @@ pub fn write_crash_input( pub fn custom_crash>(crash_name: T) -> Result { Ok(Execution::new_crash(crash_name)) } + +pub use rand; +pub use rand::RngCore; + +/// A RNG implementation useful for testing. +/// A fake rng that spits out deterministic numbers. +#[derive(Default)] +pub struct MockRng { + buf: Vec, +} + +impl MockRng { + /// create new based on given slice of numbers; will be used in reverse + pub fn new(buf: &[u32]) -> Self { + Self { buf: buf.to_vec() } + } + + /// push a value to the underlying vector + pub fn push(&mut self, v: u32) { + self.buf.push(v); + } +} + +impl<'a> RngCore for MockRng { + fn next_u32(&mut self) -> u32 { + // #[cfg(test)] + // println!("sampling {:?}", self.buf.last()); + if let Some(x) = self.buf.pop() { + x + } else { + 0 + } + } + + fn next_u64(&mut self) -> u64 { + let lo: u64 = self.next_u32() as u64; + let hi: u64 = self.next_u32() as u64; + hi << 32 | lo + } + + fn fill_bytes(&mut self, dest: &mut [u8]) { + for i in 0..dest.len() { + dest[i] = self.next_u32() as u8; + } + } + fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand::Error> { + self.fill_bytes(dest); + Ok(()) + } +} From f767d0e2e4266ec6c23983744585811ca38a00e0 Mon Sep 17 00:00:00 2001 From: Michael Rodler Date: Thu, 11 Apr 2024 13:05:25 +0000 Subject: [PATCH 6/6] several updates to the mujs example --- examples/08_textinput_mujs/dict.txt | 54 ++++++++++++++++++++- examples/08_textinput_mujs/harness/Makefile | 10 +++- examples/08_textinput_mujs/harness/main.c | 31 ++++++++---- 
examples/08_textinput_mujs/input/magic.js | 5 +- examples/08_textinput_mujs/input/party.js | 6 ++- examples/08_textinput_mujs/make_dict.py | 9 ++-- examples/08_textinput_mujs/src/fuzzer.rs | 10 +++- 7 files changed, 104 insertions(+), 21 deletions(-) diff --git a/examples/08_textinput_mujs/dict.txt b/examples/08_textinput_mujs/dict.txt index 2551fbe..0d9f25a 100644 --- a/examples/08_textinput_mujs/dict.txt +++ b/examples/08_textinput_mujs/dict.txt @@ -84,6 +84,30 @@ toString undefined valueOf + +Math.round +Math.pow +Math.sqrt +Math.abs +math.ceil +Math.floor +Math.sin +Math.cos +Math.max +Math.min + +Array.forEach +Array.map +Array.filter +Array.reduce +Array.reduceRight +Array.every +Array.some +Array.indexOf +Array.lastIndexOf +Array.find +Array.findIndex + + - * @@ -112,10 +136,38 @@ valueOf ; () +(0) +(1) +("a") +(0, 0) +(1, 1) +("a", "a") +(0, 0, 0) +(1, 1, 1) +("a", "a", "a") +(0, 0, 0, 0) +(1, 1, 1, 1) +("a", "a", "a", "a") +(0, 0, 0, 0, 0) +(1, 1, 1, 1, 1) +("a", "a", "a", "a", "a") + + [0] [1] +[-1] -Math.pow +[0, 0] +[0, 0, 0] +[0, 0, 0, 0] +[0, 0, 0, 0, 0] + +{"name": "john", "surname": "doe", "age": 30} +{} + +function(){} + +var a = function(){}; 0.0 1e10 diff --git a/examples/08_textinput_mujs/harness/Makefile b/examples/08_textinput_mujs/harness/Makefile index 633fbb9..4f70df5 100644 --- a/examples/08_textinput_mujs/harness/Makefile +++ b/examples/08_textinput_mujs/harness/Makefile @@ -1,8 +1,14 @@ ifeq ($(origin CC),default) CC = clang endif -# CFLAGS ?= -O3 -ggdb -fuse-ld=lld -fsanitize=address -fno-omit-frame-pointer -flto -CFLAGS ?= -O3 -ggdb -fuse-ld=lld -fno-omit-frame-pointer -flto +CFLAGS ?= -O3 -ggdb -fno-omit-frame-pointer +# CFLAGS ?= -O3 -ggdb -fuse-ld=lld -fno-omit-frame-pointer -flto +USE_ASAN ?= 0 + +all: mujs_harness mujs_harness.asan +ifeq ($(USE_ASAN),1) + mv mujs_harness.asan mujs_harness +endif mujs_harness: main.c mujs-1.3.4 Makefile $(CC) $(CFLAGS) -o $@ $< ./mujs-1.3.4/one.c -I./mujs-1.3.4/ -lm diff --git 
a/examples/08_textinput_mujs/harness/main.c b/examples/08_textinput_mujs/harness/main.c index c65a661..14d1a91 100644 --- a/examples/08_textinput_mujs/harness/main.c +++ b/examples/08_textinput_mujs/harness/main.c @@ -14,7 +14,8 @@ static void jsB_gc(js_State *J) { } static void jsB_print(js_State *J) { - int i, top = js_gettop(J); + int i = 0; + int top = js_gettop(J); for (i = 1; i < top; ++i) { const char *s = js_tostring(J, i); if (i > 1) @@ -28,14 +29,19 @@ static void jsB_print(js_State *J) { int64_t magic_values[] = {0xdeadbeef, 0xcafecafe, 0x42424242, 0x41414141}; static void magic(js_State *J) { - if (js_isnumber(J, 1)) { - double idx = js_tonumber(J, 1); - if (idx < 4) { - js_pushnumber(J, (double)magic_values[(int)idx]); - return; + if (js_gettop(J) > 0) { + if (js_isnumber(J, 1)) { + double idx = js_tonumber(J, 1); + if (idx < 4) { + js_pushnumber(J, (double)magic_values[(unsigned int)idx]); + return; + } } + js_pushnumber(J, 0); + } else { + + js_pushundefined(J); } - js_pushnumber(J, 0); } int party_counter = 0; @@ -46,7 +52,7 @@ static void party(js_State *J) { if (js_isnumber(J, i)) { int val = (int)js_tonumber(J, i); party_counter += val; - if (party_counter > 43) { + if (party_counter > 1000) { party_counter = 0; } if (party_counter > 42) { @@ -120,6 +126,11 @@ static char *read_stdin(void) { return s; } +void stop(int status) { + puts("\n...bye"); + exit(status); +} + int main(int argc, char **argv) { char *input = NULL; js_State *J = NULL; @@ -162,7 +173,7 @@ int main(int argc, char **argv) { // Snapshot taken here __asm("int3"); - sleep(1); + sleep(3); __asm("vmcall"); } else { @@ -180,5 +191,5 @@ int main(int argc, char **argv) { js_gc(J, 0); js_freestate(J); - exit(status); + stop(status); } diff --git a/examples/08_textinput_mujs/input/magic.js b/examples/08_textinput_mujs/input/magic.js index a5d17bf..6b6a443 100644 --- a/examples/08_textinput_mujs/input/magic.js +++ b/examples/08_textinput_mujs/input/magic.js @@ -1,2 +1,5 @@ -var r = 
magic(0); +var r = magic(100); +console.log(r); + +var r = magic(1); console.log(r); diff --git a/examples/08_textinput_mujs/input/party.js b/examples/08_textinput_mujs/input/party.js index 2fc8be1..56adb3a 100644 --- a/examples/08_textinput_mujs/input/party.js +++ b/examples/08_textinput_mujs/input/party.js @@ -1,3 +1,5 @@ -party(1); -var r = party(5, 10); +party(10); +party(10); +party(10); +var r = party(1, 2); console.log(r); diff --git a/examples/08_textinput_mujs/make_dict.py b/examples/08_textinput_mujs/make_dict.py index ba6c016..506cd81 100644 --- a/examples/08_textinput_mujs/make_dict.py +++ b/examples/08_textinput_mujs/make_dict.py @@ -5,14 +5,15 @@ entries = set() -for line in open(sys.argv[1]).readlines(): +for line in open(sys.argv[1], "r").readlines(): line = line.strip() - entries.add(line) + if line: + entries.add(line) -for lin in entries: +for line in entries: fname = hex(hash(line))[2:] + "_" if all(c in string.ascii_letters for c in line): fname += line - with open("./dict/" + fname, "w") as f: + with open("./dict/" + fname.strip(), "w") as f: f.write(line) diff --git a/examples/08_textinput_mujs/src/fuzzer.rs b/examples/08_textinput_mujs/src/fuzzer.rs index 136f68d..750f0ff 100644 --- a/examples/08_textinput_mujs/src/fuzzer.rs +++ b/examples/08_textinput_mujs/src/fuzzer.rs @@ -16,7 +16,7 @@ impl Fuzzer for JSTextFuzzer { type Input = TextInput; const START_ADDRESS: u64 = constants::RIP; const MAX_INPUT_LENGTH: usize = 0x4000; - const MAX_MUTATIONS: u64 = 16; + const MAX_MUTATIONS: u64 = 4; fn set_input( &mut self, @@ -41,6 +41,7 @@ impl Fuzzer for JSTextFuzzer { "ld-musl-x86_64.so.1!fprintf", "ld-musl-x86_64.so.1!printf", "ld-musl-x86_64.so.1!putchar", + // "mujs_harness!jsB_print", ] { if fuzzvm .patch_bytes_permanent(AddressLookup::SymbolOffset(sym, 0), &[0xc3]) @@ -54,4 +55,11 @@ impl Fuzzer for JSTextFuzzer { Ok(()) } + + fn reset_breakpoints(&self) -> Option<&[AddressLookup]> { + Some(&[ + 
AddressLookup::SymbolOffset("ld-musl-x86_64.so.1!exit", 0), + // AddressLookup::SymbolOffset("mujs_harness!stop", 0) + ]) + } }