Skip to content

Commit

Permalink
refactor: diff check_stats_cache helper
Browse files Browse the repository at this point in the history
- expand docstring
- do not force stats cache creation; only use the stats cache if it's already available.
  • Loading branch information
jqnatividad committed Feb 13, 2025
1 parent c2e5db9 commit d855aa0
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions src/cmd/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
/// This function checks if the stats cache is available and if it is, performs "smart"
/// validation checks on the input files.
///
/// If the stats cache is not available, the function returns false.
/// If it is available, the function returns true if the files are identical.
/// First, it checks if the current options allow us to leverage the stats cache.
/// If so, it checks if the stats cache is available.
/// If it is, the function returns true if the files are identical per their fingerprint hashes,
/// allowing us to short-circuit the diff.
/// If the files are not identical, it performs additional "smart" validation checks.
fn check_stats_cache(args: &Args) -> Result<bool, CliError> {
if args.flag_force
Expand All @@ -313,8 +315,6 @@ fn check_stats_cache(args: &Args) -> Result<bool, CliError> {
return Ok(false);
}

// ---- STATS CACHE VALIDATION CHECKS ----

// Set stats config for left file
let left_schema_args = SchemaArgs {
arg_input: args.arg_input_left.clone(),
Expand Down Expand Up @@ -343,20 +343,19 @@ fn check_stats_cache(args: &Args) -> Result<bool, CliError> {
Ok((left_csv_fields, left_stats, left_dataset_stats)),
Ok((_, right_stats, right_dataset_stats)),
) = (
get_stats_records(&left_schema_args, StatsMode::FrequencyForceStats),
get_stats_records(&right_schema_args, StatsMode::FrequencyForceStats),
get_stats_records(&left_schema_args, StatsMode::Frequency),
get_stats_records(&right_schema_args, StatsMode::Frequency),
) {
// check if dataset stats are empty
// if so, return false and proceed to "regular" diff processing
if left_dataset_stats.is_empty() || right_dataset_stats.is_empty() {
return Ok(false);
}

// If both files fingerprint hashes match, files are identical short-circuit diff
// If both files' fingerprint hashes match, files are identical. Short-circuit diff
if left_dataset_stats.get("qsv__fingerprint_hash")
== right_dataset_stats.get("qsv__fingerprint_hash")
{
// if fingerprint hashes match, files are identical, short-circuit diff
return Ok(true);
}

Expand Down

0 comments on commit d855aa0

Please sign in to comment.