diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..2eed8df --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug Report +about: Report a bug to help us improve aicheck +title: "[BUG] " +labels: bug +assignees: '' +--- + +## Description + +A clear description of the bug. + +## Steps to Reproduce + +1. Run `aic check ...` +2. ... + +## Expected Behavior + +What you expected to happen. + +## Actual Behavior + +What actually happened. Include any error output. + +## Environment + +- **OS**: (e.g., macOS 14, Ubuntu 24.04) +- **aicheck version**: (`aic --version`) +- **Rust version**: (`rustc --version`) + +## Sample File + +If possible, attach or link to a sample file that reproduces the issue. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..55eea5d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature Request +about: Suggest an idea for aicheck +title: "[FEATURE] " +labels: enhancement +assignees: '' +--- + +## Problem + +A clear description of the problem or limitation you're experiencing. + +## Proposed Solution + +Describe the feature or improvement you'd like to see. + +## Alternatives Considered + +Any alternative solutions or features you've considered. + +## Additional Context + +Any other context, references, or screenshots about the feature request. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..5554103 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,15 @@ +## Summary + +Brief description of the changes. + +## Changes + +- + +## Checklist + +- [ ] `cargo fmt -- --check` passes +- [ ] `cargo clippy -- -D warnings` passes +- [ ] `cargo test` passes +- [ ] New detection methods include appropriate confidence tiers +- [ ] Documentation updated (if applicable) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 150d21d..ff69bef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,8 +21,9 @@ jobs: lfs: true - uses: dtolnay/rust-toolchain@stable with: - components: clippy + components: clippy, rustfmt - uses: Swatinem/rust-cache@v2 + - run: cargo fmt -- --check - run: cargo clippy -- -D warnings - run: cargo test - run: cargo build --release diff --git a/.gitignore b/.gitignore index 198b807..7c65b48 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,7 @@ *.swp .idea/ .vscode/ +.context/ +.env* +*.pem +*.key diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..96f9bac --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,32 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.0] - 2025-06-01 + +### Added + +- CLI commands: `aic check [PATHS]` and `aic info ` +- C2PA manifest detection and validation +- XMP/IPTC metadata analysis for AI provenance signals +- EXIF heuristic analysis +- PNG text chunk inspection +- MP4 container metadata parsing +- ID3 audio tag detection +- WAV container metadata analysis +- Filename pattern matching for known AI tools +- Audio spectral analysis for synthetic content +- Invisible watermark detection (DWT-DCT) +- Confidence tiers: HIGH, MEDIUM, LOW +- Support for image formats: JPEG, PNG, WebP, AVIF, HEIF, TIFF, GIF, BMP +- Support for video formats: MP4, MOV, AVI, WebM +- Support for audio formats: MP3, M4A, WAV +- Support for document formats: PDF +- JSON output mode (`--json`) +- Quiet mode (`--quiet`) +- Deep analysis mode (`--deep`) +- Internationalization support for 7 languages (en, de, es, hi, ja, ko, zh-CN) +- Rustdoc documentation with GitHub Pages deployment diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..d292ab2 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,21 @@ +# Contributor Covenant Code of Conduct + +This project follows the [Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/). + +## Our Pledge + +We pledge to make participation in our community a welcoming experience for everyone, regardless of background or identity. + +## Our Standards + +Positive behavior includes using welcoming language, respecting differing viewpoints, accepting constructive feedback, and focusing on what is best for the community. + +Unacceptable behavior includes personal or political attacks, publishing others' private information without permission, and other conduct which could reasonably be considered inappropriate in a professional setting. + +## Enforcement + +Instances of unacceptable behavior may be reported to the project maintainers. All complaints will be reviewed and investigated promptly and fairly. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 2.1. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..515729a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,58 @@ +# Contributing to AICheck + +Thanks for your interest in contributing! Here's how to get started. + +## Development Setup + +```bash +# Clone the repo +git clone https://github.com/MatrixA/aicheck.git +cd aicheck + +# Build +cargo build + +# Run tests +cargo test + +# Run the CLI +cargo run -- check photo.jpg +``` + +Requires **Rust 1.86+**. + +## Before Submitting a PR + +Please make sure all checks pass locally: + +```bash +cargo fmt -- --check # formatting +cargo clippy -- -D warnings # lints +cargo test # tests +``` + +## Pull Request Process + +1. Fork the repo and create a branch from `main` +2. Make your changes +3. Ensure all checks above pass +4. Open a PR with a clear description of what changed and why + +## Adding a New AI Tool + +To add detection for a new AI tool, update the pattern list in `src/known_tools.rs`. Each entry needs a case-insensitive pattern and a canonical tool name. + +## Adding a New Detection Method + +New detectors go in `src/detector/`. Implement detection logic, then register it in `src/detector/mod.rs` within `run_all_detectors()`. + +## Reporting Bugs + +Use the [bug report template](https://github.com/MatrixA/aicheck/issues/new?template=bug_report.md) and include: +- The file you tested (or a description if you can't share it) +- Expected vs actual output +- Your OS and Rust version + +## License + +By contributing, you agree that your contributions will be licensed under AGPL-3.0-or-later. diff --git a/Cargo.toml b/Cargo.toml index 316a27c..b0daf2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,12 +2,15 @@ name = "aicheck" version = "0.1.0" edition = "2021" +rust-version = "1.86" description = "Detect AI-generated content via provenance signals (C2PA, XMP/IPTC, EXIF)" license = "AGPL-3.0-or-later" repository = "https://github.com/MatrixA/aicheck" homepage = "https://github.com/MatrixA/aicheck" +documentation = "https://matrixa.github.io/aicheck/" keywords = ["ai-detection", "c2pa", "watermark", "metadata", "forensics"] categories = ["command-line-utilities", "multimedia"] +exclude = [".github/", ".context/", "docs/", "tests/fixtures/"] [lib] name = "aicheck" diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..e93b58c --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| 0.1.x | Yes | + +## Reporting a Vulnerability + +If you discover a security vulnerability, please report it responsibly: + +1. **Do not** open a public issue +2. Use [GitHub's private vulnerability reporting](https://github.com/MatrixA/aicheck/security/advisories/new) +3. Include a description of the vulnerability, steps to reproduce, and potential impact + +You should receive an initial response within 72 hours. We'll work with you to understand the issue and coordinate a fix before any public disclosure. diff --git a/src/detector/audio_spectral.rs b/src/detector/audio_spectral.rs index 09823ab..1709422 100644 --- a/src/detector/audio_spectral.rs +++ b/src/detector/audio_spectral.rs @@ -4,7 +4,7 @@ use std::fs; use std::path::Path; use super::wav_metadata; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; const FFT_SIZE: usize = 2048; const MAX_FRAMES: usize = 64; @@ -18,7 +18,9 @@ fn decode_pcm_16le(data: &[u8], channels: u16) -> Vec { let mut samples = Vec::with_capacity(num_blocks); for i in 0..num_blocks { let offset = i * block_align; - if offset + 2 > data.len() { break; } + if offset + 2 > data.len() { + break; + } let raw = i16::from_le_bytes([data[offset], data[offset + 1]]); samples.push(raw as f64 / 32768.0); } @@ -26,7 +28,9 @@ fn decode_pcm_16le(data: &[u8], channels: u16) -> Vec { } fn compute_avg_spectrum(samples: &[f64], fft_size: usize) -> Vec { - if samples.len() < fft_size { return vec![]; } + if samples.len() < fft_size { + return vec![]; + } let mut planner = FftPlanner::::new(); let fft = planner.plan_fft_forward(fft_size); let mid = samples.len() / 2; @@ -39,55 +43,86 @@ fn compute_avg_spectrum(samples: &[f64], fft_size: usize) -> Vec { let hop = fft_size / 2; let mut pos = 0; while pos + fft_size <= available.len() && frame_count < MAX_FRAMES { - let mut buffer: Vec> = available[pos..pos + fft_size].iter().enumerate() + let mut buffer: Vec> = available[pos..pos + fft_size] + .iter() + .enumerate() .map(|(i, &s)| { - let w = 0.5 * (1.0 - (2.0 * std::f64::consts::PI * i as f64 / (fft_size - 1) as f64).cos()); + let w = 0.5 + * (1.0 - (2.0 * std::f64::consts::PI * i as f64 / (fft_size - 1) as f64).cos()); Complex::new(s * w, 0.0) - }).collect(); + }) + .collect(); fft.process(&mut buffer); - for (bin, power) in avg_power.iter_mut().enumerate() { *power += buffer[bin].norm_sqr(); } + for (bin, power) in avg_power.iter_mut().enumerate() { + *power += buffer[bin].norm_sqr(); + } frame_count += 1; pos += hop; } - if frame_count == 0 { return vec![]; } - for power in avg_power.iter_mut() { *power /= frame_count as f64; } + if frame_count == 0 { + return vec![]; + } + for power in avg_power.iter_mut() { + *power /= frame_count as f64; + } avg_power } fn find_bandwidth_cutoff(spectrum: &[f64], sample_rate: u32) -> Option<(f64, f64)> { - if spectrum.is_empty() { return None; } + if spectrum.is_empty() { + return None; + } let num_bins = spectrum.len(); let nyquist = sample_rate as f64 / 2.0; let bin_hz = nyquist / num_bins as f64; let total_energy: f64 = spectrum.iter().sum(); - if total_energy == 0.0 { return None; } + if total_energy == 0.0 { + return None; + } let mut cumulative = 0.0; let mut cutoff_bin = num_bins; for (i, &power) in spectrum.iter().enumerate() { cumulative += power; - if cumulative >= total_energy * 0.99 { cutoff_bin = i + 1; break; } + if cumulative >= total_energy * 0.99 { + cutoff_bin = i + 1; + break; + } } let cutoff_freq = cutoff_bin as f64 * bin_hz; let bandwidth_ratio = cutoff_freq / nyquist; if bandwidth_ratio < BANDWIDTH_THRESHOLD { let below_energy: f64 = spectrum[..cutoff_bin].iter().sum(); let above_energy: f64 = spectrum[cutoff_bin..].iter().sum(); - let ratio = if below_energy > 0.0 { above_energy / below_energy } else { 0.0 }; - if ratio < CUTOFF_ENERGY_RATIO { return Some((cutoff_freq, bandwidth_ratio)); } + let ratio = if below_energy > 0.0 { + above_energy / below_energy + } else { + 0.0 + }; + if ratio < CUTOFF_ENERGY_RATIO { + return Some((cutoff_freq, bandwidth_ratio)); + } } None } fn spectral_flatness(spectrum: &[f64]) -> f64 { let n = spectrum.len() as f64; - if n == 0.0 { return 0.0; } + if n == 0.0 { + return 0.0; + } let filtered: Vec = spectrum.iter().copied().filter(|&x| x > 1e-20).collect(); - if filtered.is_empty() { return 0.0; } + if filtered.is_empty() { + return 0.0; + } let n = filtered.len() as f64; let log_mean = filtered.iter().map(|x| x.ln()).sum::() / n; let geometric_mean = log_mean.exp(); let arithmetic_mean = filtered.iter().sum::() / n; - if arithmetic_mean > 0.0 { geometric_mean / arithmetic_mean } else { 0.0 } + if arithmetic_mean > 0.0 { + geometric_mean / arithmetic_mean + } else { + 0.0 + } } pub fn detect(path: &Path) -> Result> { @@ -96,26 +131,38 @@ pub fn detect(path: &Path) -> Result> { Some(w) => w, None => return Ok(vec![]), }; - if wav.fmt.bits_per_sample != 16 || wav.pcm_start >= wav.pcm_end { return Ok(vec![]); } + if wav.fmt.bits_per_sample != 16 || wav.pcm_start >= wav.pcm_end { + return Ok(vec![]); + } let pcm_data = &data[wav.pcm_start..wav.pcm_end]; let samples = decode_pcm_16le(pcm_data, wav.fmt.channels); - if samples.len() < FFT_SIZE { return Ok(vec![]); } + if samples.len() < FFT_SIZE { + return Ok(vec![]); + } let spectrum = compute_avg_spectrum(&samples, FFT_SIZE); - if spectrum.is_empty() { return Ok(vec![]); } + if spectrum.is_empty() { + return Ok(vec![]); + } let mut signals = Vec::new(); - if let Some((cutoff_freq, bandwidth_ratio)) = find_bandwidth_cutoff(&spectrum, wav.fmt.sample_rate) { + if let Some((cutoff_freq, bandwidth_ratio)) = + find_bandwidth_cutoff(&spectrum, wav.fmt.sample_rate) + { let nyquist = wav.fmt.sample_rate as f64 / 2.0; signals.push( - SignalBuilder::new(SignalSource::AudioSpectral, Confidence::Low, "signal_audio_cutoff") - .param("freq", format!("{:.0}", cutoff_freq)) - .param("pct", format!("{:.0}", bandwidth_ratio * 100.0)) - .param("nyquist", format!("{:.0}", nyquist)) - .detail("cutoff_frequency", format!("{:.0}Hz", cutoff_freq)) - .detail("nyquist", format!("{:.0}Hz", nyquist)) - .detail("bandwidth_used", format!("{:.1}%", bandwidth_ratio * 100.0)) - .build(), + SignalBuilder::new( + SignalSource::AudioSpectral, + Confidence::Low, + "signal_audio_cutoff", + ) + .param("freq", format!("{:.0}", cutoff_freq)) + .param("pct", format!("{:.0}", bandwidth_ratio * 100.0)) + .param("nyquist", format!("{:.0}", nyquist)) + .detail("cutoff_frequency", format!("{:.0}Hz", cutoff_freq)) + .detail("nyquist", format!("{:.0}Hz", nyquist)) + .detail("bandwidth_used", format!("{:.1}%", bandwidth_ratio * 100.0)) + .build(), ); } @@ -123,10 +170,14 @@ pub fn detect(path: &Path) -> Result> { let nyquist = wav.fmt.sample_rate as f64 / 2.0; if nyquist <= 12000.0 && wav.fmt.channels == 1 && flatness < 0.05 { signals.push( - SignalBuilder::new(SignalSource::AudioSpectral, Confidence::Low, "signal_audio_flatness") - .param("value", format!("{:.4}", flatness)) - .detail("spectral_flatness", format!("{:.4}", flatness)) - .build(), + SignalBuilder::new( + SignalSource::AudioSpectral, + Confidence::Low, + "signal_audio_flatness", + ) + .param("value", format!("{:.4}", flatness)) + .detail("spectral_flatness", format!("{:.4}", flatness)) + .build(), ); } @@ -177,7 +228,9 @@ mod tests { #[test] fn test_find_bandwidth_cutoff_half() { let mut spectrum = vec![0.0; 1024]; - for i in 0..300 { spectrum[i] = 1.0; } + for i in 0..300 { + spectrum[i] = 1.0; + } let result = find_bandwidth_cutoff(&spectrum, 48000); assert!(result.is_some()); let (freq, ratio) = result.unwrap(); diff --git a/src/detector/c2pa_detector.rs b/src/detector/c2pa_detector.rs index 924eca8..1eb4b6f 100644 --- a/src/detector/c2pa_detector.rs +++ b/src/detector/c2pa_detector.rs @@ -3,22 +3,24 @@ use c2pa::assertions::{Actions, DigitalSourceType}; use c2pa::Reader; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; /// AI-related digital source types that indicate AI generation. fn is_ai_source_type(dst: &DigitalSourceType) -> Option<(Confidence, &'static str)> { match dst { - DigitalSourceType::TrainedAlgorithmicMedia => { - Some((Confidence::High, "trainedAlgorithmicMedia (fully AI-generated)")) - } + DigitalSourceType::TrainedAlgorithmicMedia => Some(( + Confidence::High, + "trainedAlgorithmicMedia (fully AI-generated)", + )), DigitalSourceType::CompositeWithTrainedAlgorithmicMedia => Some(( Confidence::High, "compositeWithTrainedAlgorithmicMedia (AI-edited)", )), - DigitalSourceType::CompositeSynthetic => { - Some((Confidence::High, "compositeSynthetic (includes AI elements)")) - } + DigitalSourceType::CompositeSynthetic => Some(( + Confidence::High, + "compositeSynthetic (includes AI elements)", + )), DigitalSourceType::AlgorithmicMedia => Some(( Confidence::Medium, "algorithmicMedia (algorithmic, not necessarily AI-trained)", @@ -59,11 +61,15 @@ fn check_manifest(manifest: &c2pa::Manifest, signals: &mut Vec) { if let Some(cg) = manifest.claim_generator() { if let Some(tool_name) = known_tools::match_ai_tool(cg) { signals.push( - SignalBuilder::new(SignalSource::C2pa, Confidence::High, "signal_c2pa_claim_generator") - .param("value", cg) - .tool(tool_name) - .detail("claim_generator", cg) - .build(), + SignalBuilder::new( + SignalSource::C2pa, + Confidence::High, + "signal_c2pa_claim_generator", + ) + .param("value", cg) + .tool(tool_name) + .detail("claim_generator", cg) + .build(), ); } } @@ -74,10 +80,14 @@ fn check_manifest(manifest: &c2pa::Manifest, signals: &mut Vec) { let info_json = serde_json::to_string(info).unwrap_or_default(); if let Some(tool_name) = known_tools::match_ai_tool(&info_json) { signals.push( - SignalBuilder::new(SignalSource::C2pa, Confidence::High, "signal_c2pa_claim_generator_info") - .tool(tool_name) - .detail("claim_generator_info", &info_json) - .build(), + SignalBuilder::new( + SignalSource::C2pa, + Confidence::High, + "signal_c2pa_claim_generator_info", + ) + .tool(tool_name) + .detail("claim_generator_info", &info_json) + .build(), ); } } @@ -104,14 +114,18 @@ fn check_manifest(manifest: &c2pa::Manifest, signals: &mut Vec) { } signals.push( - SignalBuilder::new(SignalSource::C2pa, confidence, "signal_c2pa_digital_source_type") - .param("value", desc) - .tool_opt(action.software_agent().and_then(|sw| { - let sw_str = serde_json::to_string(sw).unwrap_or_default(); - known_tools::match_ai_tool(&sw_str).map(|s| s.to_string()) - })) - .details(details) - .build(), + SignalBuilder::new( + SignalSource::C2pa, + confidence, + "signal_c2pa_digital_source_type", + ) + .param("value", desc) + .tool_opt(action.software_agent().and_then(|sw| { + let sw_str = serde_json::to_string(sw).unwrap_or_default(); + known_tools::match_ai_tool(&sw_str).map(|s| s.to_string()) + })) + .details(details) + .build(), ); } } diff --git a/src/detector/exif.rs b/src/detector/exif.rs index e6ec481..1546e6a 100644 --- a/src/detector/exif.rs +++ b/src/detector/exif.rs @@ -4,7 +4,7 @@ use std::fs::File; use std::io::BufReader; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; /// Camera-specific EXIF tags that real photos typically have. @@ -53,12 +53,16 @@ pub fn detect(path: &Path) -> Result> { let val = field.display_value().to_string().replace('"', ""); if let Some(tool_name) = known_tools::match_ai_tool(&val) { signals.push( - SignalBuilder::new(SignalSource::Exif, Confidence::Low, "signal_exif_tag_value") - .param("tag", tag.to_string()) - .param("value", &val) - .tool(tool_name) - .detail(tag.to_string(), &val) - .build(), + SignalBuilder::new( + SignalSource::Exif, + Confidence::Low, + "signal_exif_tag_value", + ) + .param("tag", tag.to_string()) + .param("value", &val) + .tool(tool_name) + .detail(tag.to_string(), &val) + .build(), ); software_matched = true; } @@ -71,11 +75,15 @@ pub fn detect(path: &Path) -> Result> { let val = field.display_value().to_string().replace('"', ""); if let Some(tool_name) = known_tools::match_ai_tool(&val) { signals.push( - SignalBuilder::new(SignalSource::Exif, Confidence::Low, "signal_exif_tag_references_ai") - .param("tag", tag.to_string()) - .tool(tool_name) - .detail(tag.to_string(), &val) - .build(), + SignalBuilder::new( + SignalSource::Exif, + Confidence::Low, + "signal_exif_tag_references_ai", + ) + .param("tag", tag.to_string()) + .tool(tool_name) + .detail(tag.to_string(), &val) + .build(), ); software_matched = true; } @@ -85,15 +93,18 @@ pub fn detect(path: &Path) -> Result> { // Check Artist tag for suspicious patterns if let Some(field) = exif.get_field(Tag::Artist, In::PRIMARY) { let val = field.display_value().to_string().replace('"', ""); - let is_hex_hash = - val.len() >= 32 && val.chars().all(|c| c.is_ascii_hexdigit() || c == '-'); + let is_hex_hash = val.len() >= 32 && val.chars().all(|c| c.is_ascii_hexdigit() || c == '-'); if is_hex_hash { let prefix = &val[..val.len().min(16)]; signals.push( - SignalBuilder::new(SignalSource::Exif, Confidence::Low, "signal_exif_artist_hash") - .param("value", prefix) - .detail("Artist", &val) - .build(), + SignalBuilder::new( + SignalSource::Exif, + Confidence::Low, + "signal_exif_artist_hash", + ) + .param("value", prefix) + .detail("Artist", &val) + .build(), ); software_matched = true; } @@ -128,10 +139,7 @@ pub fn dump_info(path: &Path) -> Result> { let mut fields = Vec::new(); for field in exif.fields() { - fields.push(( - format!("{}", field.tag), - field.display_value().to_string(), - )); + fields.push((format!("{}", field.tag), field.display_value().to_string())); } Ok(fields) } diff --git a/src/detector/filename.rs b/src/detector/filename.rs index ce53b8c..4234f16 100644 --- a/src/detector/filename.rs +++ b/src/detector/filename.rs @@ -1,7 +1,7 @@ use anyhow::Result; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; /// Known filename patterns from AI audio/media generation tools. const FILENAME_PATTERNS: &[(&str, &str)] = &[ @@ -32,11 +32,15 @@ pub fn detect(path: &Path) -> Result> { for &(pattern, tool_name) in FILENAME_PATTERNS { if lower.contains(pattern) { signals.push( - SignalBuilder::new(SignalSource::Filename, Confidence::Low, "signal_filename_pattern") - .param("pattern", pattern) - .tool(tool_name) - .detail("filename", filename) - .build(), + SignalBuilder::new( + SignalSource::Filename, + Confidence::Low, + "signal_filename_pattern", + ) + .param("pattern", pattern) + .tool(tool_name) + .detail("filename", filename) + .build(), ); break; } @@ -44,10 +48,14 @@ pub fn detect(path: &Path) -> Result> { if signals.is_empty() && detect_elevenlabs_pattern(&lower) { signals.push( - SignalBuilder::new(SignalSource::Filename, Confidence::Low, "signal_filename_elevenlabs") - .tool("elevenlabs") - .detail("filename", filename) - .build(), + SignalBuilder::new( + SignalSource::Filename, + Confidence::Low, + "signal_filename_elevenlabs", + ) + .tool("elevenlabs") + .detail("filename", filename) + .build(), ); } @@ -63,7 +71,11 @@ fn detect_elevenlabs_pattern(lower: &str) -> bool { return false; } let bytes = rest.as_bytes(); - bytes[4] == b'-' && bytes[7] == b'-' && bytes[10] == b't' && bytes[13] == b'_' && bytes[16] == b'_' + bytes[4] == b'-' + && bytes[7] == b'-' + && bytes[10] == b't' + && bytes[13] == b'_' + && bytes[16] == b'_' } #[cfg(test)] @@ -105,7 +117,9 @@ mod tests { #[test] fn test_elevenlabs_pattern_detection() { - assert!(detect_elevenlabs_pattern("elevenlabs_2026-03-11t04_15_43_something")); + assert!(detect_elevenlabs_pattern( + "elevenlabs_2026-03-11t04_15_43_something" + )); assert!(!detect_elevenlabs_pattern("elevenlabs_short")); assert!(!detect_elevenlabs_pattern("something_else")); } diff --git a/src/detector/id3_metadata.rs b/src/detector/id3_metadata.rs index 9deeadf..a1b07ad 100644 --- a/src/detector/id3_metadata.rs +++ b/src/detector/id3_metadata.rs @@ -2,7 +2,7 @@ use anyhow::Result; use id3::{Tag, TagLike}; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; /// Known AI audio platform URL domains. @@ -24,11 +24,15 @@ fn detect_comments(tag: &Tag) -> Vec { } if let Some(tool_name) = known_tools::match_ai_tool(text) { signals.push( - SignalBuilder::new(SignalSource::Id3Metadata, Confidence::Medium, "signal_id3_comment") - .param("text", text.as_str()) - .tool(tool_name) - .detail("COMM", text.as_str()) - .build(), + SignalBuilder::new( + SignalSource::Id3Metadata, + Confidence::Medium, + "signal_id3_comment", + ) + .param("text", text.as_str()) + .tool(tool_name) + .detail("COMM", text.as_str()) + .build(), ); } } @@ -59,11 +63,15 @@ fn check_url(signals: &mut Vec, frame_id: &str, url: &str) { for &(domain, tool_name) in AI_URL_DOMAINS { if lower.contains(domain) { signals.push( - SignalBuilder::new(SignalSource::Id3Metadata, Confidence::Medium, "signal_id3_url") - .param("url", url) - .tool(tool_name) - .detail(frame_id, url) - .build(), + SignalBuilder::new( + SignalSource::Id3Metadata, + Confidence::Medium, + "signal_id3_url", + ) + .param("url", url) + .tool(tool_name) + .detail(frame_id, url) + .build(), ); break; } @@ -77,12 +85,16 @@ fn detect_text_frames(tag: &Tag) -> Vec { if let Some(text) = tag.get(frame_id).and_then(|f| f.content().text()) { if let Some(tool_name) = known_tools::match_ai_tool(text) { signals.push( - SignalBuilder::new(SignalSource::Id3Metadata, Confidence::Medium, "signal_id3_text_frame") - .param("frame", *frame_id) - .param("text", text) - .tool(tool_name) - .detail(*frame_id, text) - .build(), + SignalBuilder::new( + SignalSource::Id3Metadata, + Confidence::Medium, + "signal_id3_text_frame", + ) + .param("frame", *frame_id) + .param("text", text) + .tool(tool_name) + .detail(*frame_id, text) + .build(), ); } } @@ -91,13 +103,17 @@ fn detect_text_frames(tag: &Tag) -> Vec { let combined = format!("{} {}", txxx.description, txxx.value); if let Some(tool_name) = known_tools::match_ai_tool(&combined) { signals.push( - SignalBuilder::new(SignalSource::Id3Metadata, Confidence::Medium, "signal_id3_txxx") - .param("desc", &txxx.description) - .param("value", &txxx.value) - .tool(tool_name) - .detail("TXXX description", &txxx.description) - .detail("TXXX value", &txxx.value) - .build(), + SignalBuilder::new( + SignalSource::Id3Metadata, + Confidence::Medium, + "signal_id3_txxx", + ) + .param("desc", &txxx.description) + .param("value", &txxx.value) + .tool(tool_name) + .detail("TXXX description", &txxx.description) + .detail("TXXX value", &txxx.value) + .build(), ); } } diff --git a/src/detector/mp4_metadata.rs b/src/detector/mp4_metadata.rs index 11bffc1..6f944df 100644 --- a/src/detector/mp4_metadata.rs +++ b/src/detector/mp4_metadata.rs @@ -2,50 +2,86 @@ use anyhow::Result; use std::fs; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; -const MP4_TOOL_MAPPINGS: &[(&str, &str, Confidence)] = &[ - ("google", "google veo", Confidence::Medium), -]; +const MP4_TOOL_MAPPINGS: &[(&str, &str, Confidence)] = + &[("google", "google veo", Confidence::Medium)]; -const SEI_MARKERS: &[(&[u8], &str)] = &[ - (b"kling-ai", "kling"), -]; +const SEI_MARKERS: &[(&[u8], &str)] = &[(b"kling-ai", "kling")]; fn read_u32_be(data: &[u8], offset: usize) -> Option { - if offset + 4 > data.len() { return None; } - Some(u32::from_be_bytes([data[offset], data[offset+1], data[offset+2], data[offset+3]])) + if offset + 4 > data.len() { + return None; + } + Some(u32::from_be_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ])) } -struct BoxInfo { box_type: [u8; 4], content_start: usize, box_end: usize } +struct BoxInfo { + box_type: [u8; 4], + content_start: usize, + box_end: usize, +} fn find_boxes(data: &[u8], start: usize, end: usize) -> Vec { let mut boxes = Vec::new(); let mut pos = start; while pos + 8 <= end { - let size = match read_u32_be(data, pos) { Some(s) => s as u64, None => break }; + let size = match read_u32_be(data, pos) { + Some(s) => s as u64, + None => break, + }; let mut box_type = [0u8; 4]; box_type.copy_from_slice(&data[pos + 4..pos + 8]); let (content_start, actual_size) = if size == 1 { - if pos + 16 > end { break; } - let ext = u64::from_be_bytes([data[pos+8],data[pos+9],data[pos+10],data[pos+11],data[pos+12],data[pos+13],data[pos+14],data[pos+15]]); + if pos + 16 > end { + break; + } + let ext = u64::from_be_bytes([ + data[pos + 8], + data[pos + 9], + data[pos + 10], + data[pos + 11], + data[pos + 12], + data[pos + 13], + data[pos + 14], + data[pos + 15], + ]); (pos + 16, ext) - } else if size == 0 { (pos + 8, (end - pos) as u64) } - else { (pos + 8, size) }; - if actual_size < 8 { break; } + } else if size == 0 { + (pos + 8, (end - pos) as u64) + } else { + (pos + 8, size) + }; + if actual_size < 8 { + break; + } let box_end = (pos as u64 + actual_size).min(end as u64) as usize; - boxes.push(BoxInfo { box_type, content_start, box_end }); + boxes.push(BoxInfo { + box_type, + content_start, + box_end, + }); pos = box_end; } boxes } fn get_box(data: &[u8], start: usize, end: usize, box_type: &[u8; 4]) -> Option<(usize, usize)> { - find_boxes(data, start, end).into_iter().find(|b| &b.box_type == box_type).map(|b| (b.content_start, b.box_end)) + find_boxes(data, start, end) + .into_iter() + .find(|b| &b.box_type == box_type) + .map(|b| (b.content_start, b.box_end)) } -fn box_type_to_string(box_type: &[u8; 4]) -> String { box_type.iter().map(|&b| b as char).collect() } +fn box_type_to_string(box_type: &[u8; 4]) -> String { + box_type.iter().map(|&b| b as char).collect() +} fn parse_ilst_standard(data: &[u8], start: usize, end: usize) -> Vec<(String, String)> { let mut results = Vec::new(); @@ -53,8 +89,12 @@ fn parse_ilst_standard(data: &[u8], start: usize, end: usize) -> Vec<(String, St let key = box_type_to_string(&item.box_type); if let Some((data_cs, data_ce)) = get_box(data, item.content_start, item.box_end, b"data") { if data_ce - data_cs >= 8 { - let value = String::from_utf8_lossy(&data[data_cs + 8..data_ce]).trim_matches('\0').to_string(); - if !value.is_empty() { results.push((key, value)); } + let value = String::from_utf8_lossy(&data[data_cs + 8..data_ce]) + .trim_matches('\0') + .to_string(); + if !value.is_empty() { + results.push((key, value)); + } } } } @@ -62,14 +102,26 @@ fn parse_ilst_standard(data: &[u8], start: usize, end: usize) -> Vec<(String, St } fn parse_keys(data: &[u8], start: usize, end: usize) -> Vec { - if end - start < 8 { return vec![]; } - let count = match read_u32_be(data, start + 4) { Some(c) => c as usize, None => return vec![] }; + if end - start < 8 { + return vec![]; + } + let count = match read_u32_be(data, start + 4) { + Some(c) => c as usize, + None => return vec![], + }; let mut keys = Vec::with_capacity(count); let mut offset = start + 8; for _ in 0..count { - if offset + 8 > end { break; } - let key_size = match read_u32_be(data, offset) { Some(s) => s as usize, None => break }; - if key_size < 8 || offset + key_size > end { break; } + if offset + 8 > end { + break; + } + let key_size = match read_u32_be(data, offset) { + Some(s) => s as usize, + None => break, + }; + if key_size < 8 || offset + key_size > end { + break; + } let name = String::from_utf8_lossy(&data[offset + 8..offset + key_size]).to_string(); keys.push(name); offset += key_size; @@ -77,15 +129,28 @@ fn parse_keys(data: &[u8], start: usize, end: usize) -> Vec { keys } -fn parse_ilst_keyed(data: &[u8], keys: &[String], ilst_start: usize, ilst_end: usize) -> Vec<(String, String)> { +fn parse_ilst_keyed( + data: &[u8], + keys: &[String], + ilst_start: usize, + ilst_end: usize, +) -> Vec<(String, String)> { let mut results = Vec::new(); for item in find_boxes(data, ilst_start, ilst_end) { let idx = u32::from_be_bytes(item.box_type) as usize; - let key_name = if idx > 0 && idx <= keys.len() { keys[idx - 1].clone() } else { format!("idx:{}", idx) }; + let key_name = if idx > 0 && idx <= keys.len() { + keys[idx - 1].clone() + } else { + format!("idx:{}", idx) + }; if let Some((data_cs, data_ce)) = get_box(data, item.content_start, item.box_end, b"data") { if data_ce - data_cs >= 8 { - let value = String::from_utf8_lossy(&data[data_cs + 8..data_ce]).trim_matches('\0').to_string(); - if !value.is_empty() { results.push((key_name, value)); } + let value = String::from_utf8_lossy(&data[data_cs + 8..data_ce]) + .trim_matches('\0') + .to_string(); + if !value.is_empty() { + results.push((key_name, value)); + } } } } @@ -93,13 +158,27 @@ fn parse_ilst_keyed(data: &[u8], keys: &[String], ilst_start: usize, ilst_end: u } fn extract_ilst_entries(data: &[u8]) -> Vec<(String, String)> { - let moov = match get_box(data, 0, data.len(), b"moov") { Some(m) => m, None => return vec![] }; - let udta = match get_box(data, moov.0, moov.1, b"udta") { Some(u) => u, None => return vec![] }; - let meta = match get_box(data, udta.0, udta.1, b"meta") { Some(m) => m, None => return vec![] }; + let moov = match get_box(data, 0, data.len(), b"moov") { + Some(m) => m, + None => return vec![], + }; + let udta = match get_box(data, moov.0, moov.1, b"udta") { + Some(u) => u, + None => return vec![], + }; + let meta = match get_box(data, udta.0, udta.1, b"meta") { + Some(m) => m, + None => return vec![], + }; let meta_content = meta.0 + 4; - if meta_content >= meta.1 { return vec![]; } + if meta_content >= meta.1 { + return vec![]; + } let keys_box = get_box(data, meta_content, meta.1, b"keys"); - let ilst = match get_box(data, meta_content, meta.1, b"ilst") { Some(i) => i, None => return vec![] }; + let ilst = match get_box(data, meta_content, meta.1, b"ilst") { + Some(i) => i, + None => return vec![], + }; if let Some((keys_start, keys_end)) = keys_box { let keys = parse_keys(data, keys_start, keys_end); parse_ilst_keyed(data, &keys, ilst.0, ilst.1) @@ -113,7 +192,9 @@ fn detect_ilst_tools(entries: &[(String, String)]) -> Vec { let tool_keys: &[&str] = &["\u{a9}too", "\u{a9}swr", "encoder", "tool", "software"]; for (key, value) in entries { let is_tool_key = tool_keys.iter().any(|tk| key.eq_ignore_ascii_case(tk)); - if !is_tool_key { continue; } + if !is_tool_key { + continue; + } let label = match key.as_str() { "\u{a9}too" => "Encoding Tool", "\u{a9}swr" => "Software", @@ -121,9 +202,16 @@ fn detect_ilst_tools(entries: &[(String, String)]) -> Vec { }; if let Some(tool_name) = known_tools::match_ai_tool(value) { signals.push( - SignalBuilder::new(SignalSource::Mp4Metadata, Confidence::Medium, "signal_mp4_tool_match") - .param("label", label).param("value", value.as_str()) - .tool(tool_name).detail(key.as_str(), value.as_str()).build(), + SignalBuilder::new( + SignalSource::Mp4Metadata, + Confidence::Medium, + "signal_mp4_tool_match", + ) + .param("label", label) + .param("value", value.as_str()) + .tool(tool_name) + .detail(key.as_str(), value.as_str()) + .build(), ); continue; } @@ -131,9 +219,16 @@ fn detect_ilst_tools(entries: &[(String, String)]) -> Vec { for &(pattern, mapped_tool, confidence) in MP4_TOOL_MAPPINGS { if lower == pattern { signals.push( - SignalBuilder::new(SignalSource::Mp4Metadata, confidence, "signal_mp4_tool_match") - .param("label", label).param("value", value.as_str()) - .tool(mapped_tool).detail(key.as_str(), value.as_str()).build(), + SignalBuilder::new( + SignalSource::Mp4Metadata, + confidence, + "signal_mp4_tool_match", + ) + .param("label", label) + .param("value", value.as_str()) + .tool(mapped_tool) + .detail(key.as_str(), value.as_str()) + .build(), ); break; } @@ -145,20 +240,32 @@ fn detect_ilst_tools(entries: &[(String, String)]) -> Vec { fn detect_aigc_label(entries: &[(String, String)]) -> Vec { let mut signals = Vec::new(); for (key, value) in entries { - if !key.eq_ignore_ascii_case("AIGC") { continue; } + if !key.eq_ignore_ascii_case("AIGC") { + continue; + } let has_ai_label = value.contains("\"Label\":\"1\"") || value.contains("\"Label\": \"1\""); - if !has_ai_label { continue; } + if !has_ai_label { + continue; + } let produce_id = extract_json_field(value, "ProduceID"); let signal = if let Some(ref pid) = produce_id { - SignalBuilder::new(SignalSource::Mp4Metadata, Confidence::Medium, "signal_mp4_aigc_label_id") - .param("id", pid.as_str()) - .detail("AIGC", value.as_str()) - .detail("ProduceID", pid.as_str()) - .build() + SignalBuilder::new( + SignalSource::Mp4Metadata, + Confidence::Medium, + "signal_mp4_aigc_label_id", + ) + .param("id", pid.as_str()) + .detail("AIGC", value.as_str()) + .detail("ProduceID", pid.as_str()) + .build() } else { - SignalBuilder::new(SignalSource::Mp4Metadata, Confidence::Medium, "signal_mp4_aigc_label") - .detail("AIGC", value.as_str()) - .build() + SignalBuilder::new( + SignalSource::Mp4Metadata, + Confidence::Medium, + "signal_mp4_aigc_label", + ) + .detail("AIGC", value.as_str()) + .build() }; signals.push(signal); } @@ -179,18 +286,25 @@ fn extract_json_field(json: &str, field: &str) -> Option { fn detect_sei_markers(data: &[u8]) -> Vec { let mut signals = Vec::new(); - let mdat = match get_box(data, 0, data.len(), b"mdat") { Some(m) => m, None => return signals }; + let mdat = match get_box(data, 0, data.len(), b"mdat") { + Some(m) => m, + None => return signals, + }; let scan_end = mdat.1.min(mdat.0 + 1_048_576); let scan_data = &data[mdat.0..scan_end]; for &(marker, tool_name) in SEI_MARKERS { if scan_data.windows(marker.len()).any(|w| w == marker) { let marker_str = String::from_utf8_lossy(marker); signals.push( - SignalBuilder::new(SignalSource::Mp4Metadata, Confidence::Medium, "signal_mp4_sei_watermark") - .param("marker", &*marker_str) - .tool(tool_name) - .detail("SEI marker", &*marker_str) - .build(), + SignalBuilder::new( + SignalSource::Mp4Metadata, + Confidence::Medium, + "signal_mp4_sei_watermark", + ) + .param("marker", &*marker_str) + .tool(tool_name) + .detail("SEI marker", &*marker_str) + .build(), ); } } @@ -199,7 +313,9 @@ fn detect_sei_markers(data: &[u8]) -> Vec { pub fn detect(path: &Path) -> Result> { let data = fs::read(path)?; - if get_box(&data, 0, data.len().min(64), b"ftyp").is_none() { return Ok(vec![]); } + if get_box(&data, 0, data.len().min(64), b"ftyp").is_none() { + return Ok(vec![]); + } let entries = extract_ilst_entries(&data); let mut signals = Vec::new(); signals.extend(detect_ilst_tools(&entries)); @@ -210,7 +326,9 @@ pub fn detect(path: &Path) -> Result> { pub fn dump_info(path: &Path) -> Result> { let data = fs::read(path)?; - if get_box(&data, 0, data.len().min(64), b"ftyp").is_none() { return Ok(vec![]); } + if get_box(&data, 0, data.len().min(64), b"ftyp").is_none() { + return Ok(vec![]); + } let mut props = extract_ilst_entries(&data); let mdat = get_box(&data, 0, data.len(), b"mdat"); if let Some((mdat_start, mdat_end)) = mdat { @@ -218,7 +336,10 @@ pub fn dump_info(path: &Path) -> Result> { let scan_data = &data[mdat_start..scan_end]; for &(marker, tool_name) in SEI_MARKERS { if scan_data.windows(marker.len()).any(|w| w == marker) { - props.push(("SEI watermark".to_string(), format!("{} ({})", String::from_utf8_lossy(marker), tool_name))); + props.push(( + "SEI watermark".to_string(), + format!("{} ({})", String::from_utf8_lossy(marker), tool_name), + )); } } } @@ -233,11 +354,17 @@ mod tests { fn test_extract_json_field() { let json = r#"{"Label":"1","ProduceID":"abc-123","Other":"val"}"#; assert_eq!(extract_json_field(json, "Label"), Some("1".to_string())); - assert_eq!(extract_json_field(json, "ProduceID"), Some("abc-123".to_string())); + assert_eq!( + extract_json_field(json, "ProduceID"), + Some("abc-123".to_string()) + ); assert_eq!(extract_json_field(json, "Missing"), None); let json2 = r#"{"Label": "1", "ProduceID": "xyz"}"#; assert_eq!(extract_json_field(json2, "Label"), Some("1".to_string())); - assert_eq!(extract_json_field(json2, "ProduceID"), Some("xyz".to_string())); + assert_eq!( + extract_json_field(json2, "ProduceID"), + Some("xyz".to_string()) + ); } #[test] @@ -266,17 +393,26 @@ mod tests { #[test] fn test_detect_aigc_label() { - let entries = vec![("AIGC".to_string(), r#"{"Label":"1","ProduceID":"test-123"}"#.to_string())]; + let entries = vec![( + "AIGC".to_string(), + r#"{"Label":"1","ProduceID":"test-123"}"#.to_string(), + )]; let signals = detect_aigc_label(&entries); assert_eq!(signals.len(), 1); assert_eq!(signals[0].confidence, Confidence::Medium); assert!(signals[0].description.contains("AIGC") || signals[0].msg_key.contains("aigc")); - assert!(signals[0].description.contains("test-123") || signals[0].msg_params.iter().any(|(_, v)| v == "test-123")); + assert!( + signals[0].description.contains("test-123") + || signals[0].msg_params.iter().any(|(_, v)| v == "test-123") + ); } #[test] fn test_detect_aigc_label_not_ai() { - let entries = vec![("AIGC".to_string(), r#"{"Label":"0","ProduceID":"test"}"#.to_string())]; + let entries = vec![( + "AIGC".to_string(), + r#"{"Label":"0","ProduceID":"test"}"#.to_string(), + )]; let signals = detect_aigc_label(&entries); assert!(signals.is_empty()); } diff --git a/src/detector/png_text.rs b/src/detector/png_text.rs index d12d1b3..15c5c42 100644 --- a/src/detector/png_text.rs +++ b/src/detector/png_text.rs @@ -2,23 +2,34 @@ use anyhow::Result; use std::fs; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; const RELEVANT_KEYWORDS: &[&str] = &[ - "Software", "Comment", "Description", "Source", "Author", "parameters", "prompt", + "Software", + "Comment", + "Description", + "Source", + "Author", + "parameters", + "prompt", ]; pub fn detect(path: &Path) -> Result> { let data = fs::read(path)?; - if data.len() < 8 || &data[..8] != b"\x89PNG\r\n\x1a\n" { return Ok(vec![]); } + if data.len() < 8 || &data[..8] != b"\x89PNG\r\n\x1a\n" { + return Ok(vec![]); + } let mut signals = Vec::new(); let mut pos: usize = 8; while pos + 12 <= data.len() { - let length = u32::from_be_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]) as usize; + let length = + u32::from_be_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize; let chunk_type = &data[pos + 4..pos + 8]; let chunk_data_end = pos + 8 + length; - if chunk_data_end > data.len() { break; } + if chunk_data_end > data.len() { + break; + } let chunk_data = &data[pos + 8..chunk_data_end]; match chunk_type { b"tEXt" => { @@ -26,7 +37,9 @@ pub fn detect(path: &Path) -> Result> { if let (Ok(keyword), Ok(value)) = ( std::str::from_utf8(&chunk_data[..null_pos]), std::str::from_utf8(&chunk_data[null_pos + 1..]), - ) { check_text_chunk(keyword, value, &mut signals); } + ) { + check_text_chunk(keyword, value, &mut signals); + } } } b"iTXt" => { @@ -36,7 +49,13 @@ pub fn detect(path: &Path) -> Result> { let mut nulls_found = 0; let mut text_start = 0; for (i, &b) in rest.iter().enumerate() { - if b == 0 { nulls_found += 1; if nulls_found == 3 { text_start = i + 1; break; } } + if b == 0 { + nulls_found += 1; + if nulls_found == 3 { + text_start = i + 1; + break; + } + } } if text_start > 0 && text_start < rest.len() { if let Ok(value) = std::str::from_utf8(&rest[text_start..]) { @@ -56,19 +75,31 @@ pub fn detect(path: &Path) -> Result> { fn check_text_chunk(keyword: &str, value: &str, signals: &mut Vec) { let keyword_lower = keyword.to_lowercase(); - let is_relevant = RELEVANT_KEYWORDS.iter().any(|k| keyword_lower == k.to_lowercase()); - if !is_relevant { return; } + let is_relevant = RELEVANT_KEYWORDS + .iter() + .any(|k| keyword_lower == k.to_lowercase()); + if !is_relevant { + return; + } if let Some(tool_name) = known_tools::match_ai_tool(value) { signals.push( - SignalBuilder::new(SignalSource::PngText, Confidence::Low, "signal_png_text_chunk") - .param("keyword", keyword) - .tool(tool_name) - .detail(keyword, truncate(value, 200)) - .build(), + SignalBuilder::new( + SignalSource::PngText, + Confidence::Low, + "signal_png_text_chunk", + ) + .param("keyword", keyword) + .tool(tool_name) + .detail(keyword, truncate(value, 200)) + .build(), ); } } fn truncate(s: &str, max: usize) -> String { - if s.len() <= max { s.to_string() } else { format!("{}...", &s[..max]) } + if s.len() <= max { + s.to_string() + } else { + format!("{}...", &s[..max]) + } } diff --git a/src/detector/watermark.rs b/src/detector/watermark.rs index a5285ec..87bd184 100644 --- a/src/detector/watermark.rs +++ b/src/detector/watermark.rs @@ -1,7 +1,7 @@ use anyhow::{Context, Result}; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::i18n; const MAX_DIM: u32 = 1024; @@ -18,12 +18,16 @@ pub fn detect(path: &Path) -> Result> { let img = image::open(path).context("Failed to open image for watermark analysis")?; let img = if img.width() > MAX_DIM || img.height() > MAX_DIM { img.resize(MAX_DIM, MAX_DIM, image::imageops::FilterType::Lanczos3) - } else { img }; + } else { + img + }; let rgba = img.to_rgba8(); let (width, height) = rgba.dimensions(); let (w, h) = (width as usize, height as usize); - if w < MIN_DIM || h < MIN_DIM { return Ok(vec![]); } + if w < MIN_DIM || h < MIN_DIM { + return Ok(vec![]); + } let debug = std::env::var("AIC_DEBUG").is_ok(); let mut indicators: Vec<&str> = Vec::new(); @@ -32,39 +36,62 @@ pub fn detect(path: &Path) -> Result> { let channels = extract_rgb_channels(&rgba, w, h); let cw = w - (w % 2); let ch = h - (h % 2); - if cw < DWT_BLOCK * 4 || ch < DWT_BLOCK * 4 { return Ok(vec![]); } + if cw < DWT_BLOCK * 4 || ch < DWT_BLOCK * 4 { + return Ok(vec![]); + } - let channel_pixels: Vec> = channels.iter().map(|channel| { - channel.iter().take(ch * w).enumerate() - .filter_map(|(i, &v)| if i % w < cw { Some(v) } else { None }).collect() - }).collect(); + let channel_pixels: Vec> = channels + .iter() + .map(|channel| { + channel + .iter() + .take(ch * w) + .enumerate() + .filter_map(|(i, &v)| if i % w < cw { Some(v) } else { None }) + .collect() + }) + .collect(); - let channel_subbands: Vec = channel_pixels.iter().map(|px| haar_dwt_2d(px, cw, ch)).collect(); + let channel_subbands: Vec = channel_pixels + .iter() + .map(|px| haar_dwt_2d(px, cw, ch)) + .collect(); let sub_w = cw / 2; let sub_h = ch / 2; // Analysis 1: Channel noise asymmetry - let channel_noises: Vec = channels.iter().map(|c| estimate_noise_level(c, w, h)).collect(); + let channel_noises: Vec = channels + .iter() + .map(|c| estimate_noise_level(c, w, h)) + .collect(); let mean_noise = channel_noises.iter().sum::() / 3.0; if mean_noise > 0.01 { let max_noise = channel_noises.iter().cloned().fold(f64::MIN, f64::max); let min_noise = channel_noises.iter().cloned().fold(f64::MAX, f64::min); let asymmetry = (max_noise - min_noise) / mean_noise; if debug { - eprintln!(" [debug] Watermark noise: R={:.3} G={:.3} B={:.3} asymmetry={:.3}", - channel_noises[0], channel_noises[1], channel_noises[2], asymmetry); + eprintln!( + " [debug] Watermark noise: R={:.3} G={:.3} B={:.3} asymmetry={:.3}", + channel_noises[0], channel_noises[1], channel_noises[2], asymmetry + ); } details.push(("noise_asymmetry".to_string(), format!("{:.3}", asymmetry))); - if asymmetry > NOISE_ASYMMETRY_THRESHOLD { indicators.push("channel noise asymmetry"); } + if asymmetry > NOISE_ASYMMETRY_THRESHOLD { + indicators.push("channel noise asymmetry"); + } } // Analysis 2: Cross-channel bit agreement - let all_quant_steps: Vec = std::iter::once(QUANT_STEP).chain(ALT_QUANT_STEPS.iter().copied()).collect(); + let all_quant_steps: Vec = std::iter::once(QUANT_STEP) + .chain(ALT_QUANT_STEPS.iter().copied()) + .collect(); let mut best_agreement = 0.0f64; let mut best_q = 0.0f64; for &q_step in &all_quant_steps { - let channel_bits: Vec> = channel_subbands.iter() - .map(|sb| extract_bits(&sb.ll, sub_w, sub_h, q_step, EMBED_INDICES)).collect(); + let channel_bits: Vec> = channel_subbands + .iter() + .map(|sb| extract_bits(&sb.ll, sub_w, sub_h, q_step, EMBED_INDICES)) + .collect(); if channel_bits.iter().all(|b| !b.is_empty()) { let min_len = channel_bits.iter().map(|b| b.len()).min().unwrap_or(0); if min_len > 0 { @@ -72,21 +99,38 @@ pub fn detect(path: &Path) -> Result> { let mut total_compared = 0usize; for i in 0..3 { for j in (i + 1)..3 { - for (bi, bj) in channel_bits[i].iter().zip(channel_bits[j].iter()).take(min_len) { - if bi == bj { total_agree += 1; } + for (bi, bj) in channel_bits[i] + .iter() + .zip(channel_bits[j].iter()) + .take(min_len) + { + if bi == bj { + total_agree += 1; + } total_compared += 1; } } } if total_compared > 0 { let agreement = total_agree as f64 / total_compared as f64; - if agreement > best_agreement { best_agreement = agreement; best_q = q_step; } + if agreement > best_agreement { + best_agreement = agreement; + best_q = q_step; + } } } } } - if debug { eprintln!(" [debug] Watermark cross-channel bit agreement: {:.3} (q={:.0})", best_agreement, best_q); } - details.push(("cross_channel_agreement".to_string(), format!("{:.3}", best_agreement))); + if debug { + eprintln!( + " [debug] Watermark cross-channel bit agreement: {:.3} (q={:.0})", + best_agreement, best_q + ); + } + details.push(( + "cross_channel_agreement".to_string(), + format!("{:.3}", best_agreement), + )); if best_agreement > BIT_AGREEMENT_THRESHOLD { indicators.push("cross-channel bit consistency"); details.push(("best_quant_step".to_string(), format!("{:.0}", best_q))); @@ -104,7 +148,10 @@ pub fn detect(path: &Path) -> Result> { energy_ratios.push(ratio); if debug { let ch_name = ["R", "G", "B"][ch_idx]; - eprintln!(" [debug] Watermark energy ratio ch={}: {:.6}", ch_name, ratio); + eprintln!( + " [debug] Watermark energy ratio ch={}: {:.6}", + ch_name, ratio + ); } } } @@ -114,25 +161,41 @@ pub fn detect(path: &Path) -> Result> { let mean_ratio = energy_ratios.iter().sum::() / energy_ratios.len() as f64; if mean_ratio > 0.0 { let ratio_spread = (max_ratio - min_ratio) / mean_ratio; - details.push(("energy_ratio_spread".to_string(), format!("{:.4}", ratio_spread))); - if debug { eprintln!(" [debug] Watermark energy ratio spread: {:.4}", ratio_spread); } - if ratio_spread > 0.25 { indicators.push("asymmetric DWT energy distribution"); } + details.push(( + "energy_ratio_spread".to_string(), + format!("{:.4}", ratio_spread), + )); + if debug { + eprintln!( + " [debug] Watermark energy ratio spread: {:.4}", + ratio_spread + ); + } + if ratio_spread > 0.25 { + indicators.push("asymmetric DWT energy distribution"); + } } } // Emit signal if indicators.len() >= MIN_INDICATORS { - let strength_key = if indicators.len() >= 3 { "signal_watermark_strong" } else { "signal_watermark_moderate" }; + let strength_key = if indicators.len() >= 3 { + "signal_watermark_strong" + } else { + "signal_watermark_moderate" + }; let strength = i18n::t(strength_key, &[]); let indicators_str = indicators.join("; "); - Ok(vec![ - SignalBuilder::new(SignalSource::Watermark, Confidence::Low, "signal_watermark_detected") - .param("strength", &strength) - .param("indicators", &indicators_str) - .details(details) - .build(), - ]) + Ok(vec![SignalBuilder::new( + SignalSource::Watermark, + Confidence::Low, + "signal_watermark_detected", + ) + .param("strength", &strength) + .param("indicators", &indicators_str) + .details(details) + .build()]) } else { Ok(vec![]) } @@ -145,14 +208,18 @@ fn extract_rgb_channels(rgba: &image::RgbaImage, w: usize, h: usize) -> [Vec f64 { - if width < 3 || height < 3 { return 0.0; } + if width < 3 || height < 3 { + return 0.0; + } let mut laplacian_values = Vec::new(); for y in 1..height - 1 { for x in 1..width - 1 { @@ -165,16 +232,26 @@ fn estimate_noise_level(channel: &[f64], width: usize, height: usize) -> f64 { laplacian_values.push(lap); } } - if laplacian_values.is_empty() { return 0.0; } + if laplacian_values.is_empty() { + return 0.0; + } laplacian_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); let median = laplacian_values[laplacian_values.len() / 2]; median / 0.6745 } -fn extract_bits(ll_subband: &[f64], width: usize, height: usize, quant_step: f64, coeff_indices: &[usize]) -> Vec { +fn extract_bits( + ll_subband: &[f64], + width: usize, + height: usize, + quant_step: f64, + coeff_indices: &[usize], +) -> Vec { let blocks_x = width / DWT_BLOCK; let blocks_y = height / DWT_BLOCK; - if blocks_x * blocks_y < 32 { return vec![]; } + if blocks_x * blocks_y < 32 { + return vec![]; + } let mut bits = Vec::new(); for by in 0..blocks_y { for bx in 0..blocks_x { @@ -183,7 +260,9 @@ fn extract_bits(ll_subband: &[f64], width: usize, height: usize, quant_step: f64 for col in 0..DWT_BLOCK { let y = by * DWT_BLOCK + row; let x = bx * DWT_BLOCK + col; - if y < height && x < width { block[row * DWT_BLOCK + col] = ll_subband[y * width + x]; } + if y < height && x < width { + block[row * DWT_BLOCK + col] = ll_subband[y * width + x]; + } } } apply_2d_dct_ortho(&mut block, DWT_BLOCK); @@ -207,9 +286,14 @@ fn apply_2d_dct_ortho(block: &mut [f64], size: usize) { for k in 0..size { let mut sum = 0.0; for (i, val) in input.iter().enumerate() { - sum += val * (std::f64::consts::PI * (2.0 * i as f64 + 1.0) * k as f64 / (2.0 * n)).cos(); + sum += val + * (std::f64::consts::PI * (2.0 * i as f64 + 1.0) * k as f64 / (2.0 * n)).cos(); } - let scale = if k == 0 { (1.0 / n).sqrt() } else { (2.0 / n).sqrt() }; + let scale = if k == 0 { + (1.0 / n).sqrt() + } else { + (2.0 / n).sqrt() + }; block[start + k] = sum * scale; } } @@ -218,15 +302,25 @@ fn apply_2d_dct_ortho(block: &mut [f64], size: usize) { for k in 0..size { let mut sum = 0.0; for (i, val) in input.iter().enumerate() { - sum += val * (std::f64::consts::PI * (2.0 * i as f64 + 1.0) * k as f64 / (2.0 * n)).cos(); + sum += val + * (std::f64::consts::PI * (2.0 * i as f64 + 1.0) * k as f64 / (2.0 * n)).cos(); } - let scale = if k == 0 { (1.0 / n).sqrt() } else { (2.0 / n).sqrt() }; + let scale = if k == 0 { + (1.0 / n).sqrt() + } else { + (2.0 / n).sqrt() + }; block[k * size + col] = sum * scale; } } } -struct DwtSubbands { ll: Vec, lh: Vec, hl: Vec, hh: Vec } +struct DwtSubbands { + ll: Vec, + lh: Vec, + hl: Vec, + hh: Vec, +} fn haar_dwt_2d(data: &[f64], width: usize, height: usize) -> DwtSubbands { let half_w = width / 2; @@ -269,16 +363,26 @@ mod tests { fn test_haar_dwt_2d_identity() { let data = vec![100.0; 16]; let result = haar_dwt_2d(&data, 4, 4); - for v in &result.lh { assert!(v.abs() < 1e-10); } - for v in &result.hl { assert!(v.abs() < 1e-10); } - for v in &result.hh { assert!(v.abs() < 1e-10); } + for v in &result.lh { + assert!(v.abs() < 1e-10); + } + for v in &result.hl { + assert!(v.abs() < 1e-10); + } + for v in &result.hh { + assert!(v.abs() < 1e-10); + } assert!(result.ll[0] > 0.0); } #[test] fn test_haar_dwt_2d_edge() { let mut data = vec![0.0; 64]; - for y in 0..8 { for x in (1..8).step_by(2) { data[y * 8 + x] = 200.0; } } + for y in 0..8 { + for x in (1..8).step_by(2) { + data[y * 8 + x] = 200.0; + } + } let result = haar_dwt_2d(&data, 8, 8); let hl_energy: f64 = result.hl.iter().map(|v| v * v).sum(); assert!(hl_energy > 0.0); @@ -286,11 +390,18 @@ mod tests { #[test] fn test_dct_ortho_energy_preservation() { - let mut block = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]; + let mut block = [ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + ]; let energy_before: f64 = block.iter().map(|x| x * x).sum(); apply_2d_dct_ortho(&mut block, 4); let energy_after: f64 = block.iter().map(|x| x * x).sum(); - assert!((energy_before - energy_after).abs() < 0.1, "before={:.1}, after={:.1}", energy_before, energy_after); + assert!( + (energy_before - energy_after).abs() < 0.1, + "before={:.1}, after={:.1}", + energy_before, + energy_after + ); } #[test] diff --git a/src/detector/wav_metadata.rs b/src/detector/wav_metadata.rs index 206cb41..f92e1fb 100644 --- a/src/detector/wav_metadata.rs +++ b/src/detector/wav_metadata.rs @@ -2,7 +2,7 @@ use anyhow::Result; use std::fs; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; /// TTS-typical sample rates. @@ -24,7 +24,12 @@ pub(crate) struct WavFile { pub(crate) fn parse_wav_full(data: &[u8]) -> Option { let (fmt, info_entries, pcm_start, pcm_end) = parse_wav_inner(data)?; - Some(WavFile { fmt, info_entries, pcm_start, pcm_end }) + Some(WavFile { + fmt, + info_entries, + pcm_start, + pcm_end, + }) } fn parse_wav(data: &[u8]) -> Option<(WavFmt, Vec<(String, String)>)> { @@ -34,8 +39,12 @@ fn parse_wav(data: &[u8]) -> Option<(WavFmt, Vec<(String, String)>)> { #[allow(clippy::type_complexity)] fn parse_wav_inner(data: &[u8]) -> Option<(WavFmt, Vec<(String, String)>, usize, usize)> { - if data.len() < 36 { return None; } - if &data[0..4] != b"RIFF" || &data[8..12] != b"WAVE" { return None; } + if data.len() < 36 { + return None; + } + if &data[0..4] != b"RIFF" || &data[8..12] != b"WAVE" { + return None; + } let mut fmt = None; let mut info_entries = Vec::new(); @@ -45,7 +54,9 @@ fn parse_wav_inner(data: &[u8]) -> Option<(WavFmt, Vec<(String, String)>, usize, while pos + 8 <= data.len() { let chunk_id = &data[pos..pos + 4]; - let chunk_size = u32::from_le_bytes([data[pos+4], data[pos+5], data[pos+6], data[pos+7]]) as usize; + let chunk_size = + u32::from_le_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]]) + as usize; let chunk_data_start = pos + 8; let chunk_data_end = (chunk_data_start + chunk_size).min(data.len()); @@ -64,13 +75,24 @@ fn parse_wav_inner(data: &[u8]) -> Option<(WavFmt, Vec<(String, String)>, usize, if list_type == b"INFO" { let mut sub_pos = chunk_data_start + 4; while sub_pos + 8 <= chunk_data_end { - let sub_id = std::str::from_utf8(&data[sub_pos..sub_pos + 4]).unwrap_or("????").to_string(); - let sub_size = u32::from_le_bytes([data[sub_pos+4], data[sub_pos+5], data[sub_pos+6], data[sub_pos+7]]) as usize; + let sub_id = std::str::from_utf8(&data[sub_pos..sub_pos + 4]) + .unwrap_or("????") + .to_string(); + let sub_size = u32::from_le_bytes([ + data[sub_pos + 4], + data[sub_pos + 5], + data[sub_pos + 6], + data[sub_pos + 7], + ]) as usize; let sub_data_start = sub_pos + 8; let sub_data_end = (sub_data_start + sub_size).min(chunk_data_end); if sub_data_start < sub_data_end { - let value = String::from_utf8_lossy(&data[sub_data_start..sub_data_end]).trim_matches('\0').to_string(); - if !value.is_empty() { info_entries.push((sub_id, value)); } + let value = String::from_utf8_lossy(&data[sub_data_start..sub_data_end]) + .trim_matches('\0') + .to_string(); + if !value.is_empty() { + info_entries.push((sub_id, value)); + } } sub_pos = sub_data_start + ((sub_size + 1) & !1); } @@ -95,12 +117,16 @@ pub fn detect(path: &Path) -> Result> { if tool_keys.contains(&key.as_str()) { if let Some(tool_name) = known_tools::match_ai_tool(value) { signals.push( - SignalBuilder::new(SignalSource::WavMetadata, Confidence::Medium, "signal_wav_info_tool") - .param("key", key.as_str()) - .param("value", value.as_str()) - .tool(tool_name) - .detail(key.as_str(), value.as_str()) - .build(), + SignalBuilder::new( + SignalSource::WavMetadata, + Confidence::Medium, + "signal_wav_info_tool", + ) + .param("key", key.as_str()) + .param("value", value.as_str()) + .tool(tool_name) + .detail(key.as_str(), value.as_str()) + .build(), ); } } @@ -111,13 +137,17 @@ pub fn detect(path: &Path) -> Result> { if is_mono && is_tts_rate { signals.push( - SignalBuilder::new(SignalSource::WavMetadata, Confidence::Low, "signal_wav_tts_heuristic") - .param("rate", fmt.sample_rate.to_string()) - .param("bits", fmt.bits_per_sample.to_string()) - .detail("channels", fmt.channels.to_string()) - .detail("sample_rate", format!("{}Hz", fmt.sample_rate)) - .detail("bits_per_sample", fmt.bits_per_sample.to_string()) - .build(), + SignalBuilder::new( + SignalSource::WavMetadata, + Confidence::Low, + "signal_wav_tts_heuristic", + ) + .param("rate", fmt.sample_rate.to_string()) + .param("bits", fmt.bits_per_sample.to_string()) + .detail("channels", fmt.channels.to_string()) + .detail("sample_rate", format!("{}Hz", fmt.sample_rate)) + .detail("bits_per_sample", fmt.bits_per_sample.to_string()) + .build(), ); } @@ -133,12 +163,21 @@ pub fn dump_info(path: &Path) -> Result> { let mut props = Vec::new(); props.push(("Sample Rate".to_string(), format!("{}Hz", fmt.sample_rate))); props.push(("Channels".to_string(), fmt.channels.to_string())); - props.push(("Bits Per Sample".to_string(), fmt.bits_per_sample.to_string())); + props.push(( + "Bits Per Sample".to_string(), + fmt.bits_per_sample.to_string(), + )); for (key, value) in info_entries { let label = match key.as_str() { - "ISFT" => "Software (ISFT)", "ICMT" => "Comment (ICMT)", "IART" => "Artist (IART)", - "IENG" => "Engineer (IENG)", "IPRD" => "Product (IPRD)", "IGNR" => "Genre (IGNR)", - "INAM" => "Name (INAM)", "ICRD" => "Date (ICRD)", other => other, + "ISFT" => "Software (ISFT)", + "ICMT" => "Comment (ICMT)", + "IART" => "Artist (IART)", + "IENG" => "Engineer (IENG)", + "IPRD" => "Product (IPRD)", + "IGNR" => "Genre (IGNR)", + "INAM" => "Name (INAM)", + "ICRD" => "Date (ICRD)", + other => other, }; props.push((label.to_string(), value)); } @@ -149,7 +188,12 @@ pub fn dump_info(path: &Path) -> Result> { mod tests { use super::*; - fn make_wav(channels: u16, sample_rate: u32, bits_per_sample: u16, info_chunks: &[(&str, &str)]) -> Vec { + fn make_wav( + channels: u16, + sample_rate: u32, + bits_per_sample: u16, + info_chunks: &[(&str, &str)], + ) -> Vec { let byte_rate = sample_rate * channels as u32 * bits_per_sample as u32 / 8; let block_align = channels * bits_per_sample / 8; let data_size = 100u32 * block_align as u32; @@ -164,11 +208,17 @@ mod tests { info_buf.extend_from_slice(&padded_len.to_le_bytes()); info_buf.extend_from_slice(val_bytes); info_buf.push(0); - if (val_bytes.len() + 1) % 2 != 0 { info_buf.push(0); } + if (val_bytes.len() + 1) % 2 != 0 { + info_buf.push(0); + } } } let fmt_size = 16u32; - let list_chunk_size = if info_buf.is_empty() { 0 } else { 8 + info_buf.len() as u32 }; + let list_chunk_size = if info_buf.is_empty() { + 0 + } else { + 8 + info_buf.len() as u32 + }; let riff_size = 4 + 8 + fmt_size + 8 + data_size + list_chunk_size; buf.extend_from_slice(b"RIFF"); buf.extend_from_slice(&riff_size.to_le_bytes()); @@ -255,8 +305,14 @@ mod tests { let tmp = tempfile::NamedTempFile::new().unwrap(); fs::write(tmp.path(), &wav).unwrap(); let props = dump_info(tmp.path()).unwrap(); - assert!(props.iter().any(|(k, v)| k == "Sample Rate" && v == "24000Hz")); - assert!(props.iter().any(|(k, v)| k.contains("ISFT") && v == "MyTool")); - assert!(props.iter().any(|(k, v)| k.contains("ICMT") && v == "A comment")); + assert!(props + .iter() + .any(|(k, v)| k == "Sample Rate" && v == "24000Hz")); + assert!(props + .iter() + .any(|(k, v)| k.contains("ISFT") && v == "MyTool")); + assert!(props + .iter() + .any(|(k, v)| k.contains("ICMT") && v == "A comment")); } } diff --git a/src/detector/xmp.rs b/src/detector/xmp.rs index ee71ea2..13529b9 100644 --- a/src/detector/xmp.rs +++ b/src/detector/xmp.rs @@ -2,13 +2,16 @@ use anyhow::Result; use std::fs; use std::path::Path; -use super::{Confidence, SignalBuilder, Signal, SignalSource}; +use super::{Confidence, Signal, SignalBuilder, SignalSource}; use crate::known_tools; /// IPTC DigitalSourceType URIs/names that indicate AI generation. const AI_SOURCE_TYPES: &[(&str, &str)] = &[ ("trainedAlgorithmicMedia", "trainedAlgorithmicMedia"), - ("compositeWithTrainedAlgorithmicMedia", "compositeWithTrainedAlgorithmicMedia"), + ( + "compositeWithTrainedAlgorithmicMedia", + "compositeWithTrainedAlgorithmicMedia", + ), ("algorithmicMedia", "algorithmicMedia"), ("compositeSynthetic", "compositeSynthetic"), ("dataDrivenMedia", "dataDrivenMedia"), @@ -110,10 +113,14 @@ pub fn detect(path: &Path) -> Result> { for (name, pattern) in AI_SOURCE_TYPES { if value.contains(pattern) { signals.push( - SignalBuilder::new(SignalSource::Xmp, Confidence::Medium, "signal_xmp_digital_source_type") - .param("value", *name) - .detail("DigitalSourceType", &value) - .build(), + SignalBuilder::new( + SignalSource::Xmp, + Confidence::Medium, + "signal_xmp_digital_source_type", + ) + .param("value", *name) + .detail("DigitalSourceType", &value) + .build(), ); break; } @@ -123,30 +130,42 @@ pub fn detect(path: &Path) -> Result> { if let Some(value) = extract_property(&xml, "AISystemUsed") { let tool = known_tools::match_ai_tool(&value).map(|s| s.to_string()); signals.push( - SignalBuilder::new(SignalSource::Xmp, Confidence::Medium, "signal_xmp_ai_system_used") - .param("value", &value) - .tool_opt(tool) - .detail("AISystemUsed", &value) - .build(), + SignalBuilder::new( + SignalSource::Xmp, + Confidence::Medium, + "signal_xmp_ai_system_used", + ) + .param("value", &value) + .tool_opt(tool) + .detail("AISystemUsed", &value) + .build(), ); } if let Some(value) = extract_property(&xml, "AIPromptInformation") { signals.push( - SignalBuilder::new(SignalSource::Xmp, Confidence::Medium, "signal_xmp_ai_prompt") - .detail("AIPromptInformation", &value) - .build(), + SignalBuilder::new( + SignalSource::Xmp, + Confidence::Medium, + "signal_xmp_ai_prompt", + ) + .detail("AIPromptInformation", &value) + .build(), ); } if let Some(value) = extract_property(&xml, "CreatorTool") { if let Some(tool_name) = known_tools::match_ai_tool(&value) { signals.push( - SignalBuilder::new(SignalSource::Xmp, Confidence::Medium, "signal_xmp_creator_tool") - .param("value", &value) - .tool(tool_name) - .detail("CreatorTool", &value) - .build(), + SignalBuilder::new( + SignalSource::Xmp, + Confidence::Medium, + "signal_xmp_creator_tool", + ) + .param("value", &value) + .tool(tool_name) + .detail("CreatorTool", &value) + .build(), ); } } diff --git a/src/main.rs b/src/main.rs index c9cc2da..8d9460f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -95,7 +95,14 @@ fn cmd_info(info_args: &cli::InfoArgs) -> ExitCode { let id3_tags = detector::id3_metadata::dump_info(path).unwrap_or_default(); let wav_meta = detector::wav_metadata::dump_info(path).unwrap_or_default(); - output::print_info(&report, &xmp_props, &exif_fields, &mp4_meta, &id3_tags, &wav_meta); + output::print_info( + &report, + &xmp_props, + &exif_fields, + &mp4_meta, + &id3_tags, + &wav_meta, + ); ExitCode::from(0) } diff --git a/src/output.rs b/src/output.rs index 097c688..8d26794 100644 --- a/src/output.rs +++ b/src/output.rs @@ -82,11 +82,13 @@ pub fn print_human(reports: &[FileReport]) { }; format!( " Verdict: {} (confidence: {})", - label, - report.overall_confidence + label, report.overall_confidence ) } else { - format!(" Verdict: {}", i18n::t("verdict_not_detected", &[]).green()) + format!( + " Verdict: {}", + i18n::t("verdict_not_detected", &[]).green() + ) }; println!("{}", verdict); } @@ -119,15 +121,28 @@ pub fn print_json(reports: &[FileReport]) { }; match serde_json::to_string_pretty(&output) { Ok(json) => println!("{}", json), - Err(e) => eprintln!("{}", i18n::t("error_json_serialize", &[("err", &e.to_string())])), + Err(e) => eprintln!( + "{}", + i18n::t("error_json_serialize", &[("err", &e.to_string())]) + ), } } /// Print info dump for a single file. -pub fn print_info(report: &FileReport, xmp_props: &[(String, String)], exif_fields: &[(String, String)], mp4_meta: &[(String, String)], id3_tags: &[(String, String)], wav_meta: &[(String, String)]) { +pub fn print_info( + report: &FileReport, + xmp_props: &[(String, String)], + exif_fields: &[(String, String)], + mp4_meta: &[(String, String)], + id3_tags: &[(String, String)], + wav_meta: &[(String, String)], +) { println!("{}", report.path.display().to_string().bold()); if let Some(mime) = &report.mime_type { - println!(" {}", i18n::t("output_type_label", &[("mime", mime.as_str())])); + println!( + " {}", + i18n::t("output_type_label", &[("mime", mime.as_str())]) + ); } println!(); @@ -210,7 +225,14 @@ pub fn print_info(report: &FileReport, xmp_props: &[(String, String)], exif_fiel println!(); } - if c2pa_signals.is_empty() && xmp_props.is_empty() && exif_fields.is_empty() && mp4_meta.is_empty() && id3_tags.is_empty() && wav_meta.is_empty() && wm_signals.is_empty() { + if c2pa_signals.is_empty() + && xmp_props.is_empty() + && exif_fields.is_empty() + && mp4_meta.is_empty() + && id3_tags.is_empty() + && wav_meta.is_empty() + && wm_signals.is_empty() + { println!("{}", i18n::t("info_no_metadata", &[]).dimmed()); } } diff --git a/src/scanner.rs b/src/scanner.rs index b9b4bb9..58537db 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -36,7 +36,8 @@ fn is_supported(path: &Path) -> bool { if let Some(ext) = path.extension().and_then(|e| e.to_str()) { matches!( ext.to_lowercase().as_str(), - "jpg" | "jpeg" + "jpg" + | "jpeg" | "png" | "webp" | "avif" @@ -83,7 +84,10 @@ pub fn discover_files(paths: &[PathBuf], recursive: bool) -> Result } else { eprintln!( "{}", - i18n::t("scanner_not_found", &[("path", &path.display().to_string())]) + i18n::t( + "scanner_not_found", + &[("path", &path.display().to_string())] + ) ); } } diff --git a/tests/video_c2pa.rs b/tests/video_c2pa.rs index a2e5731..18fcc03 100644 --- a/tests/video_c2pa.rs +++ b/tests/video_c2pa.rs @@ -27,7 +27,9 @@ fn mp4_without_c2pa_not_detected() { .args(["--lang", "en", "check", "tests/fixtures/no_c2pa.mp4"]) .assert() .code(1) // exit 1 = no AI detected - .stdout(predicate::str::contains("No AI-generation signals detected")); + .stdout(predicate::str::contains( + "No AI-generation signals detected", + )); } #[test] diff --git a/tests/watermark_detection.rs b/tests/watermark_detection.rs index 0b972bb..34bd606 100644 --- a/tests/watermark_detection.rs +++ b/tests/watermark_detection.rs @@ -4,7 +4,13 @@ use predicates::prelude::*; #[test] fn watermarked_dwtdct_detected_with_deep() { cargo_bin_cmd!("aic") - .args(["--lang", "en", "check", "--deep", "tests/fixtures/watermarked_dwtdct.png"]) + .args([ + "--lang", + "en", + "check", + "--deep", + "tests/fixtures/watermarked_dwtdct.png", + ]) .assert() .success() // exit 0 = AI detected .stdout(predicate::str::contains("WATERMARK")) @@ -14,7 +20,11 @@ fn watermarked_dwtdct_detected_with_deep() { #[test] fn watermarked_dwtdctsvd_detected_with_deep() { cargo_bin_cmd!("aic") - .args(["check", "--deep", "tests/fixtures/watermarked_dwtdctsvd.png"]) + .args([ + "check", + "--deep", + "tests/fixtures/watermarked_dwtdctsvd.png", + ]) .assert() .success() .stdout(predicate::str::contains("WATERMARK")); @@ -23,10 +33,18 @@ fn watermarked_dwtdctsvd_detected_with_deep() { #[test] fn clean_image_not_detected_with_deep() { cargo_bin_cmd!("aic") - .args(["--lang", "en", "check", "--deep", "tests/fixtures/clean_synthetic.png"]) + .args([ + "--lang", + "en", + "check", + "--deep", + "tests/fixtures/clean_synthetic.png", + ]) .assert() .code(1) // exit 1 = no AI detected - .stdout(predicate::str::contains("No AI-generation signals detected")); + .stdout(predicate::str::contains( + "No AI-generation signals detected", + )); } #[test] @@ -68,7 +86,12 @@ fn watermarked_json_output() { #[test] fn watermark_info_command() { cargo_bin_cmd!("aic") - .args(["--lang", "en", "info", "tests/fixtures/watermarked_dwtdct.png"]) + .args([ + "--lang", + "en", + "info", + "tests/fixtures/watermarked_dwtdct.png", + ]) .assert() .success() .stdout(predicate::str::contains("Watermark Analysis"));