diff --git a/src/app/frame_processor/tasks/qr_detector.rs b/src/app/frame_processor/tasks/qr_detector.rs
index 235fa3d..edbb9d6 100644
--- a/src/app/frame_processor/tasks/qr_detector.rs
+++ b/src/app/frame_processor/tasks/qr_detector.rs
@@ -7,7 +7,7 @@
 //! returning their positions and decoded content.
 
 use crate::app::frame_processor::types::{FrameRegion, QrDetection};
-use crate::backends::camera::types::CameraFrame;
+use crate::backends::camera::types::{CameraFrame, PixelFormat};
 use std::sync::Arc;
 use tracing::{debug, trace, warn};
 
@@ -63,8 +63,8 @@ impl QrDetector {
 fn detect_sync(frame: &CameraFrame, max_dimension: u32) -> Vec<QrDetection> {
     let start = std::time::Instant::now();
 
-    // Convert RGBA frame to grayscale
-    let (gray_data, width, height) = convert_rgba_to_gray(frame);
+    // Convert frame to grayscale (handles all pixel formats)
+    let (gray_data, width, height) = convert_to_gray(frame);
     let conversion_time = start.elapsed();
 
     trace!(
@@ -176,30 +176,163 @@ fn detect_sync(frame: &CameraFrame, max_dimension: u32) -> Vec<QrDetection> {
     detections
 }
 
-/// Convert RGBA frame to grayscale
-fn convert_rgba_to_gray(frame: &CameraFrame) -> (Vec<u8>, u32, u32) {
+/// Convert frame to grayscale, handling all pixel formats
+///
+/// For YUV formats (NV12, I420, YUYV, etc.), the Y plane IS the luminance,
+/// so we can extract it directly - this is more efficient than RGB conversion.
+fn convert_to_gray(frame: &CameraFrame) -> (Vec<u8>, u32, u32) {
     let width = frame.width as usize;
     let height = frame.height as usize;
     let stride = frame.stride as usize;
 
-    let mut gray = Vec::with_capacity(width * height);
-
-    for y in 0..height {
-        let row_start = y * stride;
-        for x in 0..width {
-            let offset = row_start + x * 4;
-            if offset + 2 < frame.data.len() {
-                let r = frame.data[offset] as u32;
-                let g = frame.data[offset + 1] as u32;
-                let b = frame.data[offset + 2] as u32;
-                // Standard luminance formula: 0.299*R + 0.587*G + 0.114*B
-                let gray_val = ((r * 77 + g * 150 + b * 29) >> 8) as u8;
-                gray.push(gray_val);
+    match frame.format {
+        // RGBA: Convert RGB to grayscale
+        PixelFormat::RGBA => {
+            let mut gray = Vec::with_capacity(width * height);
+            for y in 0..height {
+                let row_start = y * stride;
+                for x in 0..width {
+                    let offset = row_start + x * 4;
+                    if offset + 2 < frame.data.len() {
+                        let r = frame.data[offset] as u32;
+                        let g = frame.data[offset + 1] as u32;
+                        let b = frame.data[offset + 2] as u32;
+                        // Standard luminance formula: 0.299*R + 0.587*G + 0.114*B
+                        let gray_val = ((r * 77 + g * 150 + b * 29) >> 8) as u8;
+                        gray.push(gray_val);
+                    }
+                }
             }
+            (gray, frame.width, frame.height)
+        }
+
+        // Gray8: Already grayscale, just copy
+        PixelFormat::Gray8 => {
+            let mut gray = Vec::with_capacity(width * height);
+            for y in 0..height {
+                let row_start = y * stride;
+                for x in 0..width {
+                    let offset = row_start + x;
+                    if offset < frame.data.len() {
+                        gray.push(frame.data[offset]);
+                    }
+                }
+            }
+            (gray, frame.width, frame.height)
+        }
+
+        // RGB24: Convert RGB to grayscale (no alpha)
+        PixelFormat::RGB24 => {
+            let mut gray = Vec::with_capacity(width * height);
+            for y in 0..height {
+                let row_start = y * stride;
+                for x in 0..width {
+                    let offset = row_start + x * 3;
+                    if offset + 2 < frame.data.len() {
+                        let r = frame.data[offset] as u32;
+                        let g = frame.data[offset + 1] as u32;
+                        let b = frame.data[offset + 2] as u32;
+                        let gray_val = ((r * 77 + g * 150 + b * 29) >> 8) as u8;
+                        gray.push(gray_val);
+                    }
+                }
+            }
+            (gray, frame.width, frame.height)
+        }
+
+        // NV12/NV21: Extract Y plane (full resolution luminance)
+        PixelFormat::NV12 | PixelFormat::NV21 => {
+            let mut gray = Vec::with_capacity(width * height);
+            if let Some(ref planes) = frame.yuv_planes {
+                for y in 0..height {
+                    let row_start = planes.y_offset + y * stride;
+                    for x in 0..width {
+                        let offset = row_start + x;
+                        if offset < frame.data.len() {
+                            gray.push(frame.data[offset]);
+                        }
+                    }
+                }
+            } else {
+                // Fallback: assume Y plane is at start of buffer
+                for y in 0..height {
+                    let row_start = y * stride;
+                    for x in 0..width {
+                        let offset = row_start + x;
+                        if offset < frame.data.len() {
+                            gray.push(frame.data[offset]);
+                        }
+                    }
+                }
+            }
+            (gray, frame.width, frame.height)
+        }
+
+        // I420: Extract Y plane (full resolution luminance)
+        PixelFormat::I420 => {
+            let mut gray = Vec::with_capacity(width * height);
+            if let Some(ref planes) = frame.yuv_planes {
+                for y in 0..height {
+                    let row_start = planes.y_offset + y * stride;
+                    for x in 0..width {
+                        let offset = row_start + x;
+                        if offset < frame.data.len() {
+                            gray.push(frame.data[offset]);
+                        }
+                    }
+                }
+            } else {
+                // Fallback: assume Y plane is at start of buffer
+                for y in 0..height {
+                    let row_start = y * stride;
+                    for x in 0..width {
+                        let offset = row_start + x;
+                        if offset < frame.data.len() {
+                            gray.push(frame.data[offset]);
+                        }
+                    }
+                }
+            }
+            (gray, frame.width, frame.height)
+        }
+
+        // YUYV/UYVY/YVYU/VYUY: Extract Y values from packed format
+        // YUYV: Y0 U Y1 V (Y at positions 0, 2)
+        // UYVY: U Y0 V Y1 (Y at positions 1, 3)
+        // YVYU: Y0 V Y1 U (Y at positions 0, 2)
+        // VYUY: V Y0 U Y1 (Y at positions 1, 3)
+        PixelFormat::YUYV | PixelFormat::YVYU => {
+            let mut gray = Vec::with_capacity(width * height);
+            for y in 0..height {
+                let row_start = y * stride;
+                for x in 0..width {
+                    // In YUYV/YVYU: Y0 is at byte 0, Y1 is at byte 2 of each 4-byte pair
+                    let pair_offset = row_start + (x / 2) * 4;
+                    let y_offset = pair_offset + (x % 2) * 2;
+                    if y_offset < frame.data.len() {
+                        gray.push(frame.data[y_offset]);
+                    }
+                }
+            }
+            (gray, frame.width, frame.height)
         }
-    }
-    (gray, frame.width, frame.height)
+        PixelFormat::UYVY | PixelFormat::VYUY => {
+            let mut gray = Vec::with_capacity(width * height);
+            for y in 0..height {
+                let row_start = y * stride;
+                for x in 0..width {
+                    // In UYVY/VYUY: Y0 is at byte 1, Y1 is at byte 3 of each 4-byte pair
+                    let pair_offset = row_start + (x / 2) * 4;
+                    let y_offset = pair_offset + 1 + (x % 2) * 2;
+                    if y_offset < frame.data.len() {
+                        gray.push(frame.data[y_offset]);
+                    }
+                }
+            }
+            (gray, frame.width, frame.height)
+        }
+    }
 }
 
 /// Downscale grayscale image using bilinear interpolation
@@ -275,7 +408,7 @@ mod tests {
             captured_at: std::time::Instant::now(),
         };
 
-        let (gray, w, h) = convert_rgba_to_gray(&frame);
+        let (gray, w, h) = convert_to_gray(&frame);
         assert_eq!(w, 2);
         assert_eq!(h, 2);
        assert_eq!(gray.len(), 4);
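Note on the fixed-point luma used above: the weights 77/150/29 are the BT.601 coefficients scaled by 256 (0.299*256 ≈ 77, 0.587*256 ≈ 150, 0.114*256 ≈ 29), so the `>> 8` divides the scale back out. A standalone sketch checking the approximation stays within a couple of gray levels; the helper names here (`luma_fixed`, `luma_float`) are illustrative, not from the patch:

```rust
// Compare the patch's integer luma against the floating-point formula.
fn luma_fixed(r: u8, g: u8, b: u8) -> u8 {
    ((r as u32 * 77 + g as u32 * 150 + b as u32 * 29) >> 8) as u8
}

fn luma_float(r: u8, g: u8, b: u8) -> f32 {
    0.299 * r as f32 + 0.587 * g as f32 + 0.114 * b as f32
}

fn main() {
    let mut max_err = 0.0f32;
    // Sample the RGB cube on a coarse grid (0, 15, ..., 255).
    for r in (0..=255).step_by(15) {
        for g in (0..=255).step_by(15) {
            for b in (0..=255).step_by(15) {
                let err = (luma_fixed(r as u8, g as u8, b as u8) as f32
                    - luma_float(r as u8, g as u8, b as u8))
                .abs();
                max_err = max_err.max(err);
            }
        }
    }
    // Rounded weights plus >>8 truncation cost at most ~2 levels.
    assert!(max_err < 2.5, "max luma error {} too large", max_err);
    println!("max luma error: {}", max_err);
}
```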
diff --git a/src/pipelines/photo/burst_mode/mod.rs b/src/pipelines/photo/burst_mode/mod.rs
index 97f7d39..6634f5b 100644
--- a/src/pipelines/photo/burst_mode/mod.rs
+++ b/src/pipelines/photo/burst_mode/mod.rs
@@ -41,8 +41,9 @@ pub mod fft_gpu;
 mod gpu_helpers;
 pub mod params;
 
-use crate::backends::camera::types::CameraFrame;
+use crate::backends::camera::types::{CameraFrame, PixelFormat};
 use crate::gpu::{self, wgpu};
+use crate::shaders::{GpuFrameInput, get_gpu_convert_pipeline};
 use std::sync::{Arc, RwLock};
 use tracing::{debug, info, warn};
 
@@ -105,6 +106,113 @@ pub(crate) fn u8_to_f32_normalized(data: &[u8]) -> Vec<f32> {
     data.iter().map(|&x| x as f32 / 255.0).collect()
 }
 
+/// Convert a camera frame to RGBA format using GPU compute shader
+///
+/// If the frame is already RGBA, returns a copy of the data.
+/// For YUV and other formats, uses GPU compute shader for conversion.
+async fn convert_frame_to_rgba(frame: &CameraFrame) -> Result<Vec<u8>, String> {
+    // Fast path: already RGBA
+    if frame.format == PixelFormat::RGBA {
+        return Ok(frame.data.to_vec());
+    }
+
+    let buffer_data = frame.data.as_ref();
+    let yuv_planes = frame.yuv_planes.as_ref();
+
+    // Build GpuFrameInput from the frame
+    let input = match frame.format {
+        PixelFormat::NV12 | PixelFormat::NV21 => {
+            let planes = yuv_planes.ok_or("NV12/NV21 frame missing yuv_planes")?;
+            let y_end = planes.y_offset + planes.y_size;
+            let uv_end = planes.uv_offset + planes.uv_size;
+
+            GpuFrameInput {
+                format: frame.format,
+                width: frame.width,
+                height: frame.height,
+                y_data: &buffer_data[planes.y_offset..y_end],
+                y_stride: frame.stride,
+                uv_data: Some(&buffer_data[planes.uv_offset..uv_end]),
+                uv_stride: planes.uv_stride,
+                v_data: None,
+                v_stride: 0,
+            }
+        }
+        PixelFormat::I420 => {
+            let planes = yuv_planes.ok_or("I420 frame missing yuv_planes")?;
+            let y_end = planes.y_offset + planes.y_size;
+            let u_end = planes.uv_offset + planes.uv_size;
+            let v_end = planes.v_offset + planes.v_size;
+
+            GpuFrameInput {
+                format: frame.format,
+                width: frame.width,
+                height: frame.height,
+                y_data: &buffer_data[planes.y_offset..y_end],
+                y_stride: frame.stride,
+                uv_data: Some(&buffer_data[planes.uv_offset..u_end]),
+                uv_stride: planes.uv_stride,
+                v_data: if planes.v_size > 0 {
+                    Some(&buffer_data[planes.v_offset..v_end])
+                } else {
+                    None
+                },
+                v_stride: planes.v_stride,
+            }
+        }
+        // Packed 4:2:2 formats - all have the same structure, just different byte order
+        PixelFormat::YUYV | PixelFormat::UYVY | PixelFormat::YVYU | PixelFormat::VYUY => {
+            GpuFrameInput {
+                format: frame.format,
+                width: frame.width,
+                height: frame.height,
+                y_data: buffer_data,
+                y_stride: frame.stride,
+                uv_data: None,
+                uv_stride: 0,
+                v_data: None,
+                v_stride: 0,
+            }
+        }
+        // Single-plane formats: Gray8, RGB24
+        PixelFormat::Gray8 | PixelFormat::RGB24 => GpuFrameInput {
+            format: frame.format,
+            width: frame.width,
+            height: frame.height,
+            y_data: buffer_data,
+            y_stride: frame.stride,
+            uv_data: None,
+            uv_stride: 0,
+            v_data: None,
+            v_stride: 0,
+        },
+        PixelFormat::RGBA => {
+            // Should not reach here - handled at function start
+            return Ok(buffer_data.to_vec());
+        }
+    };
+
+    // Use GPU compute shader pipeline for conversion
+    let mut pipeline_guard = get_gpu_convert_pipeline()
+        .await
+        .map_err(|e| format!("Failed to get GPU convert pipeline: {}", e))?;
+
+    let pipeline = pipeline_guard
+        .as_mut()
+        .ok_or("GPU convert pipeline not initialized")?;
+
+    // Run GPU conversion (synchronous, just dispatches compute shader)
+    pipeline
+        .convert(&input)
+        .map_err(|e| format!("GPU conversion failed: {}", e))?;
+
+    // Read back RGBA data from GPU to CPU memory
+    pipeline
+        .read_rgba_to_cpu(frame.width, frame.height)
+        .await
+        .map_err(|e| format!("Failed to read RGBA from GPU: {}", e))
+}
+
 /// Hierarchical alignment configuration per pyramid level.
 /// Each entry: (tile_size, search_distance, use_l2_metric)
 /// Level 0 (full): coarse tiles, L1 metric, small search
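For reviewers unfamiliar with the plane bookkeeping that `convert_frame_to_rgba` relies on: the offsets and sizes in `yuv_planes` follow the standard planar layouts. A self-contained illustration below, assuming tightly packed buffers; the `Planes` struct is a stand-in that mirrors the field names read by the patch, not the crate's real type (the real values come from the camera backend):

```rust
// Hypothetical stand-in for the fields read from `frame.yuv_planes`.
struct Planes {
    y_offset: usize,
    y_size: usize,
    uv_offset: usize,
    uv_size: usize,
    v_offset: usize,
    v_size: usize,
}

// NV12/NV21: full-resolution Y plane followed by one interleaved
// UV (or VU) plane with half the rows, same stride.
fn nv12_planes(stride: usize, height: usize) -> Planes {
    let y_size = stride * height;
    Planes {
        y_offset: 0,
        y_size,
        uv_offset: y_size,
        uv_size: stride * height / 2,
        v_offset: 0,
        v_size: 0, // no separate V plane, hence the patch's `v_size > 0` check
    }
}

// I420: full-resolution Y plane, then separate U and V planes at
// half resolution in both dimensions (half stride, half rows).
fn i420_planes(stride: usize, height: usize) -> Planes {
    let y_size = stride * height;
    let chroma_size = (stride / 2) * (height / 2);
    Planes {
        y_offset: 0,
        y_size,
        uv_offset: y_size, // U plane
        uv_size: chroma_size,
        v_offset: y_size + chroma_size, // V plane follows U
        v_size: chroma_size,
    }
}

fn main() {
    let nv12 = nv12_planes(1920, 1080);
    assert_eq!(nv12.uv_offset, 1920 * 1080);
    assert_eq!(nv12.y_size + nv12.uv_size, 1920 * 1080 * 3 / 2);

    let i420 = i420_planes(1920, 1080);
    assert_eq!(i420.v_offset, 1920 * 1080 + 960 * 540);
    assert_eq!(i420.y_size + i420.uv_size + i420.v_size, 1920 * 1080 * 3 / 2);
    println!("plane layouts check out");
}
```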
@@ -975,8 +1083,11 @@ impl BurstModeGpuPipeline {
         let height = frame.height;
         let pixel_count = (width * height) as usize;
 
+        // Convert frame to RGBA if needed (handles YUV formats)
+        let rgba_data = convert_frame_to_rgba(frame).await?;
+
         // Convert to f32
-        let frame_f32 = u8_to_f32_normalized(&frame.data);
+        let frame_f32 = u8_to_f32_normalized(&rgba_data);
 
         // Create buffers
         let frame_buffer = self.create_storage_buffer(
@@ -2038,10 +2149,13 @@ impl BurstModeGpuPipeline {
         let width = reference.width;
         let height = reference.height;
 
+        // Convert reference frame to RGBA if needed (handles YUV formats)
+        let reference_rgba = convert_frame_to_rgba(reference).await?;
+
         // Estimate noise using GPU
         let step_start = std::time::Instant::now();
         let noise_sd = self
-            .estimate_noise_gpu(&reference.data, width, height)
+            .estimate_noise_gpu(&reference_rgba, width, height)
             .await?;
         info!(
             elapsed_ms = step_start.elapsed().as_millis(),
@@ -2052,7 +2166,7 @@
         let result = self
             .fft_pipeline
             .merge_gpu(
-                &reference.data,
+                &reference_rgba,
                 aligned,
                 width,
                 height,
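One possible hardening, not included in this patch: every GPU entry point now assumes its input is RGBA, so a cheap length check at the upload boundary would turn a format mix-up (the pre-patch failure mode, where a YUV buffer was normalized as if it were RGBA) into an immediate error instead of garbage output. A sketch; `assert_rgba_len` is a suggested helper, not part of the diff:

```rust
// Verify a buffer really is width*height*4 bytes before treating it as RGBA.
fn assert_rgba_len(data: &[u8], width: u32, height: u32) -> Result<(), String> {
    let expected = width as usize * height as usize * 4;
    if data.len() == expected {
        Ok(())
    } else {
        Err(format!(
            "expected {} bytes of RGBA for {}x{}, got {}",
            expected, width, height, data.len()
        ))
    }
}

fn main() {
    // 2x2 RGBA frame: 16 bytes -> accepted
    assert!(assert_rgba_len(&[0u8; 16], 2, 2).is_ok());
    // 2x2 NV12 frame: 6 bytes (4 Y + 2 interleaved UV) -> caught
    assert!(assert_rgba_len(&[0u8; 6], 2, 2).is_err());
    println!("length guard behaves as expected");
}
```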
@@ -2611,8 +2725,13 @@ pub async fn export_raw_frames(
         let filename = format!("frame_{:03}.png", i);
         let output_path = burst_dir.join(&filename);
 
+        // Convert frame to RGBA if needed (handles YUV formats)
+        let rgba_data = convert_frame_to_rgba(frame)
+            .await
+            .map_err(|e| format!("Failed to convert frame {} to RGBA: {}", i, e))?;
+
         let img: ImageBuffer<Rgba<u8>, _> =
-            ImageBuffer::from_raw(frame.width, frame.height, frame.data.clone())
+            ImageBuffer::from_raw(frame.width, frame.height, rgba_data)
                 .ok_or_else(|| format!("Failed to create image buffer for frame {}", i))?;
 
         let output_path_clone = output_path.clone();
@@ -2679,10 +2798,13 @@ pub async fn export_burst_frames_dng(
         let filename = format!("frame_{:03}.dng", i);
         let output_path = burst_dir.join(&filename);
 
-        // Convert RGBA to RGB - need to convert Arc<[u8]> to Vec<u8>
-        let data_vec: Vec<u8> = frame.data.to_vec();
+        // Convert frame to RGBA if needed (handles YUV formats)
+        let rgba_data = convert_frame_to_rgba(frame)
+            .await
+            .map_err(|e| format!("Failed to convert frame {} to RGBA: {}", i, e))?;
+
         let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
-            ImageBuffer::from_raw(frame.width, frame.height, data_vec)
+            ImageBuffer::from_raw(frame.width, frame.height, rgba_data)
                 .ok_or_else(|| format!("Failed to create image buffer for frame {}", i))?;
 
         let rgb_img = image::DynamicImage::ImageRgba8(img).to_rgb8();
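The export paths above reduce to: wrap the raw RGBA buffer in an `image::ImageBuffer`, then drop alpha for RGB outputs. A minimal standalone equivalent using the `image` crate; the 2x2 red frame and PNG output are chosen purely for illustration:

```rust
use image::{ImageBuffer, Rgba};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let (width, height) = (2u32, 2u32);
    // 2x2 opaque red RGBA frame
    let rgba_data: Vec<u8> = [255u8, 0, 0, 255].repeat((width * height) as usize);

    // from_raw returns None if the buffer length doesn't match the dimensions,
    // which is why the patch maps that case to an error per frame.
    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
        ImageBuffer::from_raw(width, height, rgba_data)
            .ok_or("buffer length does not match 2x2 RGBA")?;

    // Same RGBA -> RGB step the DNG export performs before encoding
    let rgb_img = image::DynamicImage::ImageRgba8(img).to_rgb8();
    rgb_img.save("frame_000.png")?;
    Ok(())
}
```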