diff --git a/src/app/frame_processor/tasks/qr_detector.rs b/src/app/frame_processor/tasks/qr_detector.rs
index 7f70e8e..f6cfffc 100644
--- a/src/app/frame_processor/tasks/qr_detector.rs
+++ b/src/app/frame_processor/tasks/qr_detector.rs
@@ -255,7 +255,7 @@ fn downscale_gray(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::backends::camera::types::PixelFormat;
+    use crate::backends::camera::types::{FrameData, PixelFormat};
 
     #[test]
     fn test_rgba_to_gray() {
@@ -270,7 +270,7 @@ mod tests {
         let frame = CameraFrame {
             width: 2,
             height: 2,
-            data: Arc::from(data.as_slice()),
+            data: FrameData::Copied(Arc::from(data.as_slice())),
             format: PixelFormat::RGBA,
             stride: 8, // 2 pixels * 4 bytes = 8 bytes per row
             captured_at: std::time::Instant::now(),
diff --git a/src/app/video_primitive.rs b/src/app/video_primitive.rs
index 0a220aa..4bc1be6 100644
--- a/src/app/video_primitive.rs
+++ b/src/app/video_primitive.rs
@@ -8,6 +8,7 @@
 //! - Persistent textures across frames
 
 use crate::app::state::FilterType;
+use crate::backends::camera::types::FrameData;
 use cosmic::iced::Rectangle;
 use cosmic::iced_wgpu::graphics::Viewport;
 use cosmic::iced_wgpu::primitive::{self, Primitive as PrimitiveTrait};
@@ -20,8 +21,8 @@ pub struct VideoFrame {
     pub id: u64,
     pub width: u32,
     pub height: u32,
-    // Frame data buffer (shared Arc - no copy, RGBA format)
-    pub data: Arc<[u8]>,
+    // Frame data buffer (zero-copy when from GStreamer, RGBA format)
+    pub data: FrameData,
     // Row stride for RGBA data (bytes per row including padding)
     pub stride: u32,
 }
@@ -30,7 +31,7 @@ impl VideoFrame {
     /// Get RGBA data slice
    #[inline]
     pub fn rgba_data(&self) -> &[u8] {
-        &self.data[..]
+        &self.data
     }
 }
 
@@ -750,7 +751,8 @@ impl VideoPipeline {
         // Update last frame pointer before upload
         tex.last_frame_ptr = frame_data_ptr;
 
-        // Direct RGBA texture upload
+        // Direct RGBA texture upload (CPU to GPU copy)
+        let gpu_copy_start = Instant::now();
         queue.write_texture(
             wgpu::ImageCopyTexture {
                 texture: &tex.texture,
@@ -770,11 +772,25 @@
                 depth_or_array_layers: 1,
             },
         );
+        let gpu_copy_time = gpu_copy_start.elapsed();
 
         // Track upload duration for frame skipping decisions
         let upload_duration = upload_start.elapsed();
         self.last_upload_duration.set(upload_duration);
 
+        // Log GPU upload performance periodically (every ~30 frames based on frame.id)
+        if frame.id % 30 == 0 {
+            let size_bytes = frame.rgba_data().len();
+            tracing::debug!(
+                gpu_upload_ms = format!("{:.2}", gpu_copy_time.as_micros() as f64 / 1000.0),
+                total_prepare_ms = format!("{:.2}", upload_duration.as_micros() as f64 / 1000.0),
+                width = frame.width,
+                height = frame.height,
+                size_mb = format!("{:.1}", size_bytes as f64 / 1_000_000.0),
+                "GPU texture upload"
+            );
+        }
+
         // Reset skip counter on successful upload
         if self.frames_skipped.get() > 0 {
             tracing::info!(
diff --git a/src/app/video_widget.rs b/src/app/video_widget.rs
index 74218af..78d6c60 100644
--- a/src/app/video_widget.rs
+++ b/src/app/video_widget.rs
@@ -82,7 +82,7 @@ impl VideoWidget {
         };
 
         // Create VideoFrame for RGBA format
-        // IMPORTANT: We share the Arc without copying to avoid ~3MB copy per frame
+        // IMPORTANT: We share the FrameData without copying to maintain zero-copy from GStreamer
         if frame.width > 0 && frame.height > 0 {
             let stride = if frame.stride > 0 {
                 frame.stride
@@ -94,7 +94,7 @@
                 id: video_id,
                 width: frame.width,
                 height: frame.height,
-                data: Arc::clone(&frame.data), // No copy - just increment refcount
+                data: frame.data.clone(), // Clone FrameData - just refcount increment, no data copy
                 stride,
             };
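
Note on `frame.data.clone()` above: both `FrameData` variants wrap an `Arc`, so the clone is a reference-count bump, and the clone shares the original allocation - which is what keeps the pointer-identity dedup in video_primitive.rs (`tex.last_frame_ptr = frame_data_ptr`) valid across widget and pipeline. A minimal standalone sketch, using a simplified single-variant stand-in for the real `FrameData` defined in types.rs further down:

    use std::sync::Arc;

    // Simplified stand-in: like FrameData, the variant holds an Arc,
    // so Clone never touches the pixel bytes.
    #[derive(Clone)]
    enum Data {
        Copied(Arc<[u8]>),
    }

    fn main() {
        let frame = Data::Copied(Arc::from(vec![0u8; 1920 * 1080 * 4].into_boxed_slice()));
        let shared = frame.clone(); // atomic increment only, no ~8 MB copy
        let Data::Copied(a) = &frame;
        let Data::Copied(b) = &shared;
        assert!(Arc::ptr_eq(a, b)); // same allocation, two handles
    }
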
diff --git a/src/backends/camera/pipewire/pipeline.rs b/src/backends/camera/pipewire/pipeline.rs
index d82d789..1a27a93 100644
--- a/src/backends/camera/pipewire/pipeline.rs
+++ b/src/backends/camera/pipewire/pipeline.rs
@@ -9,7 +9,6 @@
 use gstreamer::prelude::*;
 use gstreamer_app::AppSink;
 use gstreamer_video::VideoInfo;
 use std::path::PathBuf;
-use std::sync::Arc;
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::Instant;
 use tracing::{debug, error, info, warn};
@@ -233,9 +232,12 @@ impl PipeWirePipeline {
                 gstreamer::FlowError::Error
             })?;
 
-            let map = buffer.map_readable().map_err(|e| {
+            // Get owned buffer (increments refcount, shares underlying memory)
+            // then convert to mapped buffer (zero-copy - keeps buffer mapped until dropped)
+            let owned_buffer = buffer.copy();
+            let mapped = owned_buffer.into_mapped_buffer_readable().map_err(|_| {
                 if frame_num % 30 == 0 {
-                    error!(frame = frame_num, error = ?e, "Failed to map buffer");
+                    error!(frame = frame_num, "Failed to map buffer for zero-copy");
                 }
                 gstreamer::FlowError::Error
             })?;
@@ -257,16 +259,23 @@
                 );
             }
 
-            // Use Arc::from to avoid intermediate Vec allocation
+            // Measure frame wrap time (zero-copy: just wraps mapped buffer, no data copy)
+            let copy_start = Instant::now();
+            let frame_data = FrameData::from_mapped_buffer(mapped);
+            let copy_time = copy_start.elapsed();
+
             let frame = CameraFrame {
                 width: video_info.width(),
                 height: video_info.height(),
-                data: Arc::from(map.as_slice()),
+                data: frame_data,
                 format: PixelFormat::RGBA, // Pipeline outputs RGBA
                 stride,
                 captured_at: frame_start, // Use frame_start as capture timestamp
             };
 
+            // Capture size before send (frame is moved)
+            let size_bytes = frame.data.len();
+
             // Send frame to the app (non-blocking using try_send)
             let send_start = Instant::now();
             let mut sender = frame_sender.clone();
@@ -280,13 +289,14 @@
                 let total_time = frame_start.elapsed();
                 debug!(
                     frame = frame_num,
-                    decode_us = decode_time.as_micros(),
-                    send_us = send_time.as_micros(),
-                    total_us = total_time.as_micros(),
+                    decode_ms = format!("{:.2}", decode_time.as_micros() as f64 / 1000.0),
+                    copy_ms = format!("{:.2}", copy_time.as_micros() as f64 / 1000.0),
+                    send_ms = format!("{:.2}", send_time.as_micros() as f64 / 1000.0),
+                    total_ms = format!("{:.2}", total_time.as_micros() as f64 / 1000.0),
                     width = video_info.width(),
                     height = video_info.height(),
-                    size_kb = map.as_slice().len() / 1024,
-                    "Frame performance"
+                    size_mb = format!("{:.1}", size_bytes as f64 / 1_000_000.0),
+                    "Frame capture (zero-copy)"
                 );
             }
         }
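
Note: the `buffer.copy()` + `into_mapped_buffer_readable()` pair above is the crux of the zero-copy path. `gst_buffer_copy` duplicates only the buffer metadata while refcounting the underlying `GstMemory`, and the resulting `MappedBuffer` keeps that memory mapped until it is dropped. A reduced sketch of the same handoff, assuming the `gstreamer` crate; `frame_from_sample` is an illustrative name, and `Sample::buffer_owned()` is shown as an equivalent way to obtain an owned buffer:

    use gstreamer as gst;
    use gstreamer::buffer::{MappedBuffer, Readable};

    // Illustrative helper: turn an appsink sample into a long-lived readable
    // mapping without copying the pixel bytes.
    fn frame_from_sample(sample: &gst::Sample) -> Option<MappedBuffer<Readable>> {
        // Owned handle to the same underlying memory (refcount bump, no data copy).
        let buffer: gst::Buffer = sample.buffer_owned()?;
        // Consumes the buffer; the mapping stays valid until the MappedBuffer drops.
        buffer.into_mapped_buffer_readable().ok()
    }
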
diff --git a/src/backends/camera/types.rs b/src/backends/camera/types.rs
index 631ff72..f238d7e 100644
--- a/src/backends/camera/types.rs
+++ b/src/backends/camera/types.rs
@@ -4,10 +4,83 @@
 //! Shared types for camera backends
 
+use gstreamer::buffer::{MappedBuffer, Readable};
 use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 use std::time::Instant;
 
+/// Frame data storage - either pre-copied bytes or zero-copy GStreamer buffer
+///
+/// This enum allows frames to be passed around without copying the underlying
+/// pixel data when coming from GStreamer pipelines. The `Mapped` variant keeps
+/// the GStreamer buffer mapped and alive until all references are dropped.
+#[derive(Clone)]
+pub enum FrameData {
+    /// Pre-copied bytes (used for photo capture, file sources, tests, etc.)
+    Copied(Arc<[u8]>),
+    /// Zero-copy mapped GStreamer buffer - no data copy, just reference counting
+    Mapped(Arc<MappedBuffer<Readable>>),
+}
+
+impl FrameData {
+    /// Create FrameData from pre-copied bytes
+    pub fn from_bytes(data: Arc<[u8]>) -> Self {
+        FrameData::Copied(data)
+    }
+
+    /// Create FrameData from a mapped GStreamer buffer (zero-copy)
+    pub fn from_mapped_buffer(buffer: MappedBuffer<Readable>) -> Self {
+        FrameData::Mapped(Arc::new(buffer))
+    }
+
+    /// Get the length of the frame data in bytes
+    pub fn len(&self) -> usize {
+        match self {
+            FrameData::Copied(data) => data.len(),
+            FrameData::Mapped(buf) => buf.len(),
+        }
+    }
+
+    /// Check if the frame data is empty
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Get a raw pointer to the data for deduplication checks
+    pub fn as_ptr(&self) -> *const u8 {
+        match self {
+            FrameData::Copied(data) => data.as_ptr(),
+            FrameData::Mapped(buf) => buf.as_ptr(),
+        }
+    }
+}
+
+impl std::fmt::Debug for FrameData {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            FrameData::Copied(data) => write!(f, "FrameData::Copied({} bytes)", data.len()),
+            FrameData::Mapped(buf) => write!(f, "FrameData::Mapped({} bytes)", buf.len()),
+        }
+    }
+}
+
+impl AsRef<[u8]> for FrameData {
+    fn as_ref(&self) -> &[u8] {
+        match self {
+            FrameData::Copied(data) => data.as_ref(),
+            FrameData::Mapped(buf) => buf.as_slice(),
+        }
+    }
+}
+
+impl std::ops::Deref for FrameData {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        self.as_ref()
+    }
+}
+
 /// Camera backend type (PipeWire only)
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub enum CameraBackendType {
@@ -151,12 +224,26 @@ pub enum PixelFormat {
 pub struct CameraFrame {
     pub width: u32,
     pub height: u32,
-    pub data: Arc<[u8]>,      // Zero-copy frame data (RGBA format)
-    pub format: PixelFormat,  // Pixel format of the data (always RGBA)
-    pub stride: u32,          // Row stride (bytes per row, may include padding)
+    pub data: FrameData,      // Frame data (RGBA format) - zero-copy when from GStreamer
+    pub format: PixelFormat,  // Pixel format of the data (always RGBA)
+    pub stride: u32,          // Row stride (bytes per row, may include padding)
     pub captured_at: Instant, // Timestamp when frame was captured (for latency diagnostics)
 }
 
+impl CameraFrame {
+    /// Get the frame data as a byte slice
+    #[inline]
+    pub fn data_slice(&self) -> &[u8] {
+        &self.data
+    }
+
+    /// Get a raw pointer to the data for deduplication checks
+    #[inline]
+    pub fn data_ptr(&self) -> usize {
+        self.data.as_ptr() as usize
+    }
+}
+
 /// Frame receiver type for preview streams
 pub type FrameReceiver = cosmic::iced::futures::channel::mpsc::Receiver<CameraFrame>;
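
Note: because `FrameData` implements `Deref<Target = [u8]>` and `AsRef<[u8]>`, downstream code stays agnostic of the variant - this is also what lets `rgba_data()` and `data_slice()` return `&self.data` directly via deref coercion. A small usage sketch, assuming the `FrameData` above is in scope; `checksum` and `demo` are illustrative names:

    use std::sync::Arc;

    // Works for Copied and Mapped alike: Deref exposes the plain slice API.
    fn checksum(data: &FrameData) -> u32 {
        data.iter().fold(0u32, |acc, b| acc.wrapping_add(u32::from(*b)))
    }

    fn demo() {
        let frame = FrameData::from_bytes(Arc::from(vec![1u8, 2, 3].into_boxed_slice()));
        assert_eq!(frame.len(), 3);
        assert!(!frame.is_empty());
        assert_eq!(checksum(&frame), 6);
        println!("{frame:?}"); // Debug impl prints "FrameData::Copied(3 bytes)"
    }
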
diff --git a/src/backends/virtual_camera/file_source.rs b/src/backends/virtual_camera/file_source.rs
index 7e16137..6c42d45 100644
--- a/src/backends/virtual_camera/file_source.rs
+++ b/src/backends/virtual_camera/file_source.rs
@@ -6,7 +6,9 @@
 //! for use with the virtual camera output. Videos also stream audio
 //! to a virtual microphone via PipeWire.
 
-use crate::backends::camera::types::{BackendError, BackendResult, CameraFrame, PixelFormat};
+use crate::backends::camera::types::{
+    BackendError, BackendResult, CameraFrame, FrameData, PixelFormat,
+};
 use crate::constants::{file_formats, virtual_camera as vc_timing};
 use std::path::Path;
 use std::sync::Arc;
@@ -201,7 +203,7 @@ fn extract_frame_from_sample(sample: &gstreamer::Sample) -> BackendResult<CameraFrame> {
     let data: Vec<u8> = map.as_slice().to_vec();
 
     Ok(CameraFrame {
-        data: Arc::from(data.into_boxed_slice()),
+        data: FrameData::Copied(Arc::from(data.into_boxed_slice())),
         width,
         height,
         stride: width * 4,
@@ -289,7 +291,7 @@ pub fn load_image_as_frame(path: &Path) -> BackendResult<CameraFrame> {
     info!(width, height, "Image loaded successfully");
 
     Ok(CameraFrame {
-        data: Arc::from(data.into_boxed_slice()),
+        data: FrameData::Copied(Arc::from(data.into_boxed_slice())),
         width,
         height,
         stride: width * 4, // RGBA = 4 bytes per pixel
@@ -508,7 +510,7 @@ impl VideoDecoder {
         let data: Vec<u8> = map.as_slice().to_vec();
 
         Some(CameraFrame {
-            data: Arc::from(data.into_boxed_slice()),
+            data: FrameData::Copied(Arc::from(data.into_boxed_slice())),
            width: self.width,
             height: self.height,
             stride: self.width * 4,
diff --git a/src/backends/virtual_camera/mod.rs b/src/backends/virtual_camera/mod.rs
index 64d3b01..ba9084c 100644
--- a/src/backends/virtual_camera/mod.rs
+++ b/src/backends/virtual_camera/mod.rs
@@ -247,7 +247,7 @@ impl VirtualCameraManager {
 
         // If stride matches expected row size and no flip needed, use data directly
         if stride == row_bytes && !self.flip_horizontal {
-            return pipeline.push_frame_rgba(Arc::clone(&frame.data), frame.width, frame.height);
+            return pipeline.push_frame_rgba(frame.data.clone(), frame.width, frame.height);
         }
 
         // Copy data (and apply horizontal flip if needed)
diff --git a/src/cli.rs b/src/cli.rs
index 984458b..dcdff36 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -485,7 +485,7 @@ fn is_supported_image(path: &PathBuf) -> bool {
 /// Load a DNG file and convert to RGBA CameraFrame
 fn load_dng_frame(path: &PathBuf) -> Result<CameraFrame, Box<dyn std::error::Error>> {
-    use camera::backends::camera::types::{CameraFrame, PixelFormat};
+    use camera::backends::camera::types::{CameraFrame, FrameData, PixelFormat};
     use image::GenericImageView;
     use std::fs::File;
     use std::io::BufReader;
@@ -499,7 +499,7 @@ fn load_dng_frame(path: &PathBuf) -> Result<CameraFrame, Box<dyn std::error::Error>> {
-    let data: Arc<[u8]> = Arc::from(rgba.into_raw().into_boxed_slice());
+    let data = FrameData::Copied(Arc::from(rgba.into_raw().into_boxed_slice()));
 
     Ok(CameraFrame {
         width,
@@ -515,7 +515,7 @@ fn load_dng_frame(path: &PathBuf) -> Result<CameraFrame, Box<dyn std::error::Error>> {
 fn load_burst_mode_frames(
 ) -> Result<Vec<CameraFrame>, Box<dyn std::error::Error>> {
-    use camera::backends::camera::types::{CameraFrame, PixelFormat};
+    use camera::backends::camera::types::{CameraFrame, FrameData, PixelFormat};
     use image::GenericImageView;
 
     let mut frames = Vec::new();
@@ -532,7 +532,7 @@ fn load_burst_mode_frames(
         let img = image::open(path)?;
         let (width, height) = img.dimensions();
         let rgba = img.to_rgba8();
-        let data: Arc<[u8]> = Arc::from(rgba.into_raw().into_boxed_slice());
+        let data = FrameData::Copied(Arc::from(rgba.into_raw().into_boxed_slice()));
 
         CameraFrame {
             width,
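
Note: the `FrameData::Copied(Arc::from(...into_boxed_slice()))` construction repeats across file_source.rs, cli.rs, and recorder.rs below. A `From<Vec<u8>>` impl - a hypothetical addition, not part of this diff - would collapse those call sites; sketch, assuming `FrameData` is in scope:

    use std::sync::Arc;

    // Hypothetical convenience impl (not in this diff).
    impl From<Vec<u8>> for FrameData {
        fn from(v: Vec<u8>) -> Self {
            FrameData::Copied(Arc::from(v.into_boxed_slice()))
        }
    }

    // Call sites could then read, e.g.:
    //     data: rgba.into_raw().into(),
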
diff --git a/src/pipelines/photo/capture.rs b/src/pipelines/photo/capture.rs
index c3f5d8a..ee21c94 100644
--- a/src/pipelines/photo/capture.rs
+++ b/src/pipelines/photo/capture.rs
@@ -106,14 +106,14 @@ impl PhotoCapture {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::backends::camera::types::PixelFormat;
+    use crate::backends::camera::types::{FrameData, PixelFormat};
 
     #[test]
     fn test_capture_from_frame() {
         let frame = CameraFrame {
             width: 1920,
             height: 1080,
-            data: Arc::from(vec![0u8; 1920 * 1080 * 4]), // RGBA size (4 bytes per pixel)
+            data: FrameData::Copied(Arc::from(vec![0u8; 1920 * 1080 * 4].into_boxed_slice())), // RGBA size (4 bytes per pixel)
             format: PixelFormat::RGBA,
             stride: 1920 * 4, // RGBA stride
             captured_at: std::time::Instant::now(),
diff --git a/src/pipelines/video/recorder.rs b/src/pipelines/video/recorder.rs
index 400b5ce..a32eb4d 100644
--- a/src/pipelines/video/recorder.rs
+++ b/src/pipelines/video/recorder.rs
@@ -10,11 +10,12 @@
 use super::encoder_selection::{EncoderConfig, select_encoders};
 use super::muxer::{create_muxer, link_audio_to_muxer, link_muxer_to_sink, link_video_to_muxer};
-use crate::backends::camera::types::{CameraFrame, SensorRotation};
+use crate::backends::camera::types::{CameraFrame, FrameData, SensorRotation};
 use gstreamer as gst;
 use gstreamer::prelude::*;
 use gstreamer_app as gst_app;
 use std::path::PathBuf;
+use std::sync::Arc;
 use tracing::{debug, error, info, warn};
 
 /// Video recorder using the new pipeline architecture
@@ -574,7 +575,9 @@ impl VideoRecorder {
         let stride = video_info.stride()[0] as u32;
 
         let frame = CameraFrame {
-            data: map.as_slice().to_vec().into(),
+            data: FrameData::Copied(Arc::from(
+                map.as_slice().to_vec().into_boxed_slice(),
+            )),
             width: video_info.width(),
             height: video_info.height(),
             format: