From cf617d0d1e907834c5d0f7c7443126a61634c91d Mon Sep 17 00:00:00 2001 From: Butterfly Dev Date: Tue, 7 Apr 2026 04:57:24 +0000 Subject: [PATCH] =?UTF-8?q?agent:=20capture.rs=20=E2=80=94=20raw=20BGRA=20?= =?UTF-8?q?output=20(no=20encoding=20here),=20encoder=20handles=20the=20re?= =?UTF-8?q?st?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agent/src/capture.rs | 194 +++++++++++++------------------------------ 1 file changed, 57 insertions(+), 137 deletions(-) diff --git a/agent/src/capture.rs b/agent/src/capture.rs index 381546a..287ae87 100644 --- a/agent/src/capture.rs +++ b/agent/src/capture.rs @@ -1,33 +1,36 @@ //! Screen capture module. //! //! Uses the `scrap` crate to capture display frames efficiently via platform APIs -//! (DXGI on Windows, X11 on Linux, CoreGraphics on macOS). Frames are converted -//! from BGRA to RGB and encoded as JPEG with configurable quality. +//! (DXGI on Windows, X11 on Linux, CoreGraphics on macOS). Returns raw BGRA pixel +//! data that can be fed directly into an encoder (H.264 or JPEG). use anyhow::{Context, Result}; -use base64::{Engine, engine::general_purpose::STANDARD}; -use image::{ImageBuffer, Rgb}; use log::{info, warn}; use scrap::{Capturer, Display}; -use std::time::Instant; + +/// A captured display frame containing raw BGRA pixel data. +pub struct RawFrame { + /// BGRA pixel data (4 bytes per pixel). + pub bgra: Vec, + /// Frame width in pixels. + pub width: usize, + /// Frame height in pixels. + pub height: usize, +} /// Manages screen capture for a specific display. pub struct ScreenCapture { capturer: Capturer, width: usize, height: usize, - quality: u8, frame_count: u64, - bytes_sent: u64, } impl ScreenCapture { /// Initialize capture for the given display index. /// /// `display_idx` 0 = primary monitor, 1 = second, etc. - /// `quality` is the JPEG encoding quality (1–100). - pub fn new(display_idx: usize, quality: u8) -> Result { - // Get all available displays. + pub fn new(display_idx: usize) -> Result { let displays = Display::all() .map_err(|e| anyhow::anyhow!("failed to enumerate displays: {}", e))?; @@ -46,114 +49,88 @@ impl ScreenCapture { let width = display.width(); let height = display.height(); - info!( - "initializing capture: display {} ({}x{})", - display_idx, width, height - ); + info!("initializing capture: display {} ({}x{})", display_idx, width, height); let capturer = Capturer::new(display) .map_err(|e| anyhow::anyhow!("failed to create capturer: {}", e))?; - // The first call to `frame()` may fail on some platforms because the - // internal buffer isn't ready yet. Spin until we get a frame. let capturer = wait_for_first_frame(capturer); Ok(Self { capturer, width, height, - quality: quality.clamp(1, 100), frame_count: 0, - bytes_sent: 0, }) } - /// Capture a single frame and return it as a base64-encoded JPEG string. + /// Capture a single frame as raw BGRA pixel data. /// - /// This method: - /// 1. Waits for the next frame from the OS (blocks briefly). - /// 2. Converts BGRA pixel data to RGB. - /// 3. Encodes as JPEG with the configured quality. - /// 4. Base64-encodes the result. - pub fn capture_frame(&mut self) -> Result { - let start = Instant::now(); - - // Capture the raw frame (BGRA format on all platforms). + /// The caller is responsible for encoding (H.264, JPEG, etc.). + pub fn capture_raw(&mut self) -> Result { let frame = self .capturer .frame() .map_err(|e| anyhow::anyhow!("frame capture failed: {}", e))?; - let frame_len = frame.len(); - let expected_len = self.width * self.height * 4; // BGRA = 4 bytes/pixel + let expected_len = self.width * self.height * 4; if frame.len() < expected_len { anyhow::bail!( "frame size mismatch: got {} bytes, expected at least {}", - frame_len, + frame.len(), expected_len ); } - // Convert BGRA → RGB (drop the alpha channel). - let rgb_data = bgra_to_rgb(frame, self.width, self.height); - - // Encode as JPEG. - let jpeg_bytes = encode_jpeg( - &rgb_data, - self.width as u32, - self.height as u32, - self.quality, - ) - .context("JPEG encoding failed")?; - - // Base64 encode. - let b64 = STANDARD.encode(&jpeg_bytes); - - // Track stats. self.frame_count += 1; - self.bytes_sent += jpeg_bytes.len() as u64; - let elapsed = start.elapsed(); - log::trace!( - "captured frame: {}x{} → {} bytes JPEG ({} bytes base64) in {:.1}ms", - self.width, - self.height, - jpeg_bytes.len(), - b64.len(), - elapsed.as_secs_f64() * 1000.0 - ); - - Ok(b64) + Ok(RawFrame { + bgra: frame.to_vec(), + width: self.width, + height: self.height, + }) } - /// Get the capture resolution as a string (e.g., "1920x1080"). + /// Capture and encode as JPEG (legacy helper, returns base64 string). + pub fn capture_jpeg(&mut self, quality: u8) -> Result<(String, usize, usize)> { + use base64::{Engine, engine::general_purpose::STANDARD}; + use image::{ImageBuffer, Rgb}; + + let raw = self.capture_raw()?; + + // BGRA → RGB. + let mut rgb = Vec::with_capacity(raw.width * raw.height * 3); + for chunk in raw.bgra.chunks_exact(4) { + rgb.push(chunk[2]); // R + rgb.push(chunk[1]); // G + rgb.push(chunk[0]); // B + } + + let img_buffer = ImageBuffer::>::from_raw( + raw.width as u32, + raw.height as u32, + rgb, + ).context("failed to create image buffer")?; + + let mut jpeg_bytes = Vec::new(); + let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, quality.clamp(1, 100)); + encoder.encode_image(&img_buffer).context("JPEG encode failed")?; + + let b64 = STANDARD.encode(&jpeg_bytes); + Ok((b64, raw.width, raw.height)) + } + + /// Get the capture resolution as a string. pub fn resolution(&self) -> String { format!("{}x{}", self.width, self.height) } - /// Get the display width. - pub fn width(&self) -> usize { - self.width - } - - /// Get the display height. - pub fn height(&self) -> usize { - self.height - } - - /// Get total frames captured since start. - pub fn frame_count(&self) -> u64 { - self.frame_count - } - - /// Get total bytes sent (JPEG, before base64). - pub fn bytes_sent(&self) -> u64 { - self.bytes_sent - } + pub fn width(&self) -> usize { self.width } + pub fn height(&self) -> usize { self.height } + pub fn frame_count(&self) -> u64 { self.frame_count } } /// Spin until the capturer produces its first valid frame. -/// On some platforms the first few `frame()` calls fail. fn wait_for_first_frame(mut capturer: Capturer) -> Capturer { let max_attempts = 30; for i in 0..max_attempts { @@ -175,60 +152,3 @@ fn wait_for_first_frame(mut capturer: Capturer) -> Capturer { } capturer } - -/// Convert BGRA pixel data to RGB by dropping the alpha channel. -fn bgra_to_rgb(bgra: &[u8], width: usize, height: usize) -> Vec { - let mut rgb = Vec::with_capacity(width * height * 3); - for chunk in bgra.chunks_exact(4) { - // BGRA → RGB: reverse B,G,R order to R,G,B, skip A - rgb.push(chunk[2]); // R - rgb.push(chunk[1]); // G - rgb.push(chunk[0]); // B - } - rgb -} - -/// Encode raw RGB data as JPEG with the given quality. -fn encode_jpeg( - rgb_data: &[u8], - width: u32, - height: u32, - quality: u8, -) -> Result> { - let img_buffer = ImageBuffer::>::from_raw(width, height, rgb_data) - .context("failed to create image buffer from RGB data")?; - - let mut jpeg_bytes = Vec::new(); - let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, quality); - encoder - .encode_image(&img_buffer) - .context("JPEG encode failed")?; - - Ok(jpeg_bytes) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_bgra_to_rgb() { - // BGRA pixel: Blue=10, Green=20, Red=30, Alpha=255 - let bgra = vec![10, 20, 30, 255, 40, 50, 60, 128]; - let rgb = bgra_to_rgb(&bgra, 2, 1); - assert_eq!(rgb, vec![30, 20, 10, 60, 50, 40]); // RGB reversed, alpha dropped - } - - #[test] - fn test_encode_jpeg() { - // Create a 4x4 red image - let rgb_data = vec![255u8; 4 * 4 * 3]; - let result = encode_jpeg(&rgb_data, 4, 4, 80); - assert!(result.is_ok()); - let jpeg = result.unwrap(); - assert!(!jpeg.is_empty()); - // JPEG files start with FF D8 - assert_eq!(jpeg[0], 0xFF); - assert_eq!(jpeg[1], 0xD8); - } -}