agent: capture.rs — raw BGRA output (no encoding here), encoder handles the rest

This commit is contained in:
Butterfly Dev 2026-04-07 04:57:24 +00:00
parent b7c254a2c0
commit cf617d0d1e

View File

@ -1,33 +1,36 @@
//! Screen capture module.
//!
//! Uses the `scrap` crate to capture display frames efficiently via platform APIs
//! (DXGI on Windows, X11 on Linux, CoreGraphics on macOS). Frames are converted
//! from BGRA to RGB and encoded as JPEG with configurable quality.
//! (DXGI on Windows, X11 on Linux, CoreGraphics on macOS). Returns raw BGRA pixel
//! data that can be fed directly into an encoder (H.264 or JPEG).
use anyhow::{Context, Result};
use base64::{Engine, engine::general_purpose::STANDARD};
use image::{ImageBuffer, Rgb};
use log::{info, warn};
use scrap::{Capturer, Display};
use std::time::Instant;
/// A captured display frame containing raw BGRA pixel data.
///
/// The frame owns its pixel buffer, so it can be handed to an encoder
/// independently of the capturer that produced it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawFrame {
    /// BGRA pixel data (4 bytes per pixel).
    pub bgra: Vec<u8>,
    /// Frame width in pixels.
    pub width: usize,
    /// Frame height in pixels.
    pub height: usize,
}
/// Manages screen capture for a specific display.
///
/// Bundles the platform capturer with the display dimensions and running
/// statistics for the frames produced through it.
pub struct ScreenCapture {
/// Capturer whose `frame()` method is called to obtain pixel data.
capturer: Capturer,
/// Display width in pixels, read from the display when the capture is created.
width: usize,
/// Display height in pixels, read from the display when the capture is created.
height: usize,
/// JPEG encoding quality; the constructor clamps it to the range 1 to 100.
quality: u8,
/// Number of frames captured; starts at 0 and is incremented per encoded frame.
frame_count: u64,
/// Running total of JPEG bytes produced (counted before base64 encoding).
bytes_sent: u64,
}
impl ScreenCapture {
/// Initialize capture for the given display index.
///
/// `display_idx` 0 = primary monitor, 1 = second, etc.
/// `quality` is the JPEG encoding quality (1–100).
pub fn new(display_idx: usize, quality: u8) -> Result<Self> {
// Get all available displays.
pub fn new(display_idx: usize) -> Result<Self> {
let displays = Display::all()
.map_err(|e| anyhow::anyhow!("failed to enumerate displays: {}", e))?;
@ -46,114 +49,88 @@ impl ScreenCapture {
let width = display.width();
let height = display.height();
info!(
"initializing capture: display {} ({}x{})",
display_idx, width, height
);
info!("initializing capture: display {} ({}x{})", display_idx, width, height);
let capturer = Capturer::new(display)
.map_err(|e| anyhow::anyhow!("failed to create capturer: {}", e))?;
// The first call to `frame()` may fail on some platforms because the
// internal buffer isn't ready yet. Spin until we get a frame.
let capturer = wait_for_first_frame(capturer);
Ok(Self {
capturer,
width,
height,
quality: quality.clamp(1, 100),
frame_count: 0,
bytes_sent: 0,
})
}
/// Capture a single frame and return it as a base64-encoded JPEG string.
/// Capture a single frame as raw BGRA pixel data.
///
/// This method:
/// 1. Waits for the next frame from the OS (blocks briefly).
/// 2. Converts BGRA pixel data to RGB.
/// 3. Encodes as JPEG with the configured quality.
/// 4. Base64-encodes the result.
pub fn capture_frame(&mut self) -> Result<String> {
let start = Instant::now();
// Capture the raw frame (BGRA format on all platforms).
/// The caller is responsible for encoding (H.264, JPEG, etc.).
pub fn capture_raw(&mut self) -> Result<RawFrame> {
let frame = self
.capturer
.frame()
.map_err(|e| anyhow::anyhow!("frame capture failed: {}", e))?;
let frame_len = frame.len();
let expected_len = self.width * self.height * 4; // BGRA = 4 bytes/pixel
let expected_len = self.width * self.height * 4;
if frame.len() < expected_len {
anyhow::bail!(
"frame size mismatch: got {} bytes, expected at least {}",
frame_len,
frame.len(),
expected_len
);
}
// Convert BGRA → RGB (drop the alpha channel).
let rgb_data = bgra_to_rgb(frame, self.width, self.height);
// Encode as JPEG.
let jpeg_bytes = encode_jpeg(
&rgb_data,
self.width as u32,
self.height as u32,
self.quality,
)
.context("JPEG encoding failed")?;
// Base64 encode.
let b64 = STANDARD.encode(&jpeg_bytes);
// Track stats.
self.frame_count += 1;
self.bytes_sent += jpeg_bytes.len() as u64;
let elapsed = start.elapsed();
log::trace!(
"captured frame: {}x{} → {} bytes JPEG ({} bytes base64) in {:.1}ms",
self.width,
self.height,
jpeg_bytes.len(),
b64.len(),
elapsed.as_secs_f64() * 1000.0
);
Ok(b64)
Ok(RawFrame {
bgra: frame.to_vec(),
width: self.width,
height: self.height,
})
}
/// Get the capture resolution as a string (e.g., "1920x1080").
/// Capture and encode as JPEG (legacy helper, returns base64 string).
pub fn capture_jpeg(&mut self, quality: u8) -> Result<(String, usize, usize)> {
use base64::{Engine, engine::general_purpose::STANDARD};
use image::{ImageBuffer, Rgb};
let raw = self.capture_raw()?;
// BGRA → RGB.
let mut rgb = Vec::with_capacity(raw.width * raw.height * 3);
for chunk in raw.bgra.chunks_exact(4) {
rgb.push(chunk[2]); // R
rgb.push(chunk[1]); // G
rgb.push(chunk[0]); // B
}
let img_buffer = ImageBuffer::<Rgb<u8>>::from_raw(
raw.width as u32,
raw.height as u32,
rgb,
).context("failed to create image buffer")?;
let mut jpeg_bytes = Vec::new();
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, quality.clamp(1, 100));
encoder.encode_image(&img_buffer).context("JPEG encode failed")?;
let b64 = STANDARD.encode(&jpeg_bytes);
Ok((b64, raw.width, raw.height))
}
/// Get the capture resolution as a string.
pub fn resolution(&self) -> String {
format!("{}x{}", self.width, self.height)
}
/// Get the display width.
pub fn width(&self) -> usize {
self.width
}
/// Get the display height.
pub fn height(&self) -> usize {
self.height
}
/// Get total frames captured since start.
pub fn frame_count(&self) -> u64 {
self.frame_count
}
/// Get total bytes sent (JPEG, before base64).
pub fn bytes_sent(&self) -> u64 {
self.bytes_sent
}
pub fn width(&self) -> usize { self.width }
pub fn height(&self) -> usize { self.height }
pub fn frame_count(&self) -> u64 { self.frame_count }
}
/// Spin until the capturer produces its first valid frame.
/// On some platforms the first few `frame()` calls fail.
fn wait_for_first_frame(mut capturer: Capturer) -> Capturer {
let max_attempts = 30;
for i in 0..max_attempts {
@ -175,60 +152,3 @@ fn wait_for_first_frame(mut capturer: Capturer) -> Capturer {
}
capturer
}
/// Produce tightly packed RGB bytes from BGRA input.
///
/// Each complete 4-byte group is reordered from B,G,R,A to R,G,B and the alpha
/// byte is discarded; any trailing bytes that do not form a full group are
/// ignored. `width` and `height` are used only to size the output allocation.
fn bgra_to_rgb(bgra: &[u8], width: usize, height: usize) -> Vec<u8> {
    let mut out = Vec::with_capacity(width * height * 3);
    for pixel in bgra.chunks_exact(4) {
        // `chunks_exact(4)` only yields 4-byte slices, so this always matches.
        if let &[blue, green, red, _alpha] = pixel {
            out.extend_from_slice(&[red, green, blue]);
        }
    }
    out
}
/// Encode raw RGB data as JPEG with the given quality.
///
/// `rgb_data` is interpreted as 3 bytes per pixel for a `width` x `height`
/// image. `quality` is passed to the encoder unchanged.
///
/// # Errors
///
/// Fails if `rgb_data` is too small for the given dimensions or if the JPEG
/// encoder reports an error.
fn encode_jpeg(
    rgb_data: &[u8],
    width: u32,
    height: u32,
    quality: u8,
) -> Result<Vec<u8>> {
    // Both generic parameters have to be present in the turbofish; `_` lets
    // the container type (`&[u8]`) be inferred from the argument.
    let img_buffer = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, rgb_data)
        .context("failed to create image buffer from RGB data")?;

    let mut jpeg_bytes = Vec::new();
    let mut encoder =
        image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, quality);
    encoder
        .encode_image(&img_buffer)
        .context("JPEG encode failed")?;

    Ok(jpeg_bytes)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Channel order is reversed per pixel and the alpha byte is dropped.
    #[test]
    fn test_bgra_to_rgb() {
        // BGRA pixel: Blue=10, Green=20, Red=30, Alpha=255
        let bgra = vec![10, 20, 30, 255, 40, 50, 60, 128];
        let rgb = bgra_to_rgb(&bgra, 2, 1);
        assert_eq!(rgb, vec![30, 20, 10, 60, 50, 40]); // RGB reversed, alpha dropped
    }

    /// Bytes that do not form a complete 4-byte pixel are ignored.
    #[test]
    fn test_bgra_to_rgb_ignores_trailing_bytes() {
        let bgra = vec![1, 2, 3, 4, 5];
        assert_eq!(bgra_to_rgb(&bgra, 1, 1), vec![3, 2, 1]);
        assert!(bgra_to_rgb(&[], 0, 0).is_empty());
    }

    /// A valid buffer encodes to a non-empty JPEG stream.
    #[test]
    fn test_encode_jpeg() {
        // 4x4 image with every channel at 255 (solid white).
        let rgb_data = vec![255u8; 4 * 4 * 3];
        let result = encode_jpeg(&rgb_data, 4, 4, 80);
        assert!(result.is_ok());
        let jpeg = result.unwrap();
        assert!(!jpeg.is_empty());
        // JPEG files start with FF D8
        assert_eq!(jpeg[0], 0xFF);
        assert_eq!(jpeg[1], 0xD8);
    }

    /// A buffer too small for the stated dimensions is reported as an error.
    #[test]
    fn test_encode_jpeg_rejects_short_buffer() {
        let rgb_data = [0u8; 3];
        assert!(encode_jpeg(&rgb_data, 4, 4, 80).is_err());
    }
}