agent: capture.rs — screen capture (scrap), BGRA→RGB, JPEG encoding, base64

This commit is contained in:
Butterfly Dev 2026-04-07 04:35:39 +00:00
parent 5a26c7c60e
commit 4c93b47a5d

234
agent/src/capture.rs Normal file
View File

@ -0,0 +1,234 @@
//! Screen capture module.
//!
//! Uses the `scrap` crate to capture display frames efficiently via platform APIs
//! (DXGI on Windows, X11 on Linux, CoreGraphics on macOS). Frames are converted
//! from BGRA to RGB and encoded as JPEG with configurable quality.
use anyhow::{Context, Result};
use base64::{Engine, engine::general_purpose::STANDARD};
use image::{ImageBuffer, Rgb};
use log::{info, warn};
use scrap::{Capturer, Display};
use std::time::Instant;
/// Manages screen capture for a specific display.
///
/// Owns the platform capturer together with the capture geometry, the JPEG
/// quality setting, and running statistics that are updated by
/// `capture_frame`.
pub struct ScreenCapture {
/// Capturer bound to the display selected in `new`; source of raw frames.
capturer: Capturer,
/// Display width as reported by the display when the capturer was created.
/// Used as a pixel count when sizing the frame buffer.
width: usize,
/// Display height as reported by the display when the capturer was created.
/// Used as a pixel count when sizing the frame buffer.
height: usize,
/// JPEG quality handed to the encoder; clamped to 1..=100 in `new`.
quality: u8,
/// Number of frames that were captured and encoded successfully.
frame_count: u64,
/// Running total of encoded JPEG sizes in bytes (measured before base64).
bytes_sent: u64,
}
impl ScreenCapture {
    /// Initialize capture for the given display index.
    ///
    /// `display_idx` indexes the list returned by `Display::all()`: 0 is the
    /// first display reported by the platform (normally the primary monitor),
    /// 1 the second, and so on.
    /// `quality` is the JPEG encoding quality (1–100); values outside that
    /// range are clamped rather than rejected.
    ///
    /// # Errors
    ///
    /// Returns an error when the displays cannot be enumerated, when no
    /// display exists, when `display_idx` is out of range, or when the
    /// capturer cannot be created for the selected display.
    pub fn new(display_idx: usize, quality: u8) -> Result<Self> {
        // Get all available displays.
        let displays = Display::all()
            .map_err(|e| anyhow::anyhow!("failed to enumerate displays: {}", e))?;
        if displays.is_empty() {
            anyhow::bail!("no displays detected — is a display / X server available?");
        }

        // Select the requested display; `nth` doubles as the range check so
        // no `unwrap` is needed.
        let available = displays.len();
        let display = displays
            .into_iter()
            .nth(display_idx)
            .with_context(|| {
                format!(
                    "display index {} out of range ({} displays available)",
                    display_idx, available
                )
            })?;

        let width = display.width();
        let height = display.height();
        info!(
            "initializing capture: display {} ({}x{})",
            display_idx, width, height
        );

        let capturer = Capturer::new(display)
            .map_err(|e| anyhow::anyhow!("failed to create capturer: {}", e))?;

        // The first call to `frame()` may fail on some platforms because the
        // internal buffer isn't ready yet. Poll until a frame arrives; the
        // helper hands the capturer back even if it never does (it only logs).
        let capturer = wait_for_first_frame(capturer);

        Ok(Self {
            capturer,
            width,
            height,
            quality: quality.clamp(1, 100),
            frame_count: 0,
            bytes_sent: 0,
        })
    }

    /// Capture a single frame and return it as a base64-encoded JPEG string.
    ///
    /// This method:
    /// 1. Requests the next frame from the capturer.
    /// 2. Converts BGRA pixel data to RGB.
    /// 3. Encodes as JPEG with the configured quality.
    /// 4. Base64-encodes the result.
    ///
    /// The frame and byte counters are updated only when every step succeeds.
    ///
    /// # Errors
    ///
    /// Returns an error when the capturer fails to deliver a frame, when the
    /// frame is smaller than `width * height * 4` bytes, or when JPEG encoding
    /// fails.
    ///
    /// NOTE(review): no retry happens here, so a "frame not ready yet" error
    /// from the capturer is surfaced to the caller like any other failure —
    /// confirm callers treat that as transient.
    pub fn capture_frame(&mut self) -> Result<String> {
        let start = Instant::now();

        // Keep the borrowed frame in its own scope so the capturer's buffer is
        // released before the (comparatively slow) JPEG encoding starts.
        let rgb_data = {
            // Capture the raw frame (treated as BGRA, 4 bytes per pixel).
            let frame = self
                .capturer
                .frame()
                .map_err(|e| anyhow::anyhow!("frame capture failed: {}", e))?;

            let frame_len = frame.len();
            let expected_len = self.width * self.height * 4; // BGRA = 4 bytes/pixel
            if frame_len < expected_len {
                anyhow::bail!(
                    "frame size mismatch: got {} bytes, expected at least {}",
                    frame_len,
                    expected_len
                );
            }

            // Convert BGRA → RGB (drop the alpha channel). The frame is passed
            // by reference so it coerces to the `&[u8]` the helper expects.
            bgra_to_rgb(&frame, self.width, self.height)
        };

        // Encode as JPEG. Display dimensions are assumed to fit in `u32`.
        let jpeg_bytes = encode_jpeg(
            &rgb_data,
            self.width as u32,
            self.height as u32,
            self.quality,
        )
        .context("JPEG encoding failed")?;

        // Base64 encode.
        let b64 = STANDARD.encode(&jpeg_bytes);

        // Track stats.
        self.frame_count += 1;
        self.bytes_sent += jpeg_bytes.len() as u64;

        let elapsed = start.elapsed();
        log::trace!(
            "captured frame: {}x{} → {} bytes JPEG ({} bytes base64) in {:.1}ms",
            self.width,
            self.height,
            jpeg_bytes.len(),
            b64.len(),
            elapsed.as_secs_f64() * 1000.0
        );

        Ok(b64)
    }

    /// Get the capture resolution as a string (e.g., "1920x1080").
    pub fn resolution(&self) -> String {
        format!("{}x{}", self.width, self.height)
    }

    /// Get the display width.
    pub fn width(&self) -> usize {
        self.width
    }

    /// Get the display height.
    pub fn height(&self) -> usize {
        self.height
    }

    /// Get total frames captured since start.
    pub fn frame_count(&self) -> u64 {
        self.frame_count
    }

    /// Get total JPEG bytes produced since start (measured before base64).
    pub fn bytes_sent(&self) -> u64 {
        self.bytes_sent
    }
}
/// Poll the capturer until it delivers its first frame.
///
/// Up to 30 attempts are made, 100 ms apart. The capturer is handed back in
/// every case: on success an info line records how many attempts were needed,
/// and if the final attempt still fails only a warning is logged.
fn wait_for_first_frame(mut capturer: Capturer) -> Capturer {
    const MAX_ATTEMPTS: u32 = 30;
    let retry_delay = std::time::Duration::from_millis(100);

    for attempt in 1..=MAX_ATTEMPTS {
        // Only success/failure matters here, so the frame itself is dropped
        // right away; that also ends the borrow on `capturer`.
        let probe = capturer.frame().map(drop);

        let failure = match probe {
            Ok(()) => {
                info!("capture ready (took {} attempts)", attempt);
                return capturer;
            }
            Err(err) => err,
        };

        if attempt == MAX_ATTEMPTS {
            warn!("first frame never ready after {} attempts: {}", MAX_ATTEMPTS, failure);
            break;
        }

        log::debug!("waiting for first frame (attempt {}): {}", attempt, failure);
        std::thread::sleep(retry_delay);
    }

    capturer
}
/// Convert BGRA pixel data to tightly packed RGB by dropping the alpha channel.
///
/// `bgra` holds `height` rows of at least `width` BGRA pixels each. Rows may
/// carry trailing padding: when the buffer is larger than `width * height * 4`
/// bytes the row stride is derived as `bgra.len() / height`, and only the
/// first `width` pixels of each row are converted. The result then contains
/// exactly `width * height * 3` bytes.
///
/// If either dimension is zero, or the buffer is too short to hold
/// `width * height` pixels, the dimensions are ignored and every complete
/// 4-byte pixel in `bgra` is converted in order.
fn bgra_to_rgb(bgra: &[u8], width: usize, height: usize) -> Vec<u8> {
    const BGRA_BYTES: usize = 4;
    const RGB_BYTES: usize = 3;

    /// Append the RGB form of every complete BGRA pixel in `pixels` to `out`.
    fn push_rgb(out: &mut Vec<u8>, pixels: &[u8]) {
        for px in pixels.chunks_exact(BGRA_BYTES) {
            // BGRA → RGB: reverse B,G,R order to R,G,B, skip A.
            out.extend_from_slice(&[px[2], px[1], px[0]]);
        }
    }

    // Bytes of real pixel data per row, provided the dimensions are non-zero,
    // do not overflow, and the buffer is large enough to hold them.
    let row_bytes = width
        .checked_mul(BGRA_BYTES)
        .and_then(|row| row.checked_mul(height).map(|total| (row, total)))
        .filter(|&(_, total)| total > 0 && total <= bgra.len())
        .map(|(row, _)| row);

    match row_bytes {
        Some(row_bytes) => {
            // Any surplus bytes are treated as per-row padding; `stride` is
            // at least `row_bytes` because the buffer holds a full image.
            let stride = bgra.len() / height;
            let mut rgb = Vec::with_capacity(width * height * RGB_BYTES);
            for row in bgra.chunks_exact(stride).take(height) {
                push_rgb(&mut rgb, &row[..row_bytes]);
            }
            rgb
        }
        None => {
            let mut rgb = Vec::with_capacity(bgra.len() / BGRA_BYTES * RGB_BYTES);
            push_rgb(&mut rgb, bgra);
            rgb
        }
    }
}
/// Encode raw, tightly packed RGB data as JPEG with the given quality.
///
/// `rgb_data` is interpreted as `height` rows of `width` pixels, 3 bytes per
/// pixel, and is borrowed rather than copied. `quality` is forwarded unchanged
/// to `JpegEncoder::new_with_quality`.
///
/// # Errors
///
/// Returns an error when an image buffer of the requested dimensions cannot be
/// built from `rgb_data` (for example because it is too short), or when the
/// JPEG encoder reports a failure.
fn encode_jpeg(
    rgb_data: &[u8],
    width: u32,
    height: u32,
    quality: u8,
) -> Result<Vec<u8>> {
    // `ImageBuffer` has two generic parameters (pixel type and container);
    // name the pixel type and let the container be inferred as `&[u8]`.
    let img_buffer = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, rgb_data)
        .context("failed to create image buffer from RGB data")?;

    let mut jpeg_bytes = Vec::new();
    {
        // The encoder borrows `jpeg_bytes` mutably; keep it in its own scope
        // so that borrow has ended before the buffer is returned.
        // `encode_image` needs a mutable encoder.
        let mut encoder =
            image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, quality);
        encoder
            .encode_image(&img_buffer)
            .context("JPEG encode failed")?;
    }
    Ok(jpeg_bytes)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two packed BGRA pixels must come out as two RGB pixels with the
    /// channel order reversed and the alpha byte removed.
    #[test]
    fn test_bgra_to_rgb() {
        // Pixel 0: B=10, G=20, R=30, A=255. Pixel 1: B=40, G=50, R=60, A=128.
        let bgra: [u8; 8] = [10, 20, 30, 255, 40, 50, 60, 128];
        let expected: [u8; 6] = [30, 20, 10, 60, 50, 40];

        let rgb = bgra_to_rgb(&bgra, 2, 1);

        assert_eq!(&rgb[..], &expected[..]);
    }

    /// Encoding a small image must succeed and yield a JPEG byte stream.
    #[test]
    fn test_encode_jpeg() {
        // A 4x4 image with every channel at 255, i.e. solid white.
        let rgb_data = [255u8; 4 * 4 * 3];

        let jpeg = match encode_jpeg(&rgb_data, 4, 4, 80) {
            Ok(bytes) => bytes,
            Err(err) => panic!("encode_jpeg returned an error: {:?}", err),
        };

        assert!(!jpeg.is_empty());
        // JPEG streams open with the start-of-image marker FF D8.
        assert_eq!(jpeg[0], 0xFF);
        assert_eq!(jpeg[1], 0xD8);
    }
}