projects/agent/src/capture.rs

235 lines
7.1 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Screen capture module.
//!
//! Uses the `scrap` crate to capture display frames efficiently via platform APIs
//! (DXGI on Windows, X11 on Linux, CoreGraphics on macOS). Frames are converted
//! from BGRA to RGB and encoded as JPEG with configurable quality.
use anyhow::{Context, Result};
use base64::{Engine, engine::general_purpose::STANDARD};
use image::{ImageBuffer, Rgb};
use log::{info, warn};
use scrap::{Capturer, Display};
use std::time::Instant;
/// Manages screen capture for a specific display.
///
/// Holds the platform capturer together with the display dimensions recorded
/// at construction time, the JPEG quality to encode with, and running
/// statistics that are updated on every successful `capture_frame` call.
pub struct ScreenCapture {
/// Capturer that raw frames are pulled from.
capturer: Capturer,
/// Display width in pixels, as reported by the display when capture was set up.
width: usize,
/// Display height in pixels, as reported by the display when capture was set up.
height: usize,
/// JPEG encoding quality; clamped to 1..=100 by the constructor.
quality: u8,
/// Number of frames successfully captured and encoded so far.
frame_count: u64,
/// Total size in bytes of all JPEGs produced so far (measured before base64).
bytes_sent: u64,
}
impl ScreenCapture {
    /// Initialize capture for the given display index.
    ///
    /// `display_idx` indexes the list returned by `Display::all()` (0 is the
    /// first display reported, 1 the second, etc.).
    /// `quality` is the JPEG encoding quality (1–100); values outside that
    /// range are clamped.
    ///
    /// # Errors
    ///
    /// Returns an error if the displays cannot be enumerated, if no display is
    /// present, if `display_idx` is out of range, or if the capturer cannot be
    /// created. Failing to obtain a first frame during warm-up is only logged
    /// by `wait_for_first_frame`; it does not fail construction.
    pub fn new(display_idx: usize, quality: u8) -> Result<Self> {
        // Get all available displays.
        let displays = Display::all()
            .map_err(|e| anyhow::anyhow!("failed to enumerate displays: {}", e))?;
        if displays.is_empty() {
            anyhow::bail!("no displays detected — is a display / X server available?");
        }

        // Select the requested display; a missing index becomes an error
        // rather than a panic.
        let display_count = displays.len();
        let display = match displays.into_iter().nth(display_idx) {
            Some(display) => display,
            None => anyhow::bail!(
                "display index {} out of range ({} displays available)",
                display_idx,
                display_count
            ),
        };

        let width = display.width();
        let height = display.height();
        info!(
            "initializing capture: display {} ({}x{})",
            display_idx, width, height
        );

        let capturer = Capturer::new(display)
            .map_err(|e| anyhow::anyhow!("failed to create capturer: {}", e))?;

        // The first call to `frame()` may fail on some platforms because the
        // internal buffer isn't ready yet. Retry until we get a frame.
        let capturer = wait_for_first_frame(capturer);

        Ok(Self {
            capturer,
            width,
            height,
            quality: quality.clamp(1, 100),
            frame_count: 0,
            bytes_sent: 0,
        })
    }

    /// Capture a single frame and return it as a base64-encoded JPEG string.
    ///
    /// This method:
    /// 1. Requests the next frame from the capturer.
    /// 2. Converts BGRA pixel data to RGB.
    /// 3. Encodes as JPEG with the configured quality.
    /// 4. Base64-encodes the result.
    ///
    /// On success the frame counter and the JPEG byte total are updated.
    ///
    /// # Errors
    ///
    /// Returns an error if the capturer fails to deliver a frame, if the
    /// frame is smaller than `width * height * 4` bytes, or if JPEG encoding
    /// fails. No retry is performed here.
    /// NOTE(review): the capturer may report "no new frame yet" as an error;
    /// confirm that callers treat that case as retryable.
    pub fn capture_frame(&mut self) -> Result<String> {
        let start = Instant::now();

        // Capture the raw frame (expected to be BGRA).
        let frame = self
            .capturer
            .frame()
            .map_err(|e| anyhow::anyhow!("frame capture failed: {}", e))?;

        let frame_len = frame.len();
        let expected_len = self.width * self.height * 4; // BGRA = 4 bytes/pixel
        if frame_len < expected_len {
            anyhow::bail!(
                "frame size mismatch: got {} bytes, expected at least {}",
                frame_len,
                expected_len
            );
        }

        // Convert BGRA → RGB (drop the alpha channel). The frame is borrowed
        // so that it deref-coerces to the `&[u8]` the converter expects.
        let rgb_data = bgra_to_rgb(&frame, self.width, self.height);

        // Encode as JPEG. Display dimensions are assumed to fit in `u32`.
        let jpeg_bytes = encode_jpeg(
            &rgb_data,
            self.width as u32,
            self.height as u32,
            self.quality,
        )
        .context("JPEG encoding failed")?;

        // Base64 encode.
        let b64 = STANDARD.encode(&jpeg_bytes);

        // Track stats.
        self.frame_count += 1;
        self.bytes_sent += jpeg_bytes.len() as u64;

        let elapsed = start.elapsed();
        log::trace!(
            "captured frame: {}x{} → {} bytes JPEG ({} bytes base64) in {:.1}ms",
            self.width,
            self.height,
            jpeg_bytes.len(),
            b64.len(),
            elapsed.as_secs_f64() * 1000.0
        );

        Ok(b64)
    }

    /// Get the capture resolution as a string (e.g., "1920x1080").
    pub fn resolution(&self) -> String {
        format!("{}x{}", self.width, self.height)
    }

    /// Get the display width.
    pub fn width(&self) -> usize {
        self.width
    }

    /// Get the display height.
    pub fn height(&self) -> usize {
        self.height
    }

    /// Get total frames captured since start.
    pub fn frame_count(&self) -> u64 {
        self.frame_count
    }

    /// Get total bytes sent (JPEG, before base64).
    pub fn bytes_sent(&self) -> u64 {
        self.bytes_sent
    }
}
/// Poll the capturer until it yields a first frame, then hand it back.
///
/// Some platforms fail the first few `frame()` calls. Up to 30 attempts are
/// made, sleeping 100 ms after each failure. If every attempt fails, a warning
/// is logged and the capturer is returned anyway so the caller can keep trying.
fn wait_for_first_frame(mut capturer: Capturer) -> Capturer {
    const MAX_ATTEMPTS: i32 = 30;
    const RETRY_DELAY: std::time::Duration = std::time::Duration::from_millis(100);

    for attempt in 1..=MAX_ATTEMPTS {
        // Only success or failure matters here; the frame itself is discarded.
        let outcome = capturer.frame().map(|_| ());
        let err = match outcome {
            Ok(()) => {
                info!("capture ready (took {} attempts)", attempt);
                return capturer;
            }
            Err(err) => err,
        };

        if attempt == MAX_ATTEMPTS {
            warn!("first frame never ready after {} attempts: {}", MAX_ATTEMPTS, err);
            break;
        }

        log::debug!("waiting for first frame (attempt {}): {}", attempt, err);
        std::thread::sleep(RETRY_DELAY);
    }

    capturer
}
/// Convert BGRA pixel data to tightly packed RGB by dropping the alpha channel.
///
/// `bgra` holds `height` rows of pixels. Rows may be longer than `width * 4`
/// bytes: capture backends can pad each row, so the row stride is derived as
/// `bgra.len() / height` and only the first `width` pixels of each row are
/// converted. For a buffer without padding this is the same as converting
/// every pixel in order.
///
/// If the dimensions do not describe the buffer (either is zero, or the
/// buffer is too small for `height` rows of `width` pixels), the buffer is
/// treated as tightly packed and every complete 4-byte pixel is converted.
///
/// Returns the RGB bytes, three per converted pixel.
fn bgra_to_rgb(bgra: &[u8], width: usize, height: usize) -> Vec<u8> {
    const BGRA_BYTES: usize = 4;
    const RGB_BYTES: usize = 3;

    /// Append the RGB form of every complete BGRA pixel in `src` to `dst`.
    fn push_pixels(dst: &mut Vec<u8>, src: &[u8]) {
        for px in src.chunks_exact(4) {
            // BGRA → RGB: reverse B,G,R order to R,G,B, skip A.
            dst.extend_from_slice(&[px[2], px[1], px[0]]);
        }
    }

    let row_bytes = width.saturating_mul(BGRA_BYTES);
    let stride = if height == 0 { 0 } else { bgra.len() / height };

    // Dimensions are unusable for row-wise conversion: convert the whole buffer.
    if row_bytes == 0 || stride < row_bytes {
        let mut rgb = Vec::with_capacity(bgra.len() / BGRA_BYTES * RGB_BYTES);
        push_pixels(&mut rgb, bgra);
        return rgb;
    }

    // Here `height * stride <= bgra.len()` and `stride >= row_bytes`, so the
    // capacity computation cannot overflow and every row slice is in bounds.
    let mut rgb = Vec::with_capacity(width * height * RGB_BYTES);
    for row in bgra.chunks_exact(stride).take(height) {
        // Skip any padding bytes that follow the visible pixels of the row.
        push_pixels(&mut rgb, &row[..row_bytes]);
    }
    rgb
}
/// Encode raw RGB data as JPEG with the given quality.
///
/// `rgb_data` must hold at least `width * height * 3` bytes of tightly packed
/// RGB pixels; `quality` is passed straight to the JPEG encoder.
///
/// # Errors
///
/// Returns an error if `rgb_data` cannot back an image of the given
/// dimensions, or if the JPEG encoder reports a failure.
fn encode_jpeg(
    rgb_data: &[u8],
    width: u32,
    height: u32,
    quality: u8,
) -> Result<Vec<u8>> {
    // View the borrowed bytes as an RGB image without copying. `ImageBuffer`
    // has two type parameters, so the container type is left to inference.
    let img_buffer = ImageBuffer::<Rgb<u8>, _>::from_raw(width, height, rgb_data)
        .context("failed to create image buffer from RGB data")?;

    let mut jpeg_bytes = Vec::new();
    {
        // Scope the encoder so its mutable borrow of `jpeg_bytes` ends before
        // the buffer is returned.
        let mut encoder =
            image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, quality);
        encoder
            .encode_image(&img_buffer)
            .context("JPEG encode failed")?;
    }

    Ok(jpeg_bytes)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two BGRA pixels come out as two RGB triples with the alpha bytes removed.
    #[test]
    fn test_bgra_to_rgb() {
        // Pixel 0: B=10, G=20, R=30, A=255. Pixel 1: B=40, G=50, R=60, A=128.
        let input: Vec<u8> = vec![10, 20, 30, 255, 40, 50, 60, 128];
        let expected: Vec<u8> = vec![30, 20, 10, 60, 50, 40];

        assert_eq!(bgra_to_rgb(&input, 2, 1), expected);
    }

    /// Encoding a small solid image succeeds and yields a JPEG byte stream.
    #[test]
    fn test_encode_jpeg() {
        // 4x4 image with every channel at 255 (solid white).
        let pixels = vec![255u8; 4 * 4 * 3];

        let jpeg = match encode_jpeg(&pixels, 4, 4, 80) {
            Ok(bytes) => bytes,
            Err(err) => panic!("encode_jpeg failed: {:#}", err),
        };

        assert!(!jpeg.is_empty());
        // JPEG streams begin with the bytes FF D8.
        assert!(jpeg.starts_with(&[0xFF, 0xD8]));
    }
}