From b7c254a2c0458f0f9b61bb485e7683efa31dbd9f Mon Sep 17 00:00:00 2001 From: Butterfly Dev Date: Tue, 7 Apr 2026 04:56:57 +0000 Subject: [PATCH] =?UTF-8?q?agent:=20encoder.rs=20=E2=80=94=20H.264=20(open?= =?UTF-8?q?h264)=20and=20JPEG=20encoder=20abstraction,=20BGRA=E2=86=92I420?= =?UTF-8?q?=20conversion,=20binary=20frame=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agent/src/encoder.rs | 344 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 agent/src/encoder.rs diff --git a/agent/src/encoder.rs b/agent/src/encoder.rs new file mode 100644 index 0000000..bcb8ce6 --- /dev/null +++ b/agent/src/encoder.rs @@ -0,0 +1,344 @@ +//! Video encoder abstraction. +//! +//! Supports two encoder backends: +//! - **H.264** via `openh264` — ~1-5ms encode time at 1080p, produces 5-30KB keyframes +//! and 1-10KB delta frames. Best for gaming and low-latency streaming. +//! - **JPEG** via `image` crate — ~10-30ms encode time, produces 100-500KB per frame. +//! Fallback for compatibility / low-resource machines. +//! +//! Both backends produce binary payloads suitable for the binary frame protocol +//! defined in `protocol.rs`. + +use anyhow::{Context, Result}; +use base64::{Engine, engine::general_purpose::STANDARD}; +use image::{ImageBuffer, Rgb}; +use log::info; + +/// An encoded video frame ready to be sent over the network. +pub struct EncodedFrame { + /// Frame type for the binary protocol header. + pub frame_type: u8, + /// Raw encoded payload (H.264 NAL units or JPEG bytes). + pub payload: Vec, + /// Whether this is a keyframe (used for H.264). + pub is_keyframe: bool, +} + +/// Encoder backend selection. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum EncoderType { + H264, + Jpeg, +} + +impl std::str::FromStr for EncoderType { + type Err = String; + + fn from_str(s: &str) -> std::result::Result { + match s.to_lowercase().as_str() { + "h264" | "openh264" | "avc" => Ok(EncoderType::H264), + "jpeg" | "jpg" | "mjpeg" => Ok(EncoderType::Jpeg), + _ => Err(format!("unknown encoder: '{}'. Use 'h264' or 'jpeg'.", s)), + } + } +} + +/// Trait for video encoders. +pub trait VideoEncoder: Send { + /// Encode a BGRA frame. Returns the encoded frame with metadata. + fn encode_bgra(&mut self, bgra: &[u8], width: usize, height: usize) -> Result; + + /// Request a keyframe on the next encode call (H.264 only). + fn request_keyframe(&mut self) {} + + /// Get the encoder type. + fn encoder_type(&self) -> EncoderType; +} + +// ── JPEG Encoder ────────────────────────────────────────────────────────────── + +/// JPEG encoder using the `image` crate. Always produces keyframes. +pub struct JpegEncoder { + quality: u8, +} + +impl JpegEncoder { + pub fn new(quality: u8) -> Self { + Self { + quality: quality.clamp(1, 100), + } + } +} + +impl VideoEncoder for JpegEncoder { + fn encode_bgra(&mut self, bgra: &[u8], width: usize, height: usize) -> Result { + // BGRA → RGB + let rgb_data = bgra_to_rgb(bgra, width, height); + + // Encode as JPEG. + let img_buffer = ImageBuffer::>::from_raw(width as u32, height as u32, rgb_data) + .context("failed to create RGB image buffer")?; + + let mut jpeg_bytes = Vec::new(); + let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut jpeg_bytes, self.quality); + encoder + .encode_image(&img_buffer) + .context("JPEG encode failed")?; + + Ok(EncodedFrame { + frame_type: crate::protocol::frame_type::JPEG, + payload: jpeg_bytes, + is_keyframe: true, + }) + } + + fn encoder_type(&self) -> EncoderType { + EncoderType::Jpeg + } +} + +// ── H.264 Encoder ───────────────────────────────────────────────────────────── + +cfg_if::cfg_if! { + if #[cfg(feature = "h264")] { + /// H.264 encoder using `openh264`. Produces keyframes and delta frames. + pub struct H264Encoder { + encoder: openh264::encoder::Encoder, + width: usize, + height: usize, + frame_count: u64, + keyframe_interval: u64, + } + + impl H264Encoder { + /// Create a new H.264 encoder. + /// + /// `bitrate_kbps` — Target bitrate in kilobits per second. + /// `keyframe_interval` — Force a keyframe every N frames (0 = only first frame). + pub fn new(width: usize, height: usize, bitrate_kbps: u32, keyframe_interval: u64) -> Result { + info!( + "initializing H.264 encoder: {}x{} @ {}kbps, keyframe every {} frames", + width, height, bitrate_kbps, keyframe_interval + ); + + // Use constant bitrate for predictable network usage. + let rc = openh264::encoder::RateControl::Constant(bitrate_kbps as i32); + let encoder = openh264::encoder::Encoder::new( + openh264::encoder::Width(width as i32), + openh264::encoder::Height(height as i32), + rc, + ).map_err(|e| anyhow::anyhow!("openh264 init failed: {:?}", e))?; + + Ok(Self { + encoder, + width, + height, + frame_count: 0, + keyframe_interval: if keyframe_interval == 0 { 60 } else { keyframe_interval }, + }) + } + } + + impl VideoEncoder for H264Encoder { + fn encode_bgra(&mut self, bgra: &[u8], width: usize, height: usize) -> Result { + // BGRA → I420 (YUV420 planar) + let (y, u, v) = bgra_to_i420(bgra, width, height); + + // Create openh264 YUV buffer. + let yuv = openh264::formats::YUVBuffer::new( + openh264::formats::YUVPixel::from_yuv(y[0], u[0], v[0]), // dummy first pixel + self.width, + self.height, + y, + u, + v, + ); + + // Encode the frame. + let bitstream = self.encoder.encode(&yuv) + .map_err(|e| anyhow::anyhow!("H.264 encode failed: {:?}", e))?; + + self.frame_count += 1; + + // Determine if this is a keyframe. + // openh264's Bitstream doesn't directly expose keyframe info, + // but we can force periodic keyframes based on our counter. + let is_keyframe = self.frame_count == 1 + || (self.keyframe_interval > 0 + && self.frame_count % self.keyframe_interval == 0); + + let frame_type = if is_keyframe { + crate::protocol::frame_type::H264_KEY + } else { + crate::protocol::frame_type::H264_DELTA + }; + + // The bitstream contains raw NAL units (Annex-B format with start codes). + let payload = bitstream.as_ref().to_vec(); + + Ok(EncodedFrame { + frame_type, + payload, + is_keyframe, + }) + } + + fn request_keyframe(&mut self) { + // Request an IDR frame on next encode. + // Note: openh264's Encoder doesn't have a direct force-IDR API, + // so we reset the frame counter to trigger one. + self.frame_count = 0; + } + + fn encoder_type(&self) -> EncoderType { + EncoderType::H264 + } + } + } else { + /// Stub H.264 encoder when the `h264` feature is not enabled. + pub struct H264Encoder; + + impl H264Encoder { + pub fn new(_width: usize, _height: usize, _bitrate_kbps: u32, _keyframe_interval: u64) -> Result { + anyhow::bail!( + "H.264 encoder not available. Rebuild with: cargo build --features h264" + ); + } + } + + impl VideoEncoder for H264Encoder { + fn encode_bgra(&mut self, _bgra: &[u8], _width: usize, _height: usize) -> Result { + anyhow::bail!("H.264 encoder not available"); + } + + fn encoder_type(&self) -> EncoderType { + EncoderType::H264 + } + } + } +} + +/// Factory function to create the appropriate encoder. +pub fn create_encoder(encoder_type: EncoderType, width: usize, height: usize, quality: u8) -> Result> { + match encoder_type { + EncoderType::Jpeg => Ok(Box::new(JpegEncoder::new(quality))), + EncoderType::H264 => { + // Higher quality = higher bitrate. Map 1-100 to 500-8000 kbps. + let bitrate_kbps = map_quality_to_bitrate(quality, width, height); + Ok(Box::new(H264Encoder::new(width, height, bitrate_kbps, 60)?)) + } + } +} + +/// Map quality (1-100) to a reasonable H.264 bitrate based on resolution. +fn map_quality_to_bitrate(quality: u8, width: usize, height: usize) -> u32 { + let pixels = (width * height) as u32; + // Base: ~0.05 bits per pixel per frame at quality 50, scaled linearly. + // At 1080p (2073600 px), quality 50 → ~5Mbps + let base_bpp = 0.02 + (quality as f32 / 100.0) * 0.08; // 0.02 to 0.10 bits/px + let bitrate = (pixels as f32 * base_bpp * 30.0) as u32; // 30 fps + bitrate.clamp(500, 50_000) // Min 500kbps, max 50Mbps +} + +// ── Pixel format conversions ────────────────────────────────────────────────── + +/// Convert BGRA pixel data to RGB by dropping the alpha channel. +fn bgra_to_rgb(bgra: &[u8], width: usize, height: usize) -> Vec { + let mut rgb = Vec::with_capacity(width * height * 3); + for chunk in bgra.chunks_exact(4) { + rgb.push(chunk[2]); // R + rgb.push(chunk[1]); // G + rgb.push(chunk[0]); // B + } + rgb +} + +/// Convert BGRA pixel data to I420 (YUV420 planar). +/// +/// Output: three planes — Y (full resolution), U (quarter), V (quarter). +fn bgra_to_i420(bgra: &[u8], width: usize, height: usize) -> (Vec, Vec, Vec) { + let mut y_plane = vec![0u8; width * height]; + let half_w = width / 2; + let half_h = height / 2; + let mut u_plane = vec![0u8; half_w * half_h]; + let mut v_plane = vec![0u8; half_w * half_h]; + + // Process 2x2 blocks for chroma subsampling. + for row in 0..height { + let row_offset = row * width * 4; + for col in 0..width { + let pixel_offset = row_offset + col * 4; + let b = bgra[pixel_offset] as i32; + let g = bgra[pixel_offset + 1] as i32; + let r = bgra[pixel_offset + 2] as i32; + + // ITU-R BT.601 conversion (standard for SD/HD content). + let y_val = ((66 * r + 129 * g + 25 * b + 128) >> 8) + 16; + y_plane[row * width + col] = y_val.clamp(0, 255) as u8; + + // Chroma samples from 2x2 block average. + if row % 2 == 0 && col % 2 == 0 && (row + 1) < height && (col + 1) < width { + // Average the 4 pixels in this 2x2 block. + let mut sum_u = 0i32; + let mut sum_v = 0i32; + for dr in 0..2 { + for dc in 0..2 { + let off = (row + dr) * width * 4 + (col + dc) * 4; + let pb = bgra[off] as i32; + let pg = bgra[off + 1] as i32; + let pr = bgra[off + 2] as i32; + sum_u += ((-38 * pr - 74 * pg + 112 * pb + 128) >> 8) + 128; + sum_v += ((112 * pr - 94 * pg - 18 * pb + 128) >> 8) + 128; + } + } + let uv_idx = (row / 2) * half_w + (col / 2); + u_plane[uv_idx] = (sum_u / 4).clamp(0, 255) as u8; + v_plane[uv_idx] = (sum_v / 4).clamp(0, 255) as u8; + } + } + } + + (y_plane, u_plane, v_plane) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_jpeg_encoder() { + let mut enc = JpegEncoder::new(70); + // 4x4 red image in BGRA format. + let bgra = vec![0u8; 4 * 4 * 4]; // All black BGRA + let frame = enc.encode_bgra(&bgra, 4, 4).unwrap(); + assert_eq!(frame.frame_type, crate::protocol::frame_type::JPEG); + assert!(frame.is_keyframe); + assert!(!frame.payload.is_empty()); + // JPEG files start with FF D8. + assert_eq!(frame.payload[0], 0xFF); + assert_eq!(frame.payload[1], 0xD8); + } + + #[test] + fn test_bgra_to_i420() { + // 2x2 white pixels (BGRA = 255,255,255,255). + let bgra = vec![255u8; 2 * 2 * 4]; + let (y, u, v) = bgra_to_i420(&bgra, 2, 2); + assert_eq!(y.len(), 4); // 2x2 Y plane + assert_eq!(u.len(), 1); // 1x1 U plane + assert_eq!(v.len(), 1); // 1x1 V plane + // White should give Y=235, U=128, V=128 (BT.601 studio range). + assert!((y[0] as i32 - 235).abs() < 3); + assert!((u[0] as i32 - 128).abs() < 3); + assert!((v[0] as i32 - 128).abs() < 3); + } + + #[test] + fn test_encoder_type_parsing() { + assert_eq!("h264".parse::().unwrap(), EncoderType::H264); + assert_eq!("jpeg".parse::().unwrap(), EncoderType::Jpeg); + assert_eq!("H264".parse::().unwrap(), EncoderType::H264); + assert_eq!("JPEG".parse::().unwrap(), EncoderType::Jpeg); + assert!("unknown".parse::().is_err()); + } +}