""" Eye processor module for VR180 separate eye processing. Handles splitting VR180 side-by-side frames into separate left/right eyes and recombining. """ import os import cv2 import numpy as np import logging import subprocess from typing import Dict, List, Any, Optional, Tuple logger = logging.getLogger(__name__) class EyeProcessor: """Handles VR180 eye-specific processing operations.""" def __init__(self, eye_overlap_pixels: int = 0): """ Initialize eye processor. Args: eye_overlap_pixels: Number of pixels to overlap between eyes for blending """ self.eye_overlap_pixels = eye_overlap_pixels def split_frame_into_eyes(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """ Split a VR180 side-by-side frame into separate left and right eye frames. Args: frame: Input VR180 frame (BGR format) Returns: Tuple of (left_eye_frame, right_eye_frame) """ if len(frame.shape) != 3: raise ValueError("Frame must be a 3-channel BGR image") height, width, channels = frame.shape half_width = width // 2 # Extract left and right eye frames left_eye = frame[:, :half_width + self.eye_overlap_pixels, :] right_eye = frame[:, half_width - self.eye_overlap_pixels:, :] logger.debug(f"Split frame {width}x{height} into left: {left_eye.shape} and right: {right_eye.shape}") return left_eye, right_eye def split_video_into_eyes(self, input_video_path: str, left_output_path: str, right_output_path: str, scale: float = 1.0) -> bool: """ Split a VR180 video into separate left and right eye videos using FFmpeg. Args: input_video_path: Path to input VR180 video left_output_path: Output path for left eye video right_output_path: Output path for right eye video scale: Scale factor for output videos (default: 1.0) Returns: True if successful, False otherwise """ try: # Get video properties cap = cv2.VideoCapture(input_video_path) if not cap.isOpened(): logger.error(f"Could not open video: {input_video_path}") return False width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) cap.release() # Calculate output dimensions half_width = int((width // 2) * scale) output_height = int(height * scale) # Create output directories if they don't exist os.makedirs(os.path.dirname(left_output_path), exist_ok=True) os.makedirs(os.path.dirname(right_output_path), exist_ok=True) # FFmpeg command for left eye (crop left half) left_command = [ 'ffmpeg', '-y', '-i', input_video_path, '-vf', f'crop={width//2 + self.eye_overlap_pixels}:{height}:0:0,scale={half_width}:{output_height}', '-c:v', 'libx264', '-preset', 'fast', '-crf', '18', left_output_path ] # FFmpeg command for right eye (crop right half) right_command = [ 'ffmpeg', '-y', '-i', input_video_path, '-vf', f'crop={width//2 + self.eye_overlap_pixels}:{height}:{width//2 - self.eye_overlap_pixels}:0,scale={half_width}:{output_height}', '-c:v', 'libx264', '-preset', 'fast', '-crf', '18', right_output_path ] logger.info(f"Splitting video into left eye: {left_output_path}") result_left = subprocess.run(left_command, capture_output=True, text=True) if result_left.returncode != 0: logger.error(f"FFmpeg failed for left eye: {result_left.stderr}") return False logger.info(f"Splitting video into right eye: {right_output_path}") result_right = subprocess.run(right_command, capture_output=True, text=True) if result_right.returncode != 0: logger.error(f"FFmpeg failed for right eye: {result_right.stderr}") return False logger.info(f"Successfully split video into separate eye videos") return True except Exception as e: 
logger.error(f"Error splitting video into eyes: {e}") return False def combine_eye_masks(self, left_masks: Optional[Dict[int, np.ndarray]], right_masks: Optional[Dict[int, np.ndarray]], full_frame_shape: Tuple[int, int]) -> Dict[int, np.ndarray]: """ Combine left and right eye masks back into full-frame format. Args: left_masks: Dictionary of masks from left eye processing (frame_idx -> mask) right_masks: Dictionary of masks from right eye processing (frame_idx -> mask) full_frame_shape: Shape of the full VR180 frame (height, width) Returns: Dictionary of combined masks in full-frame format """ combined_masks = {} full_height, full_width = full_frame_shape half_width = full_width // 2 # Get all frame indices from both eyes left_frames = set(left_masks.keys()) if left_masks else set() right_frames = set(right_masks.keys()) if right_masks else set() all_frames = left_frames.union(right_frames) for frame_idx in all_frames: # Create full-frame mask combined_mask = np.zeros((full_height, full_width), dtype=np.uint8) # Add left eye mask to left half of frame if left_masks and frame_idx in left_masks: left_mask = left_masks[frame_idx] if len(left_mask.shape) == 3: left_mask = left_mask.squeeze() # Resize left mask to fit left half of full frame left_target_width = half_width + self.eye_overlap_pixels if left_mask.shape != (full_height, left_target_width): left_mask = cv2.resize(left_mask.astype(np.uint8), (left_target_width, full_height), interpolation=cv2.INTER_NEAREST) # Place in left half of combined mask combined_mask[:, :left_target_width] = left_mask[:, :left_target_width] # Add right eye mask to right half of frame if right_masks and frame_idx in right_masks: right_mask = right_masks[frame_idx] if len(right_mask.shape) == 3: right_mask = right_mask.squeeze() # Resize right mask to fit right half of full frame right_target_width = half_width + self.eye_overlap_pixels right_start_x = half_width - self.eye_overlap_pixels if right_mask.shape != (full_height, right_target_width): right_mask = cv2.resize(right_mask.astype(np.uint8), (right_target_width, full_height), interpolation=cv2.INTER_NEAREST) # Place in right half of combined mask combined_mask[:, right_start_x:] = right_mask # Store combined mask for this frame (using object ID 1 for simplicity) combined_masks[frame_idx] = {1: combined_mask} logger.debug(f"Combined {len(combined_masks)} frame masks from left/right eyes") return combined_masks def is_in_left_half(self, detection: Dict[str, Any], frame_width: int) -> bool: """ Check if a detection is in the left half of a VR180 frame. Args: detection: YOLO detection dictionary with 'bbox' key frame_width: Width of the full VR180 frame Returns: True if detection center is in left half """ bbox = detection['bbox'] center_x = (bbox[0] + bbox[2]) / 2 return center_x < (frame_width // 2) def is_in_right_half(self, detection: Dict[str, Any], frame_width: int) -> bool: """ Check if a detection is in the right half of a VR180 frame. Args: detection: YOLO detection dictionary with 'bbox' key frame_width: Width of the full VR180 frame Returns: True if detection center is in right half """ return not self.is_in_left_half(detection, frame_width) def convert_detection_to_eye_coordinates(self, detection: Dict[str, Any], eye_side: str, frame_width: int) -> Dict[str, Any]: """ Convert a full-frame detection to eye-specific coordinates. 
    def convert_detection_to_eye_coordinates(self, detection: Dict[str, Any],
                                             eye_side: str, frame_width: int) -> Dict[str, Any]:
        """
        Convert a full-frame detection to eye-specific coordinates.

        Args:
            detection: YOLO detection dictionary with a 'bbox' key
            eye_side: 'left' or 'right'
            frame_width: Width of the full VR180 frame

        Returns:
            Detection with coordinates converted for the specified eye
        """
        bbox = detection['bbox'].copy()
        half_width = frame_width // 2

        if eye_side == 'right':
            # Shift right eye coordinates so they start from 0
            bbox[0] -= (half_width - self.eye_overlap_pixels)  # x1
            bbox[2] -= (half_width - self.eye_overlap_pixels)  # x2

        # Clamp coordinates to the eye frame bounds
        eye_width = half_width + self.eye_overlap_pixels
        bbox[0] = max(0, min(bbox[0], eye_width - 1))
        bbox[2] = max(0, min(bbox[2], eye_width - 1))

        converted_detection = detection.copy()
        converted_detection['bbox'] = bbox
        return converted_detection

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Optional[List[int]] = None) -> np.ndarray:
        """
        Create a full greenscreen frame as a fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for the green screen color
                (default [0, 255, 0]; pure green is identical in BGR and RGB)

        Returns:
            Full greenscreen frame
        """
        # Avoid a mutable default argument by resolving the default here
        if green_color is None:
            green_color = [0, 255, 0]
        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame
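
# --- Minimal usage sketch (dimensions and inputs are placeholders, not from
# the original module) ---
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    processor = EyeProcessor(eye_overlap_pixels=0)

    # Split a synthetic side-by-side frame into per-eye views
    frame = np.zeros((1024, 2048, 3), dtype=np.uint8)  # stand-in for a decoded VR180 frame
    left_eye, right_eye = processor.split_frame_into_eyes(frame)

    # Recombine dummy per-eye masks into full-frame masks
    height, width = frame.shape[:2]
    left_masks = {0: np.ones((height, width // 2), dtype=np.uint8)}
    right_masks = {0: np.ones((height, width // 2), dtype=np.uint8)}
    combined = processor.combine_eye_masks(left_masks, right_masks, (height, width))

    # Fall back to a full greenscreen frame when no humans are detected
    greenscreen = processor.create_full_greenscreen_frame(frame.shape)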