"""
|
|
Eye processor module for VR180 separate eye processing.
|
|
Handles splitting VR180 side-by-side frames into separate left/right eyes and recombining.
|
|
"""
|
|
|
|
import os
|
|
import cv2
|
|
import numpy as np
|
|
import logging
|
|
import subprocess
|
|
from typing import Dict, List, Any, Optional, Tuple
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|


class EyeProcessor:
    """Handles VR180 eye-specific processing operations."""

    def __init__(self, eye_overlap_pixels: int = 0):
        """
        Initialize the eye processor.

        Args:
            eye_overlap_pixels: Number of pixels to overlap between eyes for blending
        """
        self.eye_overlap_pixels = eye_overlap_pixels

    def split_frame_into_eyes(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split a VR180 side-by-side frame into separate left and right eye frames.

        When ``eye_overlap_pixels`` is non-zero, both crops extend past the
        center seam, so the two outputs share a band of
        ``2 * eye_overlap_pixels`` columns for later blending.

        Args:
            frame: Input VR180 frame (BGR format)

        Returns:
            Tuple of (left_eye_frame, right_eye_frame)
        """
        if frame.ndim != 3 or frame.shape[2] != 3:
            raise ValueError("Frame must be a 3-channel BGR image")

        height, width, _ = frame.shape
        half_width = width // 2

        # Extract the left and right eye frames, each extended by the overlap
        left_eye = frame[:, :half_width + self.eye_overlap_pixels, :]
        right_eye = frame[:, half_width - self.eye_overlap_pixels:, :]

        logger.debug(f"Split frame {width}x{height} into left: {left_eye.shape} and right: {right_eye.shape}")

        return left_eye, right_eye
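
    # Usage sketch for split_frame_into_eyes, using a synthetic frame so the
    # snippet stays self-contained (any BGR frame loaded via cv2 works the
    # same way):
    #
    #   processor = EyeProcessor(eye_overlap_pixels=16)
    #   frame = np.zeros((2048, 4096, 3), dtype=np.uint8)  # side-by-side VR180
    #   left_eye, right_eye = processor.split_frame_into_eyes(frame)
    #   assert left_eye.shape == (2048, 2048 + 16, 3)
    #   assert right_eye.shape == (2048, 2048 + 16, 3)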

    def split_video_into_eyes(self, input_video_path: str, left_output_path: str,
                              right_output_path: str, scale: float = 1.0) -> bool:
        """
        Split a VR180 video into separate left and right eye videos using FFmpeg.

        Args:
            input_video_path: Path to input VR180 video
            left_output_path: Output path for left eye video
            right_output_path: Output path for right eye video
            scale: Scale factor for output videos (default: 1.0)

        Returns:
            True if successful, False otherwise
        """
        try:
            # Probe the input dimensions with OpenCV
            cap = cv2.VideoCapture(input_video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {input_video_path}")
                return False

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            cap.release()

            # Each eye crop is half the frame plus the overlap band; scale both
            # output dimensions by the same factor to preserve aspect ratio
            crop_width = width // 2 + self.eye_overlap_pixels
            output_width = int(crop_width * scale)
            output_height = int(height * scale)

            # Create output directories if they don't exist (dirname may be
            # empty when a path has no directory component)
            for output_path in (left_output_path, right_output_path):
                os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

            # FFmpeg command for left eye (crop left half plus overlap)
            left_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={crop_width}:{height}:0:0,scale={output_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                left_output_path
            ]

            # FFmpeg command for right eye (crop right half plus overlap)
            right_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={crop_width}:{height}:{width // 2 - self.eye_overlap_pixels}:0,scale={output_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                right_output_path
            ]

            logger.info(f"Splitting video into left eye: {left_output_path}")
            result_left = subprocess.run(left_command, capture_output=True, text=True)
            if result_left.returncode != 0:
                logger.error(f"FFmpeg failed for left eye: {result_left.stderr}")
                return False

            logger.info(f"Splitting video into right eye: {right_output_path}")
            result_right = subprocess.run(right_command, capture_output=True, text=True)
            if result_right.returncode != 0:
                logger.error(f"FFmpeg failed for right eye: {result_right.stderr}")
                return False

            logger.info("Successfully split video into separate eye videos")
            return True

        except Exception as e:
            logger.error(f"Error splitting video into eyes: {e}")
            return False
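
    # Usage sketch for split_video_into_eyes (hedged: the file names below are
    # hypothetical, and ffmpeg must be available on PATH):
    #
    #   processor = EyeProcessor()
    #   ok = processor.split_video_into_eyes(
    #       "input_vr180.mp4",
    #       "work/left_eye.mp4",
    #       "work/right_eye.mp4",
    #       scale=0.5,  # halve resolution to speed up downstream inference
    #   )
    #   if not ok:
    #       raise RuntimeError("Eye split failed; see log for FFmpeg stderr")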

    def combine_eye_masks(self, left_masks: Optional[Dict[int, np.ndarray]],
                          right_masks: Optional[Dict[int, np.ndarray]],
                          full_frame_shape: Tuple[int, int]) -> Dict[int, Dict[int, np.ndarray]]:
        """
        Combine left and right eye masks back into full-frame format.

        Args:
            left_masks: Dictionary of masks from left eye processing (frame_idx -> mask)
            right_masks: Dictionary of masks from right eye processing (frame_idx -> mask)
            full_frame_shape: Shape of the full VR180 frame (height, width)

        Returns:
            Dictionary mapping frame_idx -> {object_id: combined_mask}; a
            single object ID of 1 is used for the merged mask
        """
        combined_masks = {}
        full_height, full_width = full_frame_shape
        half_width = full_width // 2

        # Gather all frame indices seen by either eye
        left_frames = set(left_masks.keys()) if left_masks else set()
        right_frames = set(right_masks.keys()) if right_masks else set()
        all_frames = left_frames.union(right_frames)

        for frame_idx in all_frames:
            # Start from an empty full-frame mask
            combined_mask = np.zeros((full_height, full_width), dtype=np.uint8)

            # Paste the left eye mask into the left half of the frame
            if left_masks and frame_idx in left_masks:
                left_mask = left_masks[frame_idx]
                if len(left_mask.shape) == 3:
                    left_mask = left_mask.squeeze()

                # Resize the left mask to the left crop size if needed
                left_target_width = half_width + self.eye_overlap_pixels
                if left_mask.shape != (full_height, left_target_width):
                    left_mask = cv2.resize(left_mask.astype(np.uint8),
                                           (left_target_width, full_height),
                                           interpolation=cv2.INTER_NEAREST)

                combined_mask[:, :left_target_width] = left_mask[:, :left_target_width]

            # Paste the right eye mask into the right half of the frame; in
            # the overlap band the right mask overwrites the left one
            if right_masks and frame_idx in right_masks:
                right_mask = right_masks[frame_idx]
                if len(right_mask.shape) == 3:
                    right_mask = right_mask.squeeze()

                # Resize the right mask to the right crop size if needed
                right_target_width = half_width + self.eye_overlap_pixels
                right_start_x = half_width - self.eye_overlap_pixels

                if right_mask.shape != (full_height, right_target_width):
                    right_mask = cv2.resize(right_mask.astype(np.uint8),
                                            (right_target_width, full_height),
                                            interpolation=cv2.INTER_NEAREST)

                combined_mask[:, right_start_x:] = right_mask

            # Store the combined mask under a single object ID for simplicity
            combined_masks[frame_idx] = {1: combined_mask}

        logger.debug(f"Combined {len(combined_masks)} frame masks from left/right eyes")
        return combined_masks
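
    # Usage sketch for combine_eye_masks with tiny synthetic per-eye masks
    # (in practice the masks would come from a segmentation model):
    #
    #   processor = EyeProcessor()
    #   left = {0: np.ones((64, 64), dtype=np.uint8)}    # left eye, frame 0
    #   right = {0: np.zeros((64, 64), dtype=np.uint8)}  # right eye, frame 0
    #   combined = processor.combine_eye_masks(left, right, (64, 128))
    #   mask = combined[0][1]                            # frame 0, object ID 1
    #   assert mask.shape == (64, 128)
    #   assert mask[:, :64].all() and not mask[:, 64:].any()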

    def is_in_left_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check whether a detection lies in the left half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the left half
        """
        bbox = detection['bbox']
        center_x = (bbox[0] + bbox[2]) / 2
        return center_x < (frame_width // 2)

    def is_in_right_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check whether a detection lies in the right half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the right half
        """
        return not self.is_in_left_half(detection, frame_width)
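
    # Usage sketch for routing detections to eyes (hedged: the detection dicts
    # are illustrative; only the 'bbox' key as [x1, y1, x2, y2] is required):
    #
    #   processor = EyeProcessor()
    #   detections = [{'bbox': [100, 200, 300, 400]},
    #                 {'bbox': [2500, 200, 2700, 400]}]
    #   left_dets = [d for d in detections if processor.is_in_left_half(d, 4096)]
    #   right_dets = [d for d in detections if processor.is_in_right_half(d, 4096)]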

    def convert_detection_to_eye_coordinates(self, detection: Dict[str, Any],
                                             eye_side: str, frame_width: int) -> Dict[str, Any]:
        """
        Convert a full-frame detection to eye-specific coordinates.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            eye_side: 'left' or 'right'
            frame_width: Width of the full VR180 frame

        Returns:
            Copy of the detection with coordinates converted for the given eye
        """
        bbox = detection['bbox'].copy()
        half_width = frame_width // 2

        if eye_side == 'right':
            # The right eye crop starts at half_width - eye_overlap_pixels, so
            # shift the x coordinates to the crop's local origin; the left eye
            # crop already starts at x = 0 and needs no shift
            bbox[0] -= (half_width - self.eye_overlap_pixels)  # x1
            bbox[2] -= (half_width - self.eye_overlap_pixels)  # x2

        # Clamp the x coordinates to the eye crop's width
        eye_width = half_width + self.eye_overlap_pixels
        bbox[0] = max(0, min(bbox[0], eye_width - 1))
        bbox[2] = max(0, min(bbox[2], eye_width - 1))

        converted_detection = detection.copy()
        converted_detection['bbox'] = bbox

        return converted_detection
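
    # Usage sketch for convert_detection_to_eye_coordinates (values are
    # illustrative). With frame_width=4096 and no overlap, a right-half box at
    # x = 2500..2700 maps to x = 452..652 in the right eye crop:
    #
    #   processor = EyeProcessor()
    #   det = {'bbox': [2500, 200, 2700, 400]}
    #   eye_det = processor.convert_detection_to_eye_coordinates(det, 'right', 4096)
    #   assert eye_det['bbox'] == [452, 200, 652, 400]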

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Tuple[int, int, int] = (0, 255, 0)) -> np.ndarray:
        """
        Create a full greenscreen frame as a fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR color for the green screen; pure green (0, 255, 0)
                is identical in BGR and RGB

        Returns:
            Full greenscreen frame
        """
        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame
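

if __name__ == "__main__":
    # Minimal self-contained demo (a hedged sketch, not part of the pipeline):
    # split a synthetic VR180 frame, rebuild a full-frame mask from fake
    # per-eye masks, and create a greenscreen fallback frame.
    logging.basicConfig(level=logging.DEBUG)

    processor = EyeProcessor(eye_overlap_pixels=8)
    frame = np.zeros((1024, 2048, 3), dtype=np.uint8)

    left_eye, right_eye = processor.split_frame_into_eyes(frame)
    print(f"left eye: {left_eye.shape}, right eye: {right_eye.shape}")

    fake_left = {0: np.ones(left_eye.shape[:2], dtype=np.uint8)}
    fake_right = {0: np.ones(right_eye.shape[:2], dtype=np.uint8)}
    combined = processor.combine_eye_masks(fake_left, fake_right, frame.shape[:2])
    print(f"combined mask for frame 0: {combined[0][1].shape}")

    greenscreen = processor.create_full_greenscreen_frame(frame.shape)
    print(f"greenscreen frame: {greenscreen.shape}")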