sbs working phase 1
core/eye_processor.py (266 lines, Normal file)
@@ -0,0 +1,266 @@
"""
Eye processor module for VR180 separate eye processing.

Handles splitting VR180 side-by-side frames into separate left/right eyes and recombining.
"""

import logging
import os
import subprocess
from typing import Any, Dict, Optional, Tuple

import cv2
import numpy as np

logger = logging.getLogger(__name__)


class EyeProcessor:
    """Handles VR180 eye-specific processing operations."""

    def __init__(self, eye_overlap_pixels: int = 0):
        """
        Initialize eye processor.

        Args:
            eye_overlap_pixels: Number of pixels to overlap between eyes for blending
        """
        self.eye_overlap_pixels = eye_overlap_pixels

    def split_frame_into_eyes(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split a VR180 side-by-side frame into separate left and right eye frames.

        Args:
            frame: Input VR180 frame (BGR format)

        Returns:
            Tuple of (left_eye_frame, right_eye_frame)
        """
        if frame.ndim != 3 or frame.shape[2] != 3:
            raise ValueError("Frame must be a 3-channel BGR image")

        height, width, _ = frame.shape
        half_width = width // 2

        # Extract left and right eye frames, each keeping the configured
        # overlap strip around the center seam
        left_eye = frame[:, :half_width + self.eye_overlap_pixels, :]
        right_eye = frame[:, half_width - self.eye_overlap_pixels:, :]

        logger.debug(f"Split frame {width}x{height} into left: {left_eye.shape} and right: {right_eye.shape}")

        return left_eye, right_eye
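
    # A worked example of the slicing above, with hypothetical numbers: for a
    # 4096x2048 side-by-side frame and eye_overlap_pixels=16, half_width is
    # 2048, so left_eye is frame[:, :2064, :] and right_eye is frame[:, 2032:, :]
    # (both shaped (2048, 2064, 3)); the 32-column strip they share around the
    # seam is what later blending can use.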

    def split_video_into_eyes(self, input_video_path: str, left_output_path: str,
                              right_output_path: str, scale: float = 1.0) -> bool:
        """
        Split a VR180 video into separate left and right eye videos using FFmpeg.

        Args:
            input_video_path: Path to input VR180 video
            left_output_path: Output path for left eye video
            right_output_path: Output path for right eye video
            scale: Scale factor for output videos (default: 1.0)

        Returns:
            True if successful, False otherwise
        """
        try:
            # Get video properties
            cap = cv2.VideoCapture(input_video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {input_video_path}")
                return False

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            cap.release()

            # Each eye is half the frame plus the overlap strip. Scale the
            # cropped width (not the bare half-width) so the crop's aspect
            # ratio is preserved, and round down to even values since libx264
            # with yuv420p output requires even dimensions.
            crop_width = width // 2 + self.eye_overlap_pixels
            eye_output_width = int(crop_width * scale) // 2 * 2
            output_height = int(height * scale) // 2 * 2

            # Create output directories if they don't exist
            os.makedirs(os.path.dirname(left_output_path) or '.', exist_ok=True)
            os.makedirs(os.path.dirname(right_output_path) or '.', exist_ok=True)

            # FFmpeg command for left eye (crop left half plus overlap)
            left_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={crop_width}:{height}:0:0,scale={eye_output_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                left_output_path
            ]

            # FFmpeg command for right eye (crop right half plus overlap)
            right_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={crop_width}:{height}:{width // 2 - self.eye_overlap_pixels}:0,scale={eye_output_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                right_output_path
            ]

            logger.info(f"Splitting video into left eye: {left_output_path}")
            result_left = subprocess.run(left_command, capture_output=True, text=True)
            if result_left.returncode != 0:
                logger.error(f"FFmpeg failed for left eye: {result_left.stderr}")
                return False

            logger.info(f"Splitting video into right eye: {right_output_path}")
            result_right = subprocess.run(right_command, capture_output=True, text=True)
            if result_right.returncode != 0:
                logger.error(f"FFmpeg failed for right eye: {result_right.stderr}")
                return False

            logger.info("Successfully split video into separate eye videos")
            return True

        except Exception as e:
            logger.error(f"Error splitting video into eyes: {e}")
            return False
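
    # For illustration, the filter strings the code above builds (hypothetical
    # numbers): a 5760x2880 input with eye_overlap_pixels=0 and scale=0.5 gives
    # 'crop=2880:2880:0:0,scale=1440:1440' for the left eye and
    # 'crop=2880:2880:2880:0,scale=1440:1440' for the right eye.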

    def combine_eye_masks(self, left_masks: Optional[Dict[int, np.ndarray]],
                          right_masks: Optional[Dict[int, np.ndarray]],
                          full_frame_shape: Tuple[int, int]) -> Dict[int, Dict[int, np.ndarray]]:
        """
        Combine left and right eye masks back into full-frame format.

        Args:
            left_masks: Dictionary of masks from left eye processing (frame_idx -> mask)
            right_masks: Dictionary of masks from right eye processing (frame_idx -> mask)
            full_frame_shape: Shape of the full VR180 frame (height, width)

        Returns:
            Dictionary mapping frame_idx -> {object_id: combined_mask} in full-frame format
        """
        combined_masks = {}
        full_height, full_width = full_frame_shape
        half_width = full_width // 2

        # Get all frame indices from both eyes
        left_frames = set(left_masks.keys()) if left_masks else set()
        right_frames = set(right_masks.keys()) if right_masks else set()
        all_frames = left_frames.union(right_frames)

        for frame_idx in all_frames:
            # Create full-frame mask
            combined_mask = np.zeros((full_height, full_width), dtype=np.uint8)

            # Add left eye mask to left half of frame
            if left_masks and frame_idx in left_masks:
                left_mask = left_masks[frame_idx]
                if left_mask.ndim == 3:
                    left_mask = left_mask.squeeze()

                # Resize left mask to fit the left half (plus overlap) of the full frame
                left_target_width = half_width + self.eye_overlap_pixels
                if left_mask.shape != (full_height, left_target_width):
                    left_mask = cv2.resize(left_mask.astype(np.uint8),
                                           (left_target_width, full_height),
                                           interpolation=cv2.INTER_NEAREST)

                # Place in left half of combined mask
                combined_mask[:, :left_target_width] = left_mask

            # Add right eye mask to right half of frame
            if right_masks and frame_idx in right_masks:
                right_mask = right_masks[frame_idx]
                if right_mask.ndim == 3:
                    right_mask = right_mask.squeeze()

                # Resize right mask to fit the right half (plus overlap); deriving
                # the width from the slice start keeps odd frame widths consistent
                right_start_x = half_width - self.eye_overlap_pixels
                right_target_width = full_width - right_start_x

                if right_mask.shape != (full_height, right_target_width):
                    right_mask = cv2.resize(right_mask.astype(np.uint8),
                                            (right_target_width, full_height),
                                            interpolation=cv2.INTER_NEAREST)

                # Place in right half of combined mask; in the overlap strip
                # the right mask overwrites the left one
                combined_mask[:, right_start_x:] = right_mask

            # Store combined mask for this frame (using object ID 1 for simplicity)
            combined_masks[frame_idx] = {1: combined_mask}

        logger.debug(f"Combined {len(combined_masks)} frame masks from left/right eyes")
        return combined_masks
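
    # Shape sketch for the recombination above (hypothetical numbers): for
    # full_frame_shape=(2880, 5760) with no overlap, each per-eye mask is
    # resized to 2880x2880 and the result is one 2880x5760 uint8 mask per
    # frame, stored as {frame_idx: {1: mask}}.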

    def is_in_left_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check if a detection is in the left half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the left half
        """
        bbox = detection['bbox']
        center_x = (bbox[0] + bbox[2]) / 2
        return center_x < (frame_width // 2)

    def is_in_right_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check if a detection is in the right half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the right half
        """
        return not self.is_in_left_half(detection, frame_width)
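
    # Example of the center test above (hypothetical numbers): in a 4096-wide
    # frame, bbox [100, 50, 300, 400] has center_x = 200 < 2048, so it is in
    # the left half; bbox [2500, 50, 2700, 400] (center_x = 2600) is not.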

    def convert_detection_to_eye_coordinates(self, detection: Dict[str, Any],
                                             eye_side: str, frame_width: int) -> Dict[str, Any]:
        """
        Convert a full-frame detection to eye-specific coordinates.

        Args:
            detection: YOLO detection dictionary with 'bbox' key
            eye_side: 'left' or 'right'
            frame_width: Width of the full VR180 frame

        Returns:
            Detection with converted coordinates for the specific eye
        """
        bbox = detection['bbox'].copy()
        half_width = frame_width // 2

        if eye_side == 'right':
            # Shift right eye coordinates to start from 0
            bbox[0] -= (half_width - self.eye_overlap_pixels)  # x1
            bbox[2] -= (half_width - self.eye_overlap_pixels)  # x2

        # Ensure coordinates are within the eye frame bounds
        eye_width = half_width + self.eye_overlap_pixels
        bbox[0] = max(0, min(bbox[0], eye_width - 1))
        bbox[2] = max(0, min(bbox[2], eye_width - 1))

        converted_detection = detection.copy()
        converted_detection['bbox'] = bbox

        return converted_detection
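
    # Worked example for the conversion above (hypothetical numbers): with
    # frame_width=4096 and eye_overlap_pixels=0, a right-eye detection with
    # bbox [3000, 100, 3200, 400] becomes [952, 100, 1152, 400] after the
    # shift (3000 - 2048 = 952), already inside the 2048-wide eye frame.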

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Tuple[int, int, int] = (0, 255, 0)) -> np.ndarray:
        """
        Create a full greenscreen frame as a fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for the green screen color

        Returns:
            Full greenscreen frame
        """
        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame
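
# Minimal end-to-end usage sketch; the paths, sizes, and the per-eye mask
# dictionaries are hypothetical and depend on the surrounding pipeline:
#
#   processor = EyeProcessor(eye_overlap_pixels=16)
#   if processor.split_video_into_eyes('input/vr180_sbs.mp4',
#                                      'work/left_eye.mp4',
#                                      'work/right_eye.mp4',
#                                      scale=0.5):
#       # ...run per-eye human segmentation on the two videos, then:
#       masks = processor.combine_eye_masks(left_masks, right_masks, (2880, 5760))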