sbs working phase 1

2025-07-30 18:07:26 -07:00
parent 6617acb1c9
commit 70044e1b10
8 changed files with 2417 additions and 7 deletions

core/eye_processor.py (new file, 266 additions)

@@ -0,0 +1,266 @@
"""
Eye processor module for VR180 separate eye processing.
Handles splitting VR180 side-by-side frames into separate left/right eyes and recombining.
"""
import os
import cv2
import numpy as np
import logging
import subprocess
from typing import Dict, List, Any, Optional, Tuple
logger = logging.getLogger(__name__)
class EyeProcessor:
    """Handles VR180 eye-specific processing operations."""

    def __init__(self, eye_overlap_pixels: int = 0):
        """
        Initialize eye processor.

        Args:
            eye_overlap_pixels: Number of pixels to overlap between eyes for blending
        """
        self.eye_overlap_pixels = eye_overlap_pixels
    def split_frame_into_eyes(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split a VR180 side-by-side frame into separate left and right eye frames.

        Args:
            frame: Input VR180 frame (BGR format)

        Returns:
            Tuple of (left_eye_frame, right_eye_frame)
        """
        if len(frame.shape) != 3:
            raise ValueError("Frame must be a 3-channel BGR image")
        height, width, _ = frame.shape
        half_width = width // 2
        # Extract the left and right eye halves, each extended by the overlap region
        left_eye = frame[:, :half_width + self.eye_overlap_pixels, :]
        right_eye = frame[:, half_width - self.eye_overlap_pixels:, :]
        logger.debug(f"Split frame {width}x{height} into left: {left_eye.shape} and right: {right_eye.shape}")
        return left_eye, right_eye
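
    # A minimal sketch of how the split behaves. The 3840x1920 SBS frame and
    # overlap of 16 px are hypothetical; the shapes follow from the slicing
    # above rather than from any documented contract:
    #
    #   processor = EyeProcessor(eye_overlap_pixels=16)
    #   frame = np.zeros((1920, 3840, 3), dtype=np.uint8)
    #   left, right = processor.split_frame_into_eyes(frame)
    #   # left.shape  == (1920, 1936, 3)   -> 3840 // 2 + 16
    #   # right.shape == (1920, 1936, 3)   -> 3840 - (3840 // 2 - 16)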
    def split_video_into_eyes(self, input_video_path: str, left_output_path: str,
                              right_output_path: str, scale: float = 1.0) -> bool:
        """
        Split a VR180 video into separate left and right eye videos using FFmpeg.

        Args:
            input_video_path: Path to input VR180 video
            left_output_path: Output path for left eye video
            right_output_path: Output path for right eye video
            scale: Scale factor for output videos (default: 1.0)

        Returns:
            True if successful, False otherwise
        """
        try:
            # Probe video properties with OpenCV
            cap = cv2.VideoCapture(input_video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {input_video_path}")
                return False
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)  # probed for reference; FFmpeg keeps the source rate
            cap.release()

            # Calculate output dimensions
            half_width = int((width // 2) * scale)
            output_height = int(height * scale)

            # Create output directories if they don't exist (fall back to the
            # current directory when the path has no directory component)
            os.makedirs(os.path.dirname(left_output_path) or '.', exist_ok=True)
            os.makedirs(os.path.dirname(right_output_path) or '.', exist_ok=True)

            # FFmpeg command for the left eye (crop left half plus overlap, then scale)
            left_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={width//2 + self.eye_overlap_pixels}:{height}:0:0,scale={half_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                left_output_path
            ]
            # FFmpeg command for the right eye (crop right half plus overlap, then scale)
            right_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={width//2 + self.eye_overlap_pixels}:{height}:{width//2 - self.eye_overlap_pixels}:0,scale={half_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                right_output_path
            ]

            logger.info(f"Splitting video into left eye: {left_output_path}")
            result_left = subprocess.run(left_command, capture_output=True, text=True)
            if result_left.returncode != 0:
                logger.error(f"FFmpeg failed for left eye: {result_left.stderr}")
                return False

            logger.info(f"Splitting video into right eye: {right_output_path}")
            result_right = subprocess.run(right_command, capture_output=True, text=True)
            if result_right.returncode != 0:
                logger.error(f"FFmpeg failed for right eye: {result_right.stderr}")
                return False

            logger.info("Successfully split video into separate eye videos")
            return True
        except Exception as e:
            logger.error(f"Error splitting video into eyes: {e}")
            return False
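
    # For a hypothetical 3840x1920 input with eye_overlap_pixels=16 and
    # scale=1.0, the two filter graphs above expand to:
    #   left:  crop=1936:1920:0:0,scale=1920:1920
    #   right: crop=1936:1920:1904:0,scale=1920:1920
    # (FFmpeg's crop filter takes w:h:x:y, so the right eye crop starts
    # 16 px left of the frame's centre line.)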
    def combine_eye_masks(self, left_masks: Optional[Dict[int, np.ndarray]],
                          right_masks: Optional[Dict[int, np.ndarray]],
                          full_frame_shape: Tuple[int, int]) -> Dict[int, Dict[int, np.ndarray]]:
        """
        Combine left and right eye masks back into full-frame format.

        Args:
            left_masks: Dictionary of masks from left eye processing (frame_idx -> mask)
            right_masks: Dictionary of masks from right eye processing (frame_idx -> mask)
            full_frame_shape: Shape of the full VR180 frame (height, width)

        Returns:
            Dictionary of combined masks in full-frame format (frame_idx -> {object_id: mask})
        """
        combined_masks = {}
        full_height, full_width = full_frame_shape
        half_width = full_width // 2

        # Gather all frame indices present in either eye
        left_frames = set(left_masks.keys()) if left_masks else set()
        right_frames = set(right_masks.keys()) if right_masks else set()
        all_frames = left_frames.union(right_frames)

        for frame_idx in all_frames:
            # Create an empty full-frame mask
            combined_mask = np.zeros((full_height, full_width), dtype=np.uint8)

            # Place the left eye mask into the left half of the frame
            if left_masks and frame_idx in left_masks:
                left_mask = left_masks[frame_idx]
                if len(left_mask.shape) == 3:
                    left_mask = left_mask.squeeze()
                # Resize the left mask to fit the left half (plus overlap) of the full frame
                left_target_width = half_width + self.eye_overlap_pixels
                if left_mask.shape != (full_height, left_target_width):
                    left_mask = cv2.resize(left_mask.astype(np.uint8),
                                           (left_target_width, full_height),
                                           interpolation=cv2.INTER_NEAREST)
                combined_mask[:, :left_target_width] = left_mask[:, :left_target_width]

            # Place the right eye mask into the right half of the frame
            if right_masks and frame_idx in right_masks:
                right_mask = right_masks[frame_idx]
                if len(right_mask.shape) == 3:
                    right_mask = right_mask.squeeze()
                # Resize the right mask to fit the right half (plus overlap) of the full frame
                right_target_width = half_width + self.eye_overlap_pixels
                right_start_x = half_width - self.eye_overlap_pixels
                if right_mask.shape != (full_height, right_target_width):
                    right_mask = cv2.resize(right_mask.astype(np.uint8),
                                            (right_target_width, full_height),
                                            interpolation=cv2.INTER_NEAREST)
                # Note: with a non-zero overlap, the right mask overwrites the shared strip
                combined_mask[:, right_start_x:] = right_mask

            # Store the combined mask for this frame (using object ID 1 for simplicity)
            combined_masks[frame_idx] = {1: combined_mask}

        logger.debug(f"Combined {len(combined_masks)} frame masks from left/right eyes")
        return combined_masks
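
    # A minimal sketch of the expected call shape (hypothetical sizes; the
    # per-eye dicts would normally come from an upstream segmentation pass):
    #
    #   processor = EyeProcessor(eye_overlap_pixels=16)
    #   left  = {0: np.ones((1920, 1936), dtype=np.uint8)}
    #   right = {0: np.ones((1920, 1936), dtype=np.uint8)}
    #   combined = processor.combine_eye_masks(left, right, (1920, 3840))
    #   # combined[0][1].shape == (1920, 3840)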
    def is_in_left_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check if a detection is in the left half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the left half
        """
        bbox = detection['bbox']
        center_x = (bbox[0] + bbox[2]) / 2
        return center_x < (frame_width // 2)

    def is_in_right_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check if a detection is in the right half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the right half
        """
        return not self.is_in_left_half(detection, frame_width)
    def convert_detection_to_eye_coordinates(self, detection: Dict[str, Any],
                                             eye_side: str, frame_width: int) -> Dict[str, Any]:
        """
        Convert a full-frame detection to eye-specific coordinates.

        Args:
            detection: YOLO detection dictionary with 'bbox' key
            eye_side: 'left' or 'right'
            frame_width: Width of the full VR180 frame

        Returns:
            Detection with converted coordinates for the specific eye
        """
        bbox = detection['bbox'].copy()
        half_width = frame_width // 2
        if eye_side == 'right':
            # Shift right eye coordinates so they start from 0 in the cropped eye frame
            bbox[0] -= (half_width - self.eye_overlap_pixels)  # x1
            bbox[2] -= (half_width - self.eye_overlap_pixels)  # x2
        # Clamp coordinates to the eye frame bounds
        eye_width = half_width + self.eye_overlap_pixels
        bbox[0] = max(0, min(bbox[0], eye_width - 1))
        bbox[2] = max(0, min(bbox[2], eye_width - 1))
        converted_detection = detection.copy()
        converted_detection['bbox'] = bbox
        return converted_detection
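
    # Worked example for the conversion above (hypothetical 3840-wide frame,
    # eye_overlap_pixels=16): a full-frame bbox [2000, 100, 2200, 400] on the
    # right eye is shifted by half_width - overlap = 1904, giving
    # [96, 100, 296, 400] inside the 1936-px-wide right eye frame.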
    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: List[int] = [0, 255, 0]) -> np.ndarray:
        """
        Create a full greenscreen frame for fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for the green screen color

        Returns:
            Full greenscreen frame
        """
        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame
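

# A minimal end-to-end sketch (hypothetical paths and sizes; assumes ffmpeg is
# on PATH and that per-eye masks would come from an upstream segmentation step):
if __name__ == "__main__":
    processor = EyeProcessor(eye_overlap_pixels=16)
    ok = processor.split_video_into_eyes("input_vr180.mp4",
                                         "eyes/left.mp4",
                                         "eyes/right.mp4")
    if ok:
        # Placeholder masks stand in for real segmentation output here
        left = {0: np.ones((1920, 1936), dtype=np.uint8)}
        right = {0: np.ones((1920, 1936), dtype=np.uint8)}
        combined = processor.combine_eye_masks(left, right, (1920, 3840))
        print(f"Combined masks for {len(combined)} frames")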