sbs working phase 1
core/eye_processor.py (266 lines, Normal file)
@@ -0,0 +1,266 @@
"""
Eye processor module for VR180 separate eye processing.

Handles splitting VR180 side-by-side frames into separate left/right eyes and recombining.
"""

import logging
import os
import subprocess
from typing import Any, Dict, Optional, Tuple

import cv2
import numpy as np

logger = logging.getLogger(__name__)


class EyeProcessor:
    """Handles VR180 eye-specific processing operations."""

    def __init__(self, eye_overlap_pixels: int = 0):
        """
        Initialize eye processor.

        Args:
            eye_overlap_pixels: Number of pixels to overlap between eyes for blending
        """
        self.eye_overlap_pixels = eye_overlap_pixels

    def split_frame_into_eyes(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split a VR180 side-by-side frame into separate left and right eye frames.

        Args:
            frame: Input VR180 frame (BGR format)

        Returns:
            Tuple of (left_eye_frame, right_eye_frame)
        """
        if frame.ndim != 3 or frame.shape[2] != 3:
            raise ValueError("Frame must be a 3-channel BGR image")

        height, width, _ = frame.shape
        half_width = width // 2

        # Extract left and right eye frames, each keeping the configured
        # overlap strip around the center seam
        left_eye = frame[:, :half_width + self.eye_overlap_pixels, :]
        right_eye = frame[:, half_width - self.eye_overlap_pixels:, :]

        logger.debug(f"Split frame {width}x{height} into left: {left_eye.shape} and right: {right_eye.shape}")

        return left_eye, right_eye
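
    # A worked example of the slicing above, with hypothetical numbers: for a
    # 4096x2048 side-by-side frame and eye_overlap_pixels=16, half_width is
    # 2048, so left_eye is frame[:, :2064, :] and right_eye is frame[:, 2032:, :]
    # (both shaped (2048, 2064, 3)); the 32-column strip they share around the
    # seam is what later blending can use.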

    def split_video_into_eyes(self, input_video_path: str, left_output_path: str,
                              right_output_path: str, scale: float = 1.0) -> bool:
        """
        Split a VR180 video into separate left and right eye videos using FFmpeg.

        Args:
            input_video_path: Path to input VR180 video
            left_output_path: Output path for left eye video
            right_output_path: Output path for right eye video
            scale: Scale factor for output videos (default: 1.0)

        Returns:
            True if successful, False otherwise
        """
        try:
            # Get video properties
            cap = cv2.VideoCapture(input_video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {input_video_path}")
                return False

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            cap.release()

            # Each eye is half the frame plus the overlap strip. Scale the
            # cropped width (not the bare half-width) so the crop's aspect
            # ratio is preserved, and round down to even values since libx264
            # with yuv420p output requires even dimensions.
            crop_width = width // 2 + self.eye_overlap_pixels
            eye_output_width = int(crop_width * scale) // 2 * 2
            output_height = int(height * scale) // 2 * 2

            # Create output directories if they don't exist
            os.makedirs(os.path.dirname(left_output_path) or '.', exist_ok=True)
            os.makedirs(os.path.dirname(right_output_path) or '.', exist_ok=True)

            # FFmpeg command for left eye (crop left half plus overlap)
            left_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={crop_width}:{height}:0:0,scale={eye_output_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                left_output_path
            ]

            # FFmpeg command for right eye (crop right half plus overlap)
            right_command = [
                'ffmpeg', '-y',
                '-i', input_video_path,
                '-vf', f'crop={crop_width}:{height}:{width // 2 - self.eye_overlap_pixels}:0,scale={eye_output_width}:{output_height}',
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '18',
                right_output_path
            ]

            logger.info(f"Splitting video into left eye: {left_output_path}")
            result_left = subprocess.run(left_command, capture_output=True, text=True)
            if result_left.returncode != 0:
                logger.error(f"FFmpeg failed for left eye: {result_left.stderr}")
                return False

            logger.info(f"Splitting video into right eye: {right_output_path}")
            result_right = subprocess.run(right_command, capture_output=True, text=True)
            if result_right.returncode != 0:
                logger.error(f"FFmpeg failed for right eye: {result_right.stderr}")
                return False

            logger.info("Successfully split video into separate eye videos")
            return True

        except Exception as e:
            logger.error(f"Error splitting video into eyes: {e}")
            return False
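
    # For illustration, the filter strings the code above builds (hypothetical
    # numbers): a 5760x2880 input with eye_overlap_pixels=0 and scale=0.5 gives
    # 'crop=2880:2880:0:0,scale=1440:1440' for the left eye and
    # 'crop=2880:2880:2880:0,scale=1440:1440' for the right eye.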

    def combine_eye_masks(self, left_masks: Optional[Dict[int, np.ndarray]],
                          right_masks: Optional[Dict[int, np.ndarray]],
                          full_frame_shape: Tuple[int, int]) -> Dict[int, Dict[int, np.ndarray]]:
        """
        Combine left and right eye masks back into full-frame format.

        Args:
            left_masks: Dictionary of masks from left eye processing (frame_idx -> mask)
            right_masks: Dictionary of masks from right eye processing (frame_idx -> mask)
            full_frame_shape: Shape of the full VR180 frame (height, width)

        Returns:
            Dictionary mapping frame_idx -> {object_id: combined_mask} in full-frame format
        """
        combined_masks = {}
        full_height, full_width = full_frame_shape
        half_width = full_width // 2

        # Get all frame indices from both eyes
        left_frames = set(left_masks.keys()) if left_masks else set()
        right_frames = set(right_masks.keys()) if right_masks else set()
        all_frames = left_frames.union(right_frames)

        for frame_idx in all_frames:
            # Create full-frame mask
            combined_mask = np.zeros((full_height, full_width), dtype=np.uint8)

            # Add left eye mask to left half of frame
            if left_masks and frame_idx in left_masks:
                left_mask = left_masks[frame_idx]
                if left_mask.ndim == 3:
                    left_mask = left_mask.squeeze()

                # Resize left mask to fit the left half (plus overlap) of the full frame
                left_target_width = half_width + self.eye_overlap_pixels
                if left_mask.shape != (full_height, left_target_width):
                    left_mask = cv2.resize(left_mask.astype(np.uint8),
                                           (left_target_width, full_height),
                                           interpolation=cv2.INTER_NEAREST)

                # Place in left half of combined mask
                combined_mask[:, :left_target_width] = left_mask

            # Add right eye mask to right half of frame
            if right_masks and frame_idx in right_masks:
                right_mask = right_masks[frame_idx]
                if right_mask.ndim == 3:
                    right_mask = right_mask.squeeze()

                # Resize right mask to fit the right half (plus overlap); deriving
                # the width from the slice start keeps odd frame widths consistent
                right_start_x = half_width - self.eye_overlap_pixels
                right_target_width = full_width - right_start_x

                if right_mask.shape != (full_height, right_target_width):
                    right_mask = cv2.resize(right_mask.astype(np.uint8),
                                            (right_target_width, full_height),
                                            interpolation=cv2.INTER_NEAREST)

                # Place in right half of combined mask; in the overlap strip
                # the right mask overwrites the left one
                combined_mask[:, right_start_x:] = right_mask

            # Store combined mask for this frame (using object ID 1 for simplicity)
            combined_masks[frame_idx] = {1: combined_mask}

        logger.debug(f"Combined {len(combined_masks)} frame masks from left/right eyes")
        return combined_masks
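
    # Shape sketch for the recombination above (hypothetical numbers): for
    # full_frame_shape=(2880, 5760) with no overlap, each per-eye mask is
    # resized to 2880x2880 and the result is one 2880x5760 uint8 mask per
    # frame, stored as {frame_idx: {1: mask}}.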

    def is_in_left_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check if a detection is in the left half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the left half
        """
        bbox = detection['bbox']
        center_x = (bbox[0] + bbox[2]) / 2
        return center_x < (frame_width // 2)

    def is_in_right_half(self, detection: Dict[str, Any], frame_width: int) -> bool:
        """
        Check if a detection is in the right half of a VR180 frame.

        Args:
            detection: YOLO detection dictionary with 'bbox' key ([x1, y1, x2, y2])
            frame_width: Width of the full VR180 frame

        Returns:
            True if the detection center is in the right half
        """
        return not self.is_in_left_half(detection, frame_width)
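
    # Example of the center test above (hypothetical numbers): in a 4096-wide
    # frame, bbox [100, 50, 300, 400] has center_x = 200 < 2048, so it is in
    # the left half; bbox [2500, 50, 2700, 400] (center_x = 2600) is not.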

    def convert_detection_to_eye_coordinates(self, detection: Dict[str, Any],
                                             eye_side: str, frame_width: int) -> Dict[str, Any]:
        """
        Convert a full-frame detection to eye-specific coordinates.

        Args:
            detection: YOLO detection dictionary with 'bbox' key
            eye_side: 'left' or 'right'
            frame_width: Width of the full VR180 frame

        Returns:
            Detection with converted coordinates for the specific eye
        """
        bbox = detection['bbox'].copy()
        half_width = frame_width // 2

        if eye_side == 'right':
            # Shift right eye coordinates to start from 0
            bbox[0] -= (half_width - self.eye_overlap_pixels)  # x1
            bbox[2] -= (half_width - self.eye_overlap_pixels)  # x2

        # Ensure coordinates are within the eye frame bounds
        eye_width = half_width + self.eye_overlap_pixels
        bbox[0] = max(0, min(bbox[0], eye_width - 1))
        bbox[2] = max(0, min(bbox[2], eye_width - 1))

        converted_detection = detection.copy()
        converted_detection['bbox'] = bbox

        return converted_detection
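
    # Worked example for the conversion above (hypothetical numbers): with
    # frame_width=4096 and eye_overlap_pixels=0, a right-eye detection with
    # bbox [3000, 100, 3200, 400] becomes [952, 100, 1152, 400] after the
    # shift (3000 - 2048 = 952), already inside the 2048-wide eye frame.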

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Tuple[int, int, int] = (0, 255, 0)) -> np.ndarray:
        """
        Create a full greenscreen frame as a fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for the green screen color

        Returns:
            Full greenscreen frame
        """
        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame
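
# Minimal end-to-end usage sketch; the paths, sizes, and the per-eye mask
# dictionaries are hypothetical and depend on the surrounding pipeline:
#
#   processor = EyeProcessor(eye_overlap_pixels=16)
#   if processor.split_video_into_eyes('input/vr180_sbs.mp4',
#                                      'work/left_eye.mp4',
#                                      'work/right_eye.mp4',
#                                      scale=0.5):
#       # ...run per-eye human segmentation on the two videos, then:
#       masks = processor.combine_eye_masks(left_masks, right_masks, (2880, 5760))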