SBS (side-by-side stereo) processing pipeline working — phase 1

This commit is contained in:
2025-07-30 18:07:26 -07:00
parent 6617acb1c9
commit 70044e1b10
8 changed files with 2417 additions and 7 deletions

View File

@@ -732,4 +732,300 @@ class YOLODetector:
except Exception as e:
logger.error(f"Error creating debug frame: {e}")
return False
def detect_humans_in_single_eye(self, frame: np.ndarray, eye_side: str) -> List[Dict[str, Any]]:
    """
    Run human detection on one eye of a stereo pair.

    Thin wrapper around the standard full-frame detector that adds
    eye-specific logging.

    Args:
        frame: BGR image for a single eye.
        eye_side: 'left' or 'right'.

    Returns:
        Detection dictionaries produced for this eye frame.
    """
    logger.info(f"Running YOLO detection on {eye_side} eye frame")

    # Delegate to the normal single-frame detector; the eye frame is
    # just an ordinary image at this point.
    results = self.detect_humans_in_frame(frame)

    logger.info(f"YOLO {eye_side.upper()} Eye: Found {len(results)} human detections")
    for idx, det in enumerate(results):
        logger.debug(
            f"YOLO {eye_side.upper()} Eye Detection {idx+1}: "
            f"bbox={det['bbox']}, conf={det['confidence']:.3f}, "
            f"has_mask={det.get('has_mask', False)}"
        )
    return results
def convert_eye_detections_to_sam2_prompts(self, detections: List[Dict[str, Any]],
                                           eye_side: str) -> List[Dict[str, Any]]:
    """
    Convert single eye detections to SAM2 prompts (always uses obj_id=1 for single eye processing).

    Args:
        detections: YOLO detection results for one eye.
        eye_side: 'left' or 'right'.

    Returns:
        A list containing at most one SAM2 prompt dict (obj_id=1), or an
        empty list when no detections were supplied.
    """
    if not detections:
        logger.warning(f"No detections provided for {eye_side} eye SAM2 prompt conversion")
        return []

    logger.info(f"Converting {len(detections)} {eye_side} eye detections to SAM2 prompts")

    # Single-eye processing tracks exactly one subject, so only the
    # highest-confidence detection is promoted to a SAM2 prompt.
    top = max(detections, key=lambda d: d['confidence'])
    prompt = {
        'obj_id': 1,  # Always use obj_id=1 for single eye processing
        'bbox': top['bbox'].copy(),
        'confidence': top['confidence'],
    }

    logger.info(f"{eye_side.upper()} Eye: Converted best detection (conf={top['confidence']:.3f}) to SAM2 Object 1")
    return [prompt]
def has_any_detections(self, detections_list: List[List[Dict[str, Any]]]) -> bool:
    """
    Check if any detections exist in a list of detection lists.

    Args:
        detections_list: List of detection lists (e.g., [left_detections, right_detections]).

    Returns:
        True if at least one inner list is non-empty.
    """
    # any() applies truthiness to each inner list, which is exactly the
    # original "is this list non-empty" check.
    return any(detections_list)
def split_detections_by_eye(self, detections: List[Dict[str, Any]], frame_width: int) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Split VR180 detections into left and right eye detections with coordinate conversion.

    A detection is assigned to an eye based on which half of the frame its
    bbox center falls in. Right-eye bboxes are shifted so x starts at 0 and
    clamped to [0, half_width]; left-eye bboxes keep their coordinates.

    Args:
        detections: List of full-frame VR180 detections.
        frame_width: Width of the full VR180 frame.

    Returns:
        Tuple of (left_eye_detections, right_eye_detections) with converted coordinates.
    """
    half_width = frame_width // 2
    left_detections = []
    right_detections = []
    logger.info(f"Splitting {len(detections)} VR180 detections by eye (frame_width={frame_width}, half_width={half_width})")
    for i, detection in enumerate(detections):
        bbox = detection['bbox']
        center_x = (bbox[0] + bbox[2]) / 2
        logger.info(f"Detection {i}: bbox={bbox}, center_x={center_x:.1f}")
        # Shallow-copy the detection and deep-copy its bbox so that mutating
        # a converted detection can never corrupt the caller's original.
        converted_detection = detection.copy()
        converted_bbox = bbox.copy()
        if center_x < half_width:
            # Left eye: coordinates are unchanged, but we still store the
            # copied bbox. (Bug fix: previously the copy was computed and
            # discarded, leaving the left-eye bbox aliased to the input.)
            converted_detection['bbox'] = converted_bbox
            if detection.get('has_mask', False) and 'mask' in detection:
                original_mask = detection['mask']
                # Mask stays in full-frame coordinates; cropping to the
                # left half is deferred to per-eye processing.
                converted_detection['mask'] = original_mask
                logger.info(f"Detection {i}: LEFT eye mask shape: {original_mask.shape}")
            left_detections.append(converted_detection)
            logger.info(f"Detection {i}: Assigned to LEFT eye, center_x={center_x:.1f} < {half_width}, bbox={bbox}")
        else:
            # Right eye: shift x coordinates to be relative to the right-half
            # origin, then clamp into the right-eye frame bounds.
            original_bbox = converted_bbox.copy()
            converted_bbox[0] -= half_width  # x1
            converted_bbox[2] -= half_width  # x2
            converted_bbox[0] = max(0, converted_bbox[0])
            converted_bbox[2] = max(0, min(converted_bbox[2], half_width))
            converted_detection['bbox'] = converted_bbox
            if detection.get('has_mask', False) and 'mask' in detection:
                original_mask = detection['mask']
                # Mask is passed through untouched; the x-shift/crop for the
                # right eye is handled downstream in per-eye processing.
                converted_detection['mask'] = original_mask
                logger.info(f"Detection {i}: RIGHT eye mask shape: {original_mask.shape}")
            right_detections.append(converted_detection)
            logger.info(f"Detection {i}: Assigned to RIGHT eye, center_x={center_x:.1f} >= {half_width}, original_bbox={original_bbox}, converted_bbox={converted_bbox}")
    logger.info(f"Split result: {len(left_detections)} left eye, {len(right_detections)} right eye detections")
    return left_detections, right_detections
def save_eye_debug_frames(self, left_frame: np.ndarray, right_frame: np.ndarray,
                          left_detections: List[Dict[str, Any]], right_detections: List[Dict[str, Any]],
                          left_output_path: str, right_output_path: str) -> Tuple[bool, bool]:
    """
    Save debug frames for both left and right eye detections.

    Args:
        left_frame: Left eye frame.
        right_frame: Right eye frame.
        left_detections: Left eye detections.
        right_detections: Right eye detections.
        left_output_path: Output path for the left eye debug frame.
        right_output_path: Output path for the right eye debug frame.

    Returns:
        Tuple of (left_success, right_success).
    """
    logger.info(f"Saving eye-specific debug frames")

    # Render both eyes first (matches original save-then-log ordering).
    left_ok = self._save_single_eye_debug_frame(
        left_frame, left_detections, left_output_path, "LEFT"
    )
    right_ok = self._save_single_eye_debug_frame(
        right_frame, right_detections, right_output_path, "RIGHT"
    )

    # Report each successful save.
    for ok, word, path in ((left_ok, "left", left_output_path),
                           (right_ok, "right", right_output_path)):
        if ok:
            logger.info(f"Saved {word} eye debug frame: {path}")

    return left_ok, right_ok
def _save_single_eye_debug_frame(self, frame: np.ndarray, detections: List[Dict[str, Any]],
                                 output_path: str, eye_side: str) -> bool:
    """
    Save a debug frame for a single eye with eye-specific visualizations.

    Draws either a translucent segmentation-mask overlay (when a mask is
    available) or a bounding box for each detection, plus per-detection
    labels and a header with eye name, YOLO mode, summary, and frame size.

    Args:
        frame: Single eye frame (BGR format from OpenCV)
        detections: List of detection dictionaries for this eye
        output_path: Path to save the debug image
        eye_side: "LEFT" or "RIGHT"
    Returns:
        True if saved successfully
    """
    try:
        # Work on a copy so the caller's frame is never mutated.
        debug_frame = frame.copy()
        # Draw masks or bounding boxes for each detection
        for i, detection in enumerate(detections):
            bbox = detection['bbox']
            confidence = detection['confidence']
            has_mask = detection.get('has_mask', False)
            # Extract coordinates
            x1, y1, x2, y2 = map(int, bbox)
            # Choose color based on confidence (green for high, yellow for medium, red for low)
            if confidence >= 0.8:
                color = (0, 255, 0)  # Green
            elif confidence >= 0.6:
                color = (0, 255, 255)  # Yellow
            else:
                color = (0, 0, 255)  # Red
            if has_mask and 'mask' in detection:
                # Draw segmentation mask
                mask = detection['mask']
                # Resize mask to match frame if needed; nearest-neighbor keeps
                # the mask binary, then re-threshold at 0.5 after the resize.
                if mask.shape != debug_frame.shape[:2]:
                    mask = cv2.resize(mask.astype(np.float32), (debug_frame.shape[1], debug_frame.shape[0]), interpolation=cv2.INTER_NEAREST)
                    mask = mask > 0.5
                mask = mask.astype(bool)
                # Apply colored overlay with transparency (30% color, 70% frame)
                overlay = debug_frame.copy()
                overlay[mask] = color
                cv2.addWeighted(overlay, 0.3, debug_frame, 0.7, 0, debug_frame)
                # Draw mask outline
                contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                cv2.drawContours(debug_frame, contours, -1, color, 2)
                # Prepare label text for segmentation
                label = f"Person {i+1}: {confidence:.2f} (MASK)"
            else:
                # Draw bounding box (detection mode or no mask available)
                cv2.rectangle(debug_frame, (x1, y1), (x2, y2), color, 2)
                # Prepare label text for detection
                label = f"Person {i+1}: {confidence:.2f} (BBOX)"
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]
            # Draw label background
            # NOTE(review): when y1 is small the background rectangle can
            # extend above the top edge of the frame — confirm acceptable.
            cv2.rectangle(debug_frame,
                          (x1, y1 - label_size[1] - 10),
                          (x1 + label_size[0], y1),
                          color, -1)
            # Draw label text
            cv2.putText(debug_frame, label,
                        (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (255, 255, 255), 2)
        # Add title specific to this eye
        frame_height, frame_width = debug_frame.shape[:2]
        title = f"{eye_side} EYE: {len(detections)} detections"
        cv2.putText(debug_frame, title, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)
        # Add mode information
        mode_text = f"YOLO Mode: {self.mode.upper()}"
        masks_available = sum(1 for d in detections if d.get('has_mask', False))
        # Summary reflects whether any masks were produced in segmentation mode.
        if self.supports_segmentation and masks_available > 0:
            summary = f"{len(detections)} detections → {masks_available} MASKS"
        else:
            summary = f"{len(detections)} detections → BOUNDING BOXES"
        cv2.putText(debug_frame, mode_text,
                    (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (0, 255, 255), 2)  # Yellow for mode
        cv2.putText(debug_frame, summary,
                    (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (255, 255, 255), 2)
        # Add frame dimensions info
        dims_info = f"Frame: {frame_width}x{frame_height}"
        cv2.putText(debug_frame, dims_info,
                    (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (255, 255, 255), 2)
        # Save debug frame
        success = cv2.imwrite(output_path, debug_frame)
        if success:
            logger.info(f"Saved {eye_side} eye debug frame to {output_path}")
        else:
            logger.error(f"Failed to save {eye_side} eye debug frame to {output_path}")
        return success
    except Exception as e:
        # Best-effort debug output: never let visualization failures
        # propagate into the main pipeline.
        logger.error(f"Error creating {eye_side} eye debug frame: {e}")
        return False