stage 1 working

2025-07-27 12:11:36 -07:00
parent ed08ef2b4b
commit 46363a8a11
6 changed files with 993 additions and 51 deletions
--- a/core/yolo_detector.py
+++ b/core/yolo_detector.py
@@ -13,17 +13,17 @@ from ultralytics import YOLO
 logger = logging.getLogger(__name__)

 class YOLODetector:
-    \"\"\"Handles YOLO-based human detection for video segments.\"\"\"
+    """Handles YOLO-based human detection for video segments."""
    
    def __init__(self, model_path: str, confidence_threshold: float = 0.6, human_class_id: int = 0):
-        \"\"\"
+        """
        Initialize YOLO detector.
        
        Args:
            model_path: Path to YOLO model weights
            confidence_threshold: Detection confidence threshold
            human_class_id: COCO class ID for humans (0 = person)
-        \"\"\"
+        """
        self.model_path = model_path
        self.confidence_threshold = confidence_threshold
        self.human_class_id = human_class_id
@@ -31,13 +31,13 @@ class YOLODetector:
        # Load YOLO model
        try:
            self.model = YOLO(model_path)
-            logger.info(f\"Loaded YOLO model from {model_path}\")
+            logger.info(f"Loaded YOLO model from {model_path}")
        except Exception as e:
-            logger.error(f\"Failed to load YOLO model: {e}\")
+            logger.error(f"Failed to load YOLO model: {e}")
            raise
    
    def detect_humans_in_frame(self, frame: np.ndarray) -> List[Dict[str, Any]]:
-        \"\"\"
+        """
        Detect humans in a single frame using YOLO.
        
        Args:
@@ -45,7 +45,7 @@ class YOLODetector:
            
        Returns:
            List of human detection dictionaries with bbox and confidence
-        \"\"\"
+        """
        # Run YOLO detection
        results = self.model(frame, conf=self.confidence_threshold, verbose=False)
        
@@ -70,12 +70,12 @@ class YOLODetector:
                            'confidence': conf
                        })
                        
-                        logger.debug(f\"Detected human with confidence {conf:.2f} at {coords}\")
+                        logger.debug(f"Detected human with confidence {conf:.2f} at {coords}")
        
        return human_detections
    
    def detect_humans_in_video_first_frame(self, video_path: str, scale: float = 1.0) -> List[Dict[str, Any]]:
-        \"\"\"
+        """
        Detect humans in the first frame of a video.
        
        Args:
@@ -84,21 +84,21 @@ class YOLODetector:
            
        Returns:
            List of human detection dictionaries
-        \"\"\"
+        """
        if not os.path.exists(video_path):
-            logger.error(f\"Video file not found: {video_path}\")
+            logger.error(f"Video file not found: {video_path}")
            return []
        
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
-            logger.error(f\"Could not open video: {video_path}\")
+            logger.error(f"Could not open video: {video_path}")
            return []
        
        ret, frame = cap.read()
        cap.release()
        
        if not ret:
-            logger.error(f\"Could not read first frame from: {video_path}\")
+            logger.error(f"Could not read first frame from: {video_path}")
            return []
        
        # Scale frame if needed
@@ -108,7 +108,7 @@ class YOLODetector:
        return self.detect_humans_in_frame(frame)
    
    def save_detections_to_file(self, detections: List[Dict[str, Any]], output_path: str) -> bool:
-        \"\"\"
+        """
        Save detection results to file.
        
        Args:
@@ -117,26 +117,26 @@ class YOLODetector:
            
        Returns:
            True if saved successfully
-        \"\"\"
+        """
        try:
            with open(output_path, 'w') as f:
-                f.write(\"# YOLO Human Detections\\n\")
+                f.write("# YOLO Human Detections\n")  # real newline; "\\n" would write a literal backslash-n
                if detections:
                    for detection in detections:
                        bbox = detection['bbox']
                        conf = detection['confidence']
-                        f.write(f\"{bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]},{conf}\\n\")
-                    logger.info(f\"Saved {len(detections)} detections to {output_path}\")
+                        f.write(f"{bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]},{conf}\n")
+                    logger.info(f"Saved {len(detections)} detections to {output_path}")
                else:
-                    f.write(\"# No humans detected\\n\")
-                    logger.info(f\"Saved empty detection file to {output_path}\")
+                    f.write("# No humans detected\n")
+                    logger.info(f"Saved empty detection file to {output_path}")
            return True
        except Exception as e:
-            logger.error(f\"Failed to save detections to {output_path}: {e}\")
+            logger.error(f"Failed to save detections to {output_path}: {e}")
            return False
    
    def load_detections_from_file(self, file_path: str) -> List[Dict[str, Any]]:
-        \"\"\"
+        """
        Load detection results from file.
        
        Args:
@@ -144,11 +144,11 @@ class YOLODetector:
            
        Returns:
            List of detection dictionaries
-        \"\"\"
+        """
        detections = []
        
        if not os.path.exists(file_path):
-            logger.warning(f\"Detection file not found: {file_path}\")
+            logger.warning(f"Detection file not found: {file_path}")
            return detections
        
        try:
@@ -170,18 +170,18 @@ class YOLODetector:
                                'confidence': conf
                            })
                        except ValueError:
-                            logger.warning(f\"Invalid detection line: {line}\")
+                            logger.warning(f"Invalid detection line: {line}")
                            continue
            
-            logger.info(f\"Loaded {len(detections)} detections from {file_path}\")
+            logger.info(f"Loaded {len(detections)} detections from {file_path}")
        except Exception as e:
-            logger.error(f\"Failed to load detections from {file_path}: {e}\")
+            logger.error(f"Failed to load detections from {file_path}: {e}")
        
        return detections
    
    def process_segments_batch(self, segments_info: List[dict], detect_segments: List[int], 
                              scale: float = 0.5) -> Dict[int, List[Dict[str, Any]]]:
-        \"\"\"
+        """
        Process multiple segments for human detection.
        
        Args:
@@ -191,7 +191,7 @@ class YOLODetector:
            
        Returns:
            Dictionary mapping segment index to detection results
-        \"\"\"
+        """
        results = {}
        
        for segment_info in segments_info:
@@ -202,17 +202,17 @@ class YOLODetector:
                continue
            
            video_path = segment_info['video_file']
-            detection_file = os.path.join(segment_info['directory'], \"yolo_detections\")
+            detection_file = os.path.join(segment_info['directory'], "yolo_detections")
            
            # Skip if already processed
            if os.path.exists(detection_file):
-                logger.info(f\"Segment {segment_idx} already has detections, skipping\")
+                logger.info(f"Segment {segment_idx} already has detections, skipping")
                detections = self.load_detections_from_file(detection_file)
                results[segment_idx] = detections
                continue
            
            # Run detection
-            logger.info(f\"Processing segment {segment_idx} for human detection\")
+            logger.info(f"Processing segment {segment_idx} for human detection")
            detections = self.detect_humans_in_video_first_frame(video_path, scale)
            
            # Save results
@@ -223,7 +223,7 @@ class YOLODetector:
    
    def convert_detections_to_sam2_prompts(self, detections: List[Dict[str, Any]], 
                                         frame_width: int) -> List[Dict[str, Any]]:
-        \"\"\"
+        """
        Convert YOLO detections to SAM2-compatible prompts for stereo video.
        
        Args:
@@ -232,7 +232,7 @@ class YOLODetector:
            
        Returns:
            List of SAM2 prompt dictionaries with obj_id and bbox
-        \"\"\"
+        """
        if not detections:
            return []
        
@@ -282,5 +282,5 @@ class YOLODetector:
                'confidence': detection['confidence']
            })
        
-        logger.debug(f\"Converted {len(detections)} detections to {len(prompts)} SAM2 prompts\")
+        logger.debug(f"Converted {len(detections)} detections to {len(prompts)} SAM2 prompts")
        return prompts