stage 1 working

This commit is contained in:
2025-07-27 12:11:36 -07:00
parent ed08ef2b4b
commit 46363a8a11
6 changed files with 993 additions and 51 deletions

View File

@@ -7,7 +7,7 @@ import os
import subprocess
import logging
from typing import List, Tuple
from ..utils.file_utils import ensure_directory, get_video_file_name
from utils.file_utils import ensure_directory, get_video_file_name
logger = logging.getLogger(__name__)

View File

@@ -13,17 +13,17 @@ from ultralytics import YOLO
logger = logging.getLogger(__name__)
class YOLODetector:
\"\"\"Handles YOLO-based human detection for video segments.\"\"\"
"""Handles YOLO-based human detection for video segments."""
def __init__(self, model_path: str, confidence_threshold: float = 0.6, human_class_id: int = 0):
\"\"\"
"""
Initialize YOLO detector.
Args:
model_path: Path to YOLO model weights
confidence_threshold: Detection confidence threshold
human_class_id: COCO class ID for humans (0 = person)
\"\"\"
"""
self.model_path = model_path
self.confidence_threshold = confidence_threshold
self.human_class_id = human_class_id
@@ -31,13 +31,13 @@ class YOLODetector:
# Load YOLO model
try:
self.model = YOLO(model_path)
logger.info(f\"Loaded YOLO model from {model_path}\")
logger.info(f"Loaded YOLO model from {model_path}")
except Exception as e:
logger.error(f\"Failed to load YOLO model: {e}\")
logger.error(f"Failed to load YOLO model: {e}")
raise
def detect_humans_in_frame(self, frame: np.ndarray) -> List[Dict[str, Any]]:
\"\"\"
"""
Detect humans in a single frame using YOLO.
Args:
@@ -45,7 +45,7 @@ class YOLODetector:
Returns:
List of human detection dictionaries with bbox and confidence
\"\"\"
"""
# Run YOLO detection
results = self.model(frame, conf=self.confidence_threshold, verbose=False)
@@ -70,12 +70,12 @@ class YOLODetector:
'confidence': conf
})
logger.debug(f\"Detected human with confidence {conf:.2f} at {coords}\")
logger.debug(f"Detected human with confidence {conf:.2f} at {coords}")
return human_detections
def detect_humans_in_video_first_frame(self, video_path: str, scale: float = 1.0) -> List[Dict[str, Any]]:
\"\"\"
"""
Detect humans in the first frame of a video.
Args:
@@ -84,21 +84,21 @@ class YOLODetector:
Returns:
List of human detection dictionaries
\"\"\"
"""
if not os.path.exists(video_path):
logger.error(f\"Video file not found: {video_path}\")
logger.error(f"Video file not found: {video_path}")
return []
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
logger.error(f\"Could not open video: {video_path}\")
logger.error(f"Could not open video: {video_path}")
return []
ret, frame = cap.read()
cap.release()
if not ret:
logger.error(f\"Could not read first frame from: {video_path}\")
logger.error(f"Could not read first frame from: {video_path}")
return []
# Scale frame if needed
@@ -108,7 +108,7 @@ class YOLODetector:
return self.detect_humans_in_frame(frame)
def save_detections_to_file(self, detections: List[Dict[str, Any]], output_path: str) -> bool:
\"\"\"
"""
Save detection results to file.
Args:
@@ -117,26 +117,26 @@ class YOLODetector:
Returns:
True if saved successfully
\"\"\"
"""
try:
with open(output_path, 'w') as f:
f.write(\"# YOLO Human Detections\\n\")
f.write("# YOLO Human Detections\\n")
if detections:
for detection in detections:
bbox = detection['bbox']
conf = detection['confidence']
f.write(f\"{bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]},{conf}\\n\")
logger.info(f\"Saved {len(detections)} detections to {output_path}\")
f.write(f"{bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]},{conf}\\n")
logger.info(f"Saved {len(detections)} detections to {output_path}")
else:
f.write(\"# No humans detected\\n\")
logger.info(f\"Saved empty detection file to {output_path}\")
f.write("# No humans detected\\n")
logger.info(f"Saved empty detection file to {output_path}")
return True
except Exception as e:
logger.error(f\"Failed to save detections to {output_path}: {e}\")
logger.error(f"Failed to save detections to {output_path}: {e}")
return False
def load_detections_from_file(self, file_path: str) -> List[Dict[str, Any]]:
\"\"\"
"""
Load detection results from file.
Args:
@@ -144,11 +144,11 @@ class YOLODetector:
Returns:
List of detection dictionaries
\"\"\"
"""
detections = []
if not os.path.exists(file_path):
logger.warning(f\"Detection file not found: {file_path}\")
logger.warning(f"Detection file not found: {file_path}")
return detections
try:
@@ -170,18 +170,18 @@ class YOLODetector:
'confidence': conf
})
except ValueError:
logger.warning(f\"Invalid detection line: {line}\")
logger.warning(f"Invalid detection line: {line}")
continue
logger.info(f\"Loaded {len(detections)} detections from {file_path}\")
logger.info(f"Loaded {len(detections)} detections from {file_path}")
except Exception as e:
logger.error(f\"Failed to load detections from {file_path}: {e}\")
logger.error(f"Failed to load detections from {file_path}: {e}")
return detections
def process_segments_batch(self, segments_info: List[dict], detect_segments: List[int],
scale: float = 0.5) -> Dict[int, List[Dict[str, Any]]]:
\"\"\"
"""
Process multiple segments for human detection.
Args:
@@ -191,7 +191,7 @@ class YOLODetector:
Returns:
Dictionary mapping segment index to detection results
\"\"\"
"""
results = {}
for segment_info in segments_info:
@@ -202,17 +202,17 @@ class YOLODetector:
continue
video_path = segment_info['video_file']
detection_file = os.path.join(segment_info['directory'], \"yolo_detections\")
detection_file = os.path.join(segment_info['directory'], "yolo_detections")
# Skip if already processed
if os.path.exists(detection_file):
logger.info(f\"Segment {segment_idx} already has detections, skipping\")
logger.info(f"Segment {segment_idx} already has detections, skipping")
detections = self.load_detections_from_file(detection_file)
results[segment_idx] = detections
continue
# Run detection
logger.info(f\"Processing segment {segment_idx} for human detection\")
logger.info(f"Processing segment {segment_idx} for human detection")
detections = self.detect_humans_in_video_first_frame(video_path, scale)
# Save results
@@ -223,7 +223,7 @@ class YOLODetector:
def convert_detections_to_sam2_prompts(self, detections: List[Dict[str, Any]],
frame_width: int) -> List[Dict[str, Any]]:
\"\"\"
"""
Convert YOLO detections to SAM2-compatible prompts for stereo video.
Args:
@@ -232,7 +232,7 @@ class YOLODetector:
Returns:
List of SAM2 prompt dictionaries with obj_id and bbox
\"\"\"
"""
if not detections:
return []
@@ -282,5 +282,5 @@ class YOLODetector:
'confidence': detection['confidence']
})
logger.debug(f\"Converted {len(detections)} detections to {len(prompts)} SAM2 prompts\")
logger.debug(f"Converted {len(detections)} detections to {len(prompts)} SAM2 prompts")
return prompts