stage 1 working
This commit is contained in:
@@ -13,17 +13,17 @@ from ultralytics import YOLO
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class YOLODetector:
|
||||
\"\"\"Handles YOLO-based human detection for video segments.\"\"\"
|
||||
"""Handles YOLO-based human detection for video segments."""
|
||||
|
||||
def __init__(self, model_path: str, confidence_threshold: float = 0.6, human_class_id: int = 0):
|
||||
\"\"\"
|
||||
"""
|
||||
Initialize YOLO detector.
|
||||
|
||||
Args:
|
||||
model_path: Path to YOLO model weights
|
||||
confidence_threshold: Detection confidence threshold
|
||||
human_class_id: COCO class ID for humans (0 = person)
|
||||
\"\"\"
|
||||
"""
|
||||
self.model_path = model_path
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.human_class_id = human_class_id
|
||||
@@ -31,13 +31,13 @@ class YOLODetector:
|
||||
# Load YOLO model
|
||||
try:
|
||||
self.model = YOLO(model_path)
|
||||
logger.info(f\"Loaded YOLO model from {model_path}\")
|
||||
logger.info(f"Loaded YOLO model from {model_path}")
|
||||
except Exception as e:
|
||||
logger.error(f\"Failed to load YOLO model: {e}\")
|
||||
logger.error(f"Failed to load YOLO model: {e}")
|
||||
raise
|
||||
|
||||
def detect_humans_in_frame(self, frame: np.ndarray) -> List[Dict[str, Any]]:
|
||||
\"\"\"
|
||||
"""
|
||||
Detect humans in a single frame using YOLO.
|
||||
|
||||
Args:
|
||||
@@ -45,7 +45,7 @@ class YOLODetector:
|
||||
|
||||
Returns:
|
||||
List of human detection dictionaries with bbox and confidence
|
||||
\"\"\"
|
||||
"""
|
||||
# Run YOLO detection
|
||||
results = self.model(frame, conf=self.confidence_threshold, verbose=False)
|
||||
|
||||
@@ -70,12 +70,12 @@ class YOLODetector:
|
||||
'confidence': conf
|
||||
})
|
||||
|
||||
logger.debug(f\"Detected human with confidence {conf:.2f} at {coords}\")
|
||||
logger.debug(f"Detected human with confidence {conf:.2f} at {coords}")
|
||||
|
||||
return human_detections
|
||||
|
||||
def detect_humans_in_video_first_frame(self, video_path: str, scale: float = 1.0) -> List[Dict[str, Any]]:
|
||||
\"\"\"
|
||||
"""
|
||||
Detect humans in the first frame of a video.
|
||||
|
||||
Args:
|
||||
@@ -84,21 +84,21 @@ class YOLODetector:
|
||||
|
||||
Returns:
|
||||
List of human detection dictionaries
|
||||
\"\"\"
|
||||
"""
|
||||
if not os.path.exists(video_path):
|
||||
logger.error(f\"Video file not found: {video_path}\")
|
||||
logger.error(f"Video file not found: {video_path}")
|
||||
return []
|
||||
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
logger.error(f\"Could not open video: {video_path}\")
|
||||
logger.error(f"Could not open video: {video_path}")
|
||||
return []
|
||||
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
|
||||
if not ret:
|
||||
logger.error(f\"Could not read first frame from: {video_path}\")
|
||||
logger.error(f"Could not read first frame from: {video_path}")
|
||||
return []
|
||||
|
||||
# Scale frame if needed
|
||||
@@ -108,7 +108,7 @@ class YOLODetector:
|
||||
return self.detect_humans_in_frame(frame)
|
||||
|
||||
def save_detections_to_file(self, detections: List[Dict[str, Any]], output_path: str) -> bool:
    """
    Save detection results to file.

    The file starts with a "# YOLO Human Detections" header line. Each
    detection is then written on its own line as five comma-separated
    values: the first four entries of detection['bbox'] followed by
    detection['confidence']. When there are no detections, a single
    "# No humans detected" comment line is written instead.

    Args:
        detections: List of detection dictionaries; each must provide a
            'bbox' entry with at least four values and a 'confidence' entry
        output_path: Path of the file to write (overwritten if it exists)

    Returns:
        True if saved successfully, False if any error occurred (the error
        is logged rather than raised)
    """
    try:
        with open(output_path, 'w') as f:
            # Use a real newline ("\n"), not an escaped backslash ("\\n"):
            # the escaped form writes the two characters '\' and 'n', which
            # collapses the whole file onto the header comment line.
            f.write("# YOLO Human Detections\n")
            if detections:
                for detection in detections:
                    bbox = detection['bbox']
                    conf = detection['confidence']
                    f.write(f"{bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]},{conf}\n")
                logger.info(f"Saved {len(detections)} detections to {output_path}")
            else:
                f.write("# No humans detected\n")
                logger.info(f"Saved empty detection file to {output_path}")
        return True
    except Exception as e:
        # Best-effort save: callers rely on the boolean result, so report
        # the failure through the log instead of propagating it.
        logger.error(f"Failed to save detections to {output_path}: {e}")
        return False
|
||||
|
||||
def load_detections_from_file(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
\"\"\"
|
||||
"""
|
||||
Load detection results from file.
|
||||
|
||||
Args:
|
||||
@@ -144,11 +144,11 @@ class YOLODetector:
|
||||
|
||||
Returns:
|
||||
List of detection dictionaries
|
||||
\"\"\"
|
||||
"""
|
||||
detections = []
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
logger.warning(f\"Detection file not found: {file_path}\")
|
||||
logger.warning(f"Detection file not found: {file_path}")
|
||||
return detections
|
||||
|
||||
try:
|
||||
@@ -170,18 +170,18 @@ class YOLODetector:
|
||||
'confidence': conf
|
||||
})
|
||||
except ValueError:
|
||||
logger.warning(f\"Invalid detection line: {line}\")
|
||||
logger.warning(f"Invalid detection line: {line}")
|
||||
continue
|
||||
|
||||
logger.info(f\"Loaded {len(detections)} detections from {file_path}\")
|
||||
logger.info(f"Loaded {len(detections)} detections from {file_path}")
|
||||
except Exception as e:
|
||||
logger.error(f\"Failed to load detections from {file_path}: {e}\")
|
||||
logger.error(f"Failed to load detections from {file_path}: {e}")
|
||||
|
||||
return detections
|
||||
|
||||
def process_segments_batch(self, segments_info: List[dict], detect_segments: List[int],
|
||||
scale: float = 0.5) -> Dict[int, List[Dict[str, Any]]]:
|
||||
\"\"\"
|
||||
"""
|
||||
Process multiple segments for human detection.
|
||||
|
||||
Args:
|
||||
@@ -191,7 +191,7 @@ class YOLODetector:
|
||||
|
||||
Returns:
|
||||
Dictionary mapping segment index to detection results
|
||||
\"\"\"
|
||||
"""
|
||||
results = {}
|
||||
|
||||
for segment_info in segments_info:
|
||||
@@ -202,17 +202,17 @@ class YOLODetector:
|
||||
continue
|
||||
|
||||
video_path = segment_info['video_file']
|
||||
detection_file = os.path.join(segment_info['directory'], \"yolo_detections\")
|
||||
detection_file = os.path.join(segment_info['directory'], "yolo_detections")
|
||||
|
||||
# Skip if already processed
|
||||
if os.path.exists(detection_file):
|
||||
logger.info(f\"Segment {segment_idx} already has detections, skipping\")
|
||||
logger.info(f"Segment {segment_idx} already has detections, skipping")
|
||||
detections = self.load_detections_from_file(detection_file)
|
||||
results[segment_idx] = detections
|
||||
continue
|
||||
|
||||
# Run detection
|
||||
logger.info(f\"Processing segment {segment_idx} for human detection\")
|
||||
logger.info(f"Processing segment {segment_idx} for human detection")
|
||||
detections = self.detect_humans_in_video_first_frame(video_path, scale)
|
||||
|
||||
# Save results
|
||||
@@ -223,7 +223,7 @@ class YOLODetector:
|
||||
|
||||
def convert_detections_to_sam2_prompts(self, detections: List[Dict[str, Any]],
|
||||
frame_width: int) -> List[Dict[str, Any]]:
|
||||
\"\"\"
|
||||
"""
|
||||
Convert YOLO detections to SAM2-compatible prompts for stereo video.
|
||||
|
||||
Args:
|
||||
@@ -232,7 +232,7 @@ class YOLODetector:
|
||||
|
||||
Returns:
|
||||
List of SAM2 prompt dictionaries with obj_id and bbox
|
||||
\"\"\"
|
||||
"""
|
||||
if not detections:
|
||||
return []
|
||||
|
||||
@@ -282,5 +282,5 @@ class YOLODetector:
|
||||
'confidence': detection['confidence']
|
||||
})
|
||||
|
||||
logger.debug(f\"Converted {len(detections)} detections to {len(prompts)} SAM2 prompts\")
|
||||
logger.debug(f"Converted {len(detections)} detections to {len(prompts)} SAM2 prompts")
|
||||
return prompts
|
||||
Reference in New Issue
Block a user