SBS (side-by-side stereo) processing pipeline working — phase 1

This commit is contained in:
2025-07-30 18:07:26 -07:00
parent 6617acb1c9
commit 70044e1b10
8 changed files with 2417 additions and 7 deletions

View File

@@ -732,4 +732,300 @@ class YOLODetector:
except Exception as e:
logger.error(f"Error creating debug frame: {e}")
return False
def detect_humans_in_single_eye(self, frame: np.ndarray, eye_side: str) -> List[Dict[str, Any]]:
    """
    Run human detection on one eye of a stereo pair.

    Thin wrapper around the standard full-frame detector that adds
    eye-specific logging.

    Args:
        frame: BGR image for a single eye.
        eye_side: 'left' or 'right'.

    Returns:
        Detection dictionaries produced for this eye frame.
    """
    logger.info(f"Running YOLO detection on {eye_side} eye frame")

    # Delegate to the normal single-frame detector; the eye frame is
    # just an ordinary image at this point.
    results = self.detect_humans_in_frame(frame)

    logger.info(f"YOLO {eye_side.upper()} Eye: Found {len(results)} human detections")
    for idx, det in enumerate(results):
        logger.debug(
            f"YOLO {eye_side.upper()} Eye Detection {idx+1}: "
            f"bbox={det['bbox']}, conf={det['confidence']:.3f}, "
            f"has_mask={det.get('has_mask', False)}"
        )
    return results
def convert_eye_detections_to_sam2_prompts(self, detections: List[Dict[str, Any]],
                                           eye_side: str) -> List[Dict[str, Any]]:
    """
    Convert single eye detections to SAM2 prompts (always uses obj_id=1 for single eye processing).

    Args:
        detections: YOLO detection results for one eye.
        eye_side: 'left' or 'right'.

    Returns:
        A list containing at most one SAM2 prompt dict (obj_id=1), or an
        empty list when no detections were supplied.
    """
    if not detections:
        logger.warning(f"No detections provided for {eye_side} eye SAM2 prompt conversion")
        return []

    logger.info(f"Converting {len(detections)} {eye_side} eye detections to SAM2 prompts")

    # Single-eye processing tracks exactly one subject, so only the
    # highest-confidence detection is promoted to a SAM2 prompt.
    top = max(detections, key=lambda d: d['confidence'])
    prompt = {
        'obj_id': 1,  # Always use obj_id=1 for single eye processing
        'bbox': top['bbox'].copy(),
        'confidence': top['confidence'],
    }

    logger.info(f"{eye_side.upper()} Eye: Converted best detection (conf={top['confidence']:.3f}) to SAM2 Object 1")
    return [prompt]
def has_any_detections(self, detections_list: List[List[Dict[str, Any]]]) -> bool:
    """
    Check if any detections exist in a list of detection lists.

    Args:
        detections_list: List of detection lists (e.g., [left_detections, right_detections]).

    Returns:
        True if at least one inner list is non-empty.
    """
    # any() applies truthiness to each inner list, which is exactly the
    # original "is this list non-empty" check.
    return any(detections_list)
def split_detections_by_eye(self, detections: List[Dict[str, Any]], frame_width: int) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Split VR180 detections into left and right eye detections with coordinate conversion.

    A detection is assigned to an eye based on which half of the frame its
    bbox center falls in. Right-eye bboxes are shifted so x starts at 0 and
    clamped to [0, half_width]; left-eye bboxes keep their coordinates.

    Args:
        detections: List of full-frame VR180 detections.
        frame_width: Width of the full VR180 frame.

    Returns:
        Tuple of (left_eye_detections, right_eye_detections) with converted coordinates.
    """
    half_width = frame_width // 2
    left_detections = []
    right_detections = []
    logger.info(f"Splitting {len(detections)} VR180 detections by eye (frame_width={frame_width}, half_width={half_width})")
    for i, detection in enumerate(detections):
        bbox = detection['bbox']
        center_x = (bbox[0] + bbox[2]) / 2
        logger.info(f"Detection {i}: bbox={bbox}, center_x={center_x:.1f}")
        # Shallow-copy the detection and deep-copy its bbox so that mutating
        # a converted detection can never corrupt the caller's original.
        converted_detection = detection.copy()
        converted_bbox = bbox.copy()
        if center_x < half_width:
            # Left eye: coordinates are unchanged, but we still store the
            # copied bbox. (Bug fix: previously the copy was computed and
            # discarded, leaving the left-eye bbox aliased to the input.)
            converted_detection['bbox'] = converted_bbox
            if detection.get('has_mask', False) and 'mask' in detection:
                original_mask = detection['mask']
                # Mask stays in full-frame coordinates; cropping to the
                # left half is deferred to per-eye processing.
                converted_detection['mask'] = original_mask
                logger.info(f"Detection {i}: LEFT eye mask shape: {original_mask.shape}")
            left_detections.append(converted_detection)
            logger.info(f"Detection {i}: Assigned to LEFT eye, center_x={center_x:.1f} < {half_width}, bbox={bbox}")
        else:
            # Right eye: shift x coordinates to be relative to the right-half
            # origin, then clamp into the right-eye frame bounds.
            original_bbox = converted_bbox.copy()
            converted_bbox[0] -= half_width  # x1
            converted_bbox[2] -= half_width  # x2
            converted_bbox[0] = max(0, converted_bbox[0])
            converted_bbox[2] = max(0, min(converted_bbox[2], half_width))
            converted_detection['bbox'] = converted_bbox
            if detection.get('has_mask', False) and 'mask' in detection:
                original_mask = detection['mask']
                # Mask is passed through untouched; the x-shift/crop for the
                # right eye is handled downstream in per-eye processing.
                converted_detection['mask'] = original_mask
                logger.info(f"Detection {i}: RIGHT eye mask shape: {original_mask.shape}")
            right_detections.append(converted_detection)
            logger.info(f"Detection {i}: Assigned to RIGHT eye, center_x={center_x:.1f} >= {half_width}, original_bbox={original_bbox}, converted_bbox={converted_bbox}")
    logger.info(f"Split result: {len(left_detections)} left eye, {len(right_detections)} right eye detections")
    return left_detections, right_detections
def save_eye_debug_frames(self, left_frame: np.ndarray, right_frame: np.ndarray,
                          left_detections: List[Dict[str, Any]], right_detections: List[Dict[str, Any]],
                          left_output_path: str, right_output_path: str) -> Tuple[bool, bool]:
    """
    Save debug frames for both left and right eye detections.

    Args:
        left_frame: Left eye frame.
        right_frame: Right eye frame.
        left_detections: Left eye detections.
        right_detections: Right eye detections.
        left_output_path: Output path for the left eye debug frame.
        right_output_path: Output path for the right eye debug frame.

    Returns:
        Tuple of (left_success, right_success).
    """
    logger.info(f"Saving eye-specific debug frames")

    # Render both eyes first (matches original save-then-log ordering).
    left_ok = self._save_single_eye_debug_frame(
        left_frame, left_detections, left_output_path, "LEFT"
    )
    right_ok = self._save_single_eye_debug_frame(
        right_frame, right_detections, right_output_path, "RIGHT"
    )

    # Report each successful save.
    for ok, word, path in ((left_ok, "left", left_output_path),
                           (right_ok, "right", right_output_path)):
        if ok:
            logger.info(f"Saved {word} eye debug frame: {path}")

    return left_ok, right_ok
def _save_single_eye_debug_frame(self, frame: np.ndarray, detections: List[Dict[str, Any]],
                                 output_path: str, eye_side: str) -> bool:
    """
    Save a debug frame for a single eye with eye-specific visualizations.

    Draws either a translucent segmentation-mask overlay (when a mask is
    available) or a bounding box for each detection, plus per-detection
    labels and a header with eye name, YOLO mode, summary, and frame size.

    Args:
        frame: Single eye frame (BGR format from OpenCV)
        detections: List of detection dictionaries for this eye
        output_path: Path to save the debug image
        eye_side: "LEFT" or "RIGHT"
    Returns:
        True if saved successfully
    """
    try:
        # Work on a copy so the caller's frame is never mutated.
        debug_frame = frame.copy()
        # Draw masks or bounding boxes for each detection
        for i, detection in enumerate(detections):
            bbox = detection['bbox']
            confidence = detection['confidence']
            has_mask = detection.get('has_mask', False)
            # Extract coordinates
            x1, y1, x2, y2 = map(int, bbox)
            # Choose color based on confidence (green for high, yellow for medium, red for low)
            if confidence >= 0.8:
                color = (0, 255, 0)  # Green
            elif confidence >= 0.6:
                color = (0, 255, 255)  # Yellow
            else:
                color = (0, 0, 255)  # Red
            if has_mask and 'mask' in detection:
                # Draw segmentation mask
                mask = detection['mask']
                # Resize mask to match frame if needed; nearest-neighbor keeps
                # the mask binary, then re-threshold at 0.5 after the resize.
                if mask.shape != debug_frame.shape[:2]:
                    mask = cv2.resize(mask.astype(np.float32), (debug_frame.shape[1], debug_frame.shape[0]), interpolation=cv2.INTER_NEAREST)
                    mask = mask > 0.5
                mask = mask.astype(bool)
                # Apply colored overlay with transparency (30% color, 70% frame)
                overlay = debug_frame.copy()
                overlay[mask] = color
                cv2.addWeighted(overlay, 0.3, debug_frame, 0.7, 0, debug_frame)
                # Draw mask outline
                contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                cv2.drawContours(debug_frame, contours, -1, color, 2)
                # Prepare label text for segmentation
                label = f"Person {i+1}: {confidence:.2f} (MASK)"
            else:
                # Draw bounding box (detection mode or no mask available)
                cv2.rectangle(debug_frame, (x1, y1), (x2, y2), color, 2)
                # Prepare label text for detection
                label = f"Person {i+1}: {confidence:.2f} (BBOX)"
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]
            # Draw label background
            # NOTE(review): when y1 is small the background rectangle can
            # extend above the top edge of the frame — confirm acceptable.
            cv2.rectangle(debug_frame,
                          (x1, y1 - label_size[1] - 10),
                          (x1 + label_size[0], y1),
                          color, -1)
            # Draw label text
            cv2.putText(debug_frame, label,
                        (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (255, 255, 255), 2)
        # Add title specific to this eye
        frame_height, frame_width = debug_frame.shape[:2]
        title = f"{eye_side} EYE: {len(detections)} detections"
        cv2.putText(debug_frame, title, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)
        # Add mode information
        mode_text = f"YOLO Mode: {self.mode.upper()}"
        masks_available = sum(1 for d in detections if d.get('has_mask', False))
        # Summary reflects whether any masks were produced in segmentation mode.
        if self.supports_segmentation and masks_available > 0:
            summary = f"{len(detections)} detections → {masks_available} MASKS"
        else:
            summary = f"{len(detections)} detections → BOUNDING BOXES"
        cv2.putText(debug_frame, mode_text,
                    (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (0, 255, 255), 2)  # Yellow for mode
        cv2.putText(debug_frame, summary,
                    (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (255, 255, 255), 2)
        # Add frame dimensions info
        dims_info = f"Frame: {frame_width}x{frame_height}"
        cv2.putText(debug_frame, dims_info,
                    (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (255, 255, 255), 2)
        # Save debug frame
        success = cv2.imwrite(output_path, debug_frame)
        if success:
            logger.info(f"Saved {eye_side} eye debug frame to {output_path}")
        else:
            logger.error(f"Failed to save {eye_side} eye debug frame to {output_path}")
        return success
    except Exception as e:
        # Best-effort debug output: never let visualization failures
        # propagate into the main pipeline.
        logger.error(f"Error creating {eye_side} eye debug frame: {e}")
        return False