stereo mask working

This commit is contained in:
2025-07-31 11:13:31 -07:00
parent 0057017ac4
commit b97a3752a7
8 changed files with 1247 additions and 206 deletions

210
main.py
View File

@@ -681,138 +681,41 @@ async def main_async():
previous_masks = None
if use_detections:
# Run YOLO detection on current segment
logger.info(f"Running YOLO detection on segment {segment_idx}")
detection_file = os.path.join(segment_info['directory'], "yolo_detections")
# Run YOLO stereo detection and matching on current segment
logger.info(f"Running stereo pair detection on segment {segment_idx}")
# Check if detection already exists
if os.path.exists(detection_file):
logger.info(f"Loading existing YOLO detections for segment {segment_idx}")
detections = detector.load_detections_from_file(detection_file)
else:
# Run YOLO detection on first frame
detections = detector.detect_humans_in_video_first_frame(
segment_info['video_file'],
scale=config.get_inference_scale()
)
# Save detections for future runs
detector.save_detections_to_file(detections, detection_file)
if detections:
total_humans_detected += len(detections)
logger.info(f"Found {len(detections)} humans in segment {segment_idx}")
# Get frame width from video
cap = cv2.VideoCapture(segment_info['video_file'])
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
cap.release()
yolo_prompts = detector.convert_detections_to_sam2_prompts(
detections, frame_width
)
# If no right eye detections found, run debug analysis with lower confidence
half_frame_width = frame_width // 2
right_eye_detections = [d for d in detections if (d['bbox'][0] + d['bbox'][2]) / 2 >= half_frame_width]
if len(right_eye_detections) == 0 and config.get('advanced.save_yolo_debug_frames', False):
logger.info(f"VR180 Debug: No right eye detections found, running lower confidence analysis...")
# Load first frame for debug analysis
cap = cv2.VideoCapture(segment_info['video_file'])
ret, debug_frame = cap.read()
cap.release()
if ret:
# Scale frame to match detection scale
if config.get_inference_scale() != 1.0:
scale = config.get_inference_scale()
debug_frame = cv2.resize(debug_frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
# Run debug detection with lower confidence
debug_detections = detector.debug_detect_with_lower_confidence(debug_frame, debug_confidence=0.3)
# Analyze where these lower confidence detections are
debug_right_eye = [d for d in debug_detections if (d['bbox'][0] + d['bbox'][2]) / 2 >= half_frame_width]
if len(debug_right_eye) > 0:
logger.warning(f"VR180 Debug: Found {len(debug_right_eye)} right eye detections with lower confidence!")
for i, det in enumerate(debug_right_eye):
logger.warning(f"VR180 Debug: Right eye detection {i+1}: conf={det['confidence']:.3f}, bbox={det['bbox']}")
logger.warning(f"VR180 Debug: Consider lowering yolo_confidence from {config.get_yolo_confidence()} to 0.3-0.4")
else:
logger.info(f"VR180 Debug: No right eye detections found even with confidence 0.3")
logger.info(f"VR180 Debug: This confirms person is not visible in right eye view")
logger.info(f"Pipeline Debug: Segment {segment_idx} - Generated {len(yolo_prompts)} SAM2 prompts from {len(detections)} YOLO detections")
# Save debug frame with detections visualized (if enabled)
if config.get('advanced.save_yolo_debug_frames', False):
debug_frame_path = os.path.join(segment_info['directory'], "yolo_debug.jpg")
# Load first frame for debug visualization
cap = cv2.VideoCapture(segment_info['video_file'])
ret, debug_frame = cap.read()
cap.release()
if ret:
# Scale frame to match detection scale
if config.get_inference_scale() != 1.0:
scale = config.get_inference_scale()
debug_frame = cv2.resize(debug_frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
detector.save_debug_frame_with_detections(debug_frame, detections, debug_frame_path, yolo_prompts)
# Load the first frame for detection
cap = cv2.VideoCapture(segment_info['video_file'])
ret, frame = cap.read()
cap.release()
if not ret:
logger.error(f"Could not read first frame of segment {segment_idx}")
continue
# Scale frame if needed
if config.get_inference_scale() != 1.0:
frame = cv2.resize(frame, None, fx=config.get_inference_scale(), fy=config.get_inference_scale(), interpolation=cv2.INTER_LINEAR)
yolo_prompts = detector.detect_and_match_stereo_pairs(
frame,
config.get_confidence_reduction_factor(),
config.get_stereo_iou_threshold(),
segment_info,
config.get('advanced.save_yolo_debug_frames', True)
)
if not yolo_prompts:
logger.warning(f"No valid stereo pairs found for segment {segment_idx}. Attempting to use previous segment's mask.")
if segment_idx > 0:
prev_segment_dir = segments_info[segment_idx - 1]['directory']
previous_masks = sam2_processor.load_previous_segment_mask(prev_segment_dir)
if previous_masks:
logger.info(f"Using masks from segment {segment_idx - 1} as fallback.")
else:
logger.warning(f"Could not load frame for debug visualization in segment {segment_idx}")
# Check if we have YOLO masks for debug visualization
has_yolo_masks = False
if detections and detector.supports_segmentation:
has_yolo_masks = any(d.get('has_mask', False) for d in detections)
# Generate first frame masks debug (SAM2 or YOLO)
first_frame_debug_path = os.path.join(segment_info['directory'], "first_frame_detection.jpg")
if has_yolo_masks:
logger.info(f"Pipeline Debug: Generating YOLO first frame masks for segment {segment_idx}")
# Create YOLO mask debug visualization
create_yolo_mask_debug_frame(detections, segment_info['video_file'], first_frame_debug_path, config.get_inference_scale())
else:
logger.info(f"Pipeline Debug: Generating SAM2 first frame masks for segment {segment_idx}")
sam2_processor.generate_first_frame_debug_masks(
segment_info['video_file'],
yolo_prompts,
first_frame_debug_path,
config.get_inference_scale()
)
else:
logger.warning(f"No humans detected in segment {segment_idx}")
# Save debug frame even when no detections (if enabled)
if config.get('advanced.save_yolo_debug_frames', False):
debug_frame_path = os.path.join(segment_info['directory'], "yolo_debug_no_detections.jpg")
# Load first frame for debug visualization
cap = cv2.VideoCapture(segment_info['video_file'])
ret, debug_frame = cap.read()
cap.release()
if ret:
# Scale frame to match detection scale
if config.get_inference_scale() != 1.0:
scale = config.get_inference_scale()
debug_frame = cv2.resize(debug_frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
# Add "No detections" text overlay
cv2.putText(debug_frame, "YOLO: No humans detected",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1.0,
(0, 0, 255), 2) # Red text
cv2.imwrite(debug_frame_path, debug_frame)
logger.info(f"Saved no-detection debug frame to {debug_frame_path}")
else:
logger.warning(f"Could not load frame for no-detection debug visualization in segment {segment_idx}")
logger.error(f"Fallback failed: No previous mask found for segment {segment_idx}.")
else:
logger.error("Cannot use fallback for the first segment.")
elif segment_idx > 0:
# Try to load previous segment mask
for j in range(segment_idx - 1, -1, -1):
@@ -826,43 +729,20 @@ async def main_async():
logger.error(f"No prompts or previous masks available for segment {segment_idx}")
continue
# Check if we have YOLO masks and can skip SAM2 (recheck in case detections were loaded from file)
if not 'has_yolo_masks' in locals():
has_yolo_masks = False
if detections and detector.supports_segmentation:
has_yolo_masks = any(d.get('has_mask', False) for d in detections)
# Check if we have YOLO masks from the stereo pair matching and can use them as initial masks for SAM2
if yolo_prompts and detector.supports_segmentation:
logger.info(f"Pipeline Debug: YOLO segmentation provided matched stereo masks - using as SAM2 initial masks.")
if has_yolo_masks:
logger.info(f"Pipeline Debug: YOLO segmentation provided masks - using as SAM2 initial masks for segment {segment_idx}")
# Convert the prompts (which contain masks) into the initial_masks format for SAM2
initial_masks = {prompt['obj_id']: prompt['mask'] for prompt in yolo_prompts if 'mask' in prompt}
# Convert YOLO masks to initial masks for SAM2
cap = cv2.VideoCapture(segment_info['video_file'])
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
# Convert YOLO masks to the format expected by SAM2 add_previous_masks_to_predictor
yolo_masks_dict = {}
for i, detection in enumerate(detections[:2]): # Up to 2 objects
if detection.get('has_mask', False):
mask = detection['mask']
# Resize mask to match inference scale
if config.get_inference_scale() != 1.0:
scale = config.get_inference_scale()
scaled_height = int(frame_height * scale)
scaled_width = int(frame_width * scale)
mask = cv2.resize(mask.astype(np.float32), (scaled_width, scaled_height), interpolation=cv2.INTER_NEAREST)
mask = mask > 0.5
obj_id = i + 1 # Sequential object IDs
yolo_masks_dict[obj_id] = mask.astype(bool)
logger.info(f"Pipeline Debug: YOLO mask for Object {obj_id} - shape: {mask.shape}, pixels: {np.sum(mask)}")
logger.info(f"Pipeline Debug: Using YOLO masks as SAM2 initial masks - {len(yolo_masks_dict)} objects")
# Use traditional SAM2 pipeline with YOLO masks as initial masks
previous_masks = yolo_masks_dict
yolo_prompts = None # Don't use bounding box prompts when we have masks
if initial_masks:
# We are providing initial masks, so we should not provide bbox prompts
previous_masks = initial_masks
yolo_prompts = None
logger.info(f"Pipeline Debug: Using {len(previous_masks)} YOLO masks as SAM2 initial masks.")
else:
logger.warning("YOLO segmentation mode is on, but no masks were found in the final prompts.")
# Debug what we're passing to SAM2
if yolo_prompts: