stereo mask working
This commit is contained in:
210
main.py
210
main.py
@@ -681,138 +681,41 @@ async def main_async():
|
||||
previous_masks = None
|
||||
|
||||
if use_detections:
|
||||
# Run YOLO detection on current segment
|
||||
logger.info(f"Running YOLO detection on segment {segment_idx}")
|
||||
detection_file = os.path.join(segment_info['directory'], "yolo_detections")
|
||||
# Run YOLO stereo detection and matching on current segment
|
||||
logger.info(f"Running stereo pair detection on segment {segment_idx}")
|
||||
|
||||
# Check if detection already exists
|
||||
if os.path.exists(detection_file):
|
||||
logger.info(f"Loading existing YOLO detections for segment {segment_idx}")
|
||||
detections = detector.load_detections_from_file(detection_file)
|
||||
else:
|
||||
# Run YOLO detection on first frame
|
||||
detections = detector.detect_humans_in_video_first_frame(
|
||||
segment_info['video_file'],
|
||||
scale=config.get_inference_scale()
|
||||
)
|
||||
# Save detections for future runs
|
||||
detector.save_detections_to_file(detections, detection_file)
|
||||
|
||||
if detections:
|
||||
total_humans_detected += len(detections)
|
||||
logger.info(f"Found {len(detections)} humans in segment {segment_idx}")
|
||||
|
||||
# Get frame width from video
|
||||
cap = cv2.VideoCapture(segment_info['video_file'])
|
||||
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
cap.release()
|
||||
|
||||
yolo_prompts = detector.convert_detections_to_sam2_prompts(
|
||||
detections, frame_width
|
||||
)
|
||||
|
||||
# If no right eye detections found, run debug analysis with lower confidence
|
||||
half_frame_width = frame_width // 2
|
||||
right_eye_detections = [d for d in detections if (d['bbox'][0] + d['bbox'][2]) / 2 >= half_frame_width]
|
||||
|
||||
if len(right_eye_detections) == 0 and config.get('advanced.save_yolo_debug_frames', False):
|
||||
logger.info(f"VR180 Debug: No right eye detections found, running lower confidence analysis...")
|
||||
|
||||
# Load first frame for debug analysis
|
||||
cap = cv2.VideoCapture(segment_info['video_file'])
|
||||
ret, debug_frame = cap.read()
|
||||
cap.release()
|
||||
|
||||
if ret:
|
||||
# Scale frame to match detection scale
|
||||
if config.get_inference_scale() != 1.0:
|
||||
scale = config.get_inference_scale()
|
||||
debug_frame = cv2.resize(debug_frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Run debug detection with lower confidence
|
||||
debug_detections = detector.debug_detect_with_lower_confidence(debug_frame, debug_confidence=0.3)
|
||||
|
||||
# Analyze where these lower confidence detections are
|
||||
debug_right_eye = [d for d in debug_detections if (d['bbox'][0] + d['bbox'][2]) / 2 >= half_frame_width]
|
||||
|
||||
if len(debug_right_eye) > 0:
|
||||
logger.warning(f"VR180 Debug: Found {len(debug_right_eye)} right eye detections with lower confidence!")
|
||||
for i, det in enumerate(debug_right_eye):
|
||||
logger.warning(f"VR180 Debug: Right eye detection {i+1}: conf={det['confidence']:.3f}, bbox={det['bbox']}")
|
||||
logger.warning(f"VR180 Debug: Consider lowering yolo_confidence from {config.get_yolo_confidence()} to 0.3-0.4")
|
||||
else:
|
||||
logger.info(f"VR180 Debug: No right eye detections found even with confidence 0.3")
|
||||
logger.info(f"VR180 Debug: This confirms person is not visible in right eye view")
|
||||
|
||||
logger.info(f"Pipeline Debug: Segment {segment_idx} - Generated {len(yolo_prompts)} SAM2 prompts from {len(detections)} YOLO detections")
|
||||
|
||||
# Save debug frame with detections visualized (if enabled)
|
||||
if config.get('advanced.save_yolo_debug_frames', False):
|
||||
debug_frame_path = os.path.join(segment_info['directory'], "yolo_debug.jpg")
|
||||
|
||||
# Load first frame for debug visualization
|
||||
cap = cv2.VideoCapture(segment_info['video_file'])
|
||||
ret, debug_frame = cap.read()
|
||||
cap.release()
|
||||
|
||||
if ret:
|
||||
# Scale frame to match detection scale
|
||||
if config.get_inference_scale() != 1.0:
|
||||
scale = config.get_inference_scale()
|
||||
debug_frame = cv2.resize(debug_frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
detector.save_debug_frame_with_detections(debug_frame, detections, debug_frame_path, yolo_prompts)
|
||||
# Load the first frame for detection
|
||||
cap = cv2.VideoCapture(segment_info['video_file'])
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
|
||||
if not ret:
|
||||
logger.error(f"Could not read first frame of segment {segment_idx}")
|
||||
continue
|
||||
|
||||
# Scale frame if needed
|
||||
if config.get_inference_scale() != 1.0:
|
||||
frame = cv2.resize(frame, None, fx=config.get_inference_scale(), fy=config.get_inference_scale(), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
yolo_prompts = detector.detect_and_match_stereo_pairs(
|
||||
frame,
|
||||
config.get_confidence_reduction_factor(),
|
||||
config.get_stereo_iou_threshold(),
|
||||
segment_info,
|
||||
config.get('advanced.save_yolo_debug_frames', True)
|
||||
)
|
||||
|
||||
if not yolo_prompts:
|
||||
logger.warning(f"No valid stereo pairs found for segment {segment_idx}. Attempting to use previous segment's mask.")
|
||||
if segment_idx > 0:
|
||||
prev_segment_dir = segments_info[segment_idx - 1]['directory']
|
||||
previous_masks = sam2_processor.load_previous_segment_mask(prev_segment_dir)
|
||||
if previous_masks:
|
||||
logger.info(f"Using masks from segment {segment_idx - 1} as fallback.")
|
||||
else:
|
||||
logger.warning(f"Could not load frame for debug visualization in segment {segment_idx}")
|
||||
|
||||
# Check if we have YOLO masks for debug visualization
|
||||
has_yolo_masks = False
|
||||
if detections and detector.supports_segmentation:
|
||||
has_yolo_masks = any(d.get('has_mask', False) for d in detections)
|
||||
|
||||
# Generate first frame masks debug (SAM2 or YOLO)
|
||||
first_frame_debug_path = os.path.join(segment_info['directory'], "first_frame_detection.jpg")
|
||||
|
||||
if has_yolo_masks:
|
||||
logger.info(f"Pipeline Debug: Generating YOLO first frame masks for segment {segment_idx}")
|
||||
# Create YOLO mask debug visualization
|
||||
create_yolo_mask_debug_frame(detections, segment_info['video_file'], first_frame_debug_path, config.get_inference_scale())
|
||||
else:
|
||||
logger.info(f"Pipeline Debug: Generating SAM2 first frame masks for segment {segment_idx}")
|
||||
sam2_processor.generate_first_frame_debug_masks(
|
||||
segment_info['video_file'],
|
||||
yolo_prompts,
|
||||
first_frame_debug_path,
|
||||
config.get_inference_scale()
|
||||
)
|
||||
else:
|
||||
logger.warning(f"No humans detected in segment {segment_idx}")
|
||||
|
||||
# Save debug frame even when no detections (if enabled)
|
||||
if config.get('advanced.save_yolo_debug_frames', False):
|
||||
debug_frame_path = os.path.join(segment_info['directory'], "yolo_debug_no_detections.jpg")
|
||||
|
||||
# Load first frame for debug visualization
|
||||
cap = cv2.VideoCapture(segment_info['video_file'])
|
||||
ret, debug_frame = cap.read()
|
||||
cap.release()
|
||||
|
||||
if ret:
|
||||
# Scale frame to match detection scale
|
||||
if config.get_inference_scale() != 1.0:
|
||||
scale = config.get_inference_scale()
|
||||
debug_frame = cv2.resize(debug_frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Add "No detections" text overlay
|
||||
cv2.putText(debug_frame, "YOLO: No humans detected",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1.0,
|
||||
(0, 0, 255), 2) # Red text
|
||||
|
||||
cv2.imwrite(debug_frame_path, debug_frame)
|
||||
logger.info(f"Saved no-detection debug frame to {debug_frame_path}")
|
||||
else:
|
||||
logger.warning(f"Could not load frame for no-detection debug visualization in segment {segment_idx}")
|
||||
logger.error(f"Fallback failed: No previous mask found for segment {segment_idx}.")
|
||||
else:
|
||||
logger.error("Cannot use fallback for the first segment.")
|
||||
elif segment_idx > 0:
|
||||
# Try to load previous segment mask
|
||||
for j in range(segment_idx - 1, -1, -1):
|
||||
@@ -826,43 +729,20 @@ async def main_async():
|
||||
logger.error(f"No prompts or previous masks available for segment {segment_idx}")
|
||||
continue
|
||||
|
||||
# Check if we have YOLO masks and can skip SAM2 (recheck in case detections were loaded from file)
|
||||
if not 'has_yolo_masks' in locals():
|
||||
has_yolo_masks = False
|
||||
if detections and detector.supports_segmentation:
|
||||
has_yolo_masks = any(d.get('has_mask', False) for d in detections)
|
||||
# Check if we have YOLO masks from the stereo pair matching and can use them as initial masks for SAM2
|
||||
if yolo_prompts and detector.supports_segmentation:
|
||||
logger.info(f"Pipeline Debug: YOLO segmentation provided matched stereo masks - using as SAM2 initial masks.")
|
||||
|
||||
if has_yolo_masks:
|
||||
logger.info(f"Pipeline Debug: YOLO segmentation provided masks - using as SAM2 initial masks for segment {segment_idx}")
|
||||
# Convert the prompts (which contain masks) into the initial_masks format for SAM2
|
||||
initial_masks = {prompt['obj_id']: prompt['mask'] for prompt in yolo_prompts if 'mask' in prompt}
|
||||
|
||||
# Convert YOLO masks to initial masks for SAM2
|
||||
cap = cv2.VideoCapture(segment_info['video_file'])
|
||||
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
cap.release()
|
||||
|
||||
# Convert YOLO masks to the format expected by SAM2 add_previous_masks_to_predictor
|
||||
yolo_masks_dict = {}
|
||||
for i, detection in enumerate(detections[:2]): # Up to 2 objects
|
||||
if detection.get('has_mask', False):
|
||||
mask = detection['mask']
|
||||
# Resize mask to match inference scale
|
||||
if config.get_inference_scale() != 1.0:
|
||||
scale = config.get_inference_scale()
|
||||
scaled_height = int(frame_height * scale)
|
||||
scaled_width = int(frame_width * scale)
|
||||
mask = cv2.resize(mask.astype(np.float32), (scaled_width, scaled_height), interpolation=cv2.INTER_NEAREST)
|
||||
mask = mask > 0.5
|
||||
|
||||
obj_id = i + 1 # Sequential object IDs
|
||||
yolo_masks_dict[obj_id] = mask.astype(bool)
|
||||
logger.info(f"Pipeline Debug: YOLO mask for Object {obj_id} - shape: {mask.shape}, pixels: {np.sum(mask)}")
|
||||
|
||||
logger.info(f"Pipeline Debug: Using YOLO masks as SAM2 initial masks - {len(yolo_masks_dict)} objects")
|
||||
|
||||
# Use traditional SAM2 pipeline with YOLO masks as initial masks
|
||||
previous_masks = yolo_masks_dict
|
||||
yolo_prompts = None # Don't use bounding box prompts when we have masks
|
||||
if initial_masks:
|
||||
# We are providing initial masks, so we should not provide bbox prompts
|
||||
previous_masks = initial_masks
|
||||
yolo_prompts = None
|
||||
logger.info(f"Pipeline Debug: Using {len(previous_masks)} YOLO masks as SAM2 initial masks.")
|
||||
else:
|
||||
logger.warning("YOLO segmentation mode is on, but no masks were found in the final prompts.")
|
||||
|
||||
# Debug what we're passing to SAM2
|
||||
if yolo_prompts:
|
||||
|
||||
Reference in New Issue
Block a user