SBS (side-by-side separate-eye processing) working — phase 1

This commit is contained in:
2025-07-30 18:07:26 -07:00
parent 6617acb1c9
commit 70044e1b10
8 changed files with 2417 additions and 7 deletions

325
main.py
View File

@@ -188,6 +188,293 @@ def resolve_detect_segments(detect_segments, total_segments: int) -> List[int]:
logger.warning(f"Invalid detect_segments format: {detect_segments}. Using all segments.")
return list(range(total_segments))
def _first_yolo_initial_mask(detections, eye_label):
    """Return a SAM2 initial-mask dict {1: bool_mask} from the first YOLO
    detection that carries a segmentation mask, or None if none do.

    obj_id is always 1 because each eye tracks a single subject.
    """
    for detection in detections:
        if detection.get('has_mask', False):
            mask = detection['mask']
            logger.info(f"VR180 Separate Eyes: {eye_label} eye YOLO mask - shape: {mask.shape}, pixels: {np.sum(mask)}")
            return {1: mask.astype(bool)}
    return None


def _save_eye_debug_frames(segment_info, detector, left_eye_video, right_eye_video,
                           left_detections, right_detections):
    """Best-effort: save first-frame YOLO debug visualizations for each eye.

    Never raises — debug output must not abort segment processing.
    """
    segment_idx = segment_info['index']
    try:
        left_cap = cv2.VideoCapture(left_eye_video)
        ret_left, left_frame = left_cap.read()
        left_cap.release()
        right_cap = cv2.VideoCapture(right_eye_video)
        ret_right, right_frame = right_cap.read()
        right_cap.release()
        if ret_left and ret_right:
            left_debug_path = os.path.join(segment_info['directory'], "left_eye_debug.jpg")
            right_debug_path = os.path.join(segment_info['directory'], "right_eye_debug.jpg")
            detector.save_eye_debug_frames(
                left_frame, right_frame,
                left_detections, right_detections,
                left_debug_path, right_debug_path
            )
            logger.info(f"VR180 Separate Eyes: Saved eye-specific debug frames for segment {segment_idx}")
        else:
            logger.warning(f"VR180 Separate Eyes: Could not load eye frames for debug visualization")
    except Exception as e:
        logger.warning(f"VR180 Separate Eyes: Failed to create eye debug frames: {e}")


def _process_one_eye(eye_label, eye_key, eye_video, detections, previous_masks,
                     segment_info, detector, sam2_processor, frame_width, has_yolo_masks):
    """Run SAM2 on a single eye video.

    Args:
        eye_label: Human-readable label ("Left"/"Right") used in log messages.
        eye_key: Processor key ('left'/'right') passed to SAM2.
        eye_video: Path to the pre-split, pre-scaled eye video.
        detections: YOLO detections for this eye (may be empty).
        previous_masks: Masks carried over from the previous segment, or None.
        segment_info: Segment information dictionary.
        detector: YOLO detector instance.
        sam2_processor: SAM2 processor with eye processing enabled.
        frame_width: Full (both-eye) frame width; halved for prompt conversion.
        has_yolo_masks: Whether YOLO segmentation masks are available globally.

    Returns:
        Per-frame mask dict for this eye, or None on failure / nothing to do.
    """
    if not detections and not previous_masks:
        return None
    segment_idx = segment_info['index']
    try:
        prompts = None
        initial_masks = None
        if detections:
            if has_yolo_masks:
                # YOLO segmentation mode: feed the mask directly to SAM2.
                initial_masks = _first_yolo_initial_mask(detections, eye_label)
                if initial_masks:
                    logger.info(f"VR180 Separate Eyes: {eye_label} eye - using YOLO segmentation masks as initial masks")
            else:
                # YOLO detection mode: convert bounding boxes to point prompts.
                prompts = detector.convert_detections_to_sam2_prompts(detections, frame_width // 2)
                logger.info(f"VR180 Separate Eyes: {eye_label} eye - {len(prompts)} SAM2 prompts")
        # Point SAM2 at the eye video via a shallow copy of the segment info.
        eye_segment_info = segment_info.copy()
        eye_segment_info['video_file'] = eye_video
        masks = sam2_processor.process_single_eye_segment(
            eye_segment_info, eye_key, prompts,
            initial_masks or previous_masks,
            1.0  # Scale already applied during video splitting
        )
        if masks:
            logger.info(f"VR180 Separate Eyes: {eye_label} eye processed - {len(masks)} frame masks")
        else:
            logger.warning(f"VR180 Separate Eyes: {eye_label} eye processing failed")
        return masks
    except Exception as e:
        logger.error(f"VR180 Separate Eyes: Error processing {eye_label.lower()} eye for segment {segment_idx}: {e}")
        return None


def _greenscreen_fallback(mask_processor, config, segment_info):
    """Render the segment as greenscreen-only output (no subject masks)."""
    return mask_processor.process_greenscreen_only_segment(
        segment_info,
        green_color=config.get_green_color(),
        use_nvenc=config.get_use_nvenc(),
        bitrate=config.get_output_bitrate()
    )


def process_segment_with_separate_eyes(segment_info, detector, sam2_processor, mask_processor, config,
                                       previous_left_masks=None, previous_right_masks=None):
    """
    Process a single segment using separate eye processing mode.

    The segment video is split into left/right eye videos first, then YOLO
    and SAM2 run independently on each eye before the per-eye masks are
    recombined into full-frame masks.

    Args:
        segment_info: Segment information dictionary ('index', 'directory', 'video_file').
        detector: YOLO detector instance.
        sam2_processor: SAM2 processor with eye processing enabled.
        mask_processor: Mask processor instance.
        config: Configuration loader instance.
        previous_left_masks: Previous masks for left eye (temporal carry-over).
        previous_right_masks: Previous masks for right eye.

    Returns:
        Tuple of (success, left_masks, right_masks).
    """
    segment_idx = segment_info['index']
    logger.info(f"VR180 Separate Eyes: Processing segment {segment_idx} (video-split approach)")

    # Probe the source video for full-frame dimensions (needed to recombine
    # eye masks in step 6). Guard against a failed open, which would
    # otherwise yield a bogus (0, 0) frame shape.
    cap = cv2.VideoCapture(segment_info['video_file'])
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    if frame_width <= 0 or frame_height <= 0:
        logger.error(f"VR180 Separate Eyes: Could not read video properties for segment {segment_idx}")
        return False, None, None
    full_frame_shape = (frame_height, frame_width)

    # Step 1: Split the segment video into left and right eye videos.
    left_eye_video = os.path.join(segment_info['directory'], "left_eye.mp4")
    right_eye_video = os.path.join(segment_info['directory'], "right_eye.mp4")
    logger.info(f"VR180 Separate Eyes: Splitting segment video into eye videos")
    success = sam2_processor.eye_processor.split_video_into_eyes(
        segment_info['video_file'],
        left_eye_video,
        right_eye_video,
        scale=config.get_inference_scale()
    )
    if not success:
        logger.error(f"VR180 Separate Eyes: Failed to split video for segment {segment_idx}")
        return False, None, None
    if not os.path.exists(left_eye_video) or not os.path.exists(right_eye_video):
        logger.error(f"VR180 Separate Eyes: Eye video files not created for segment {segment_idx}")
        return False, None, None
    logger.info(f"VR180 Separate Eyes: Created eye videos - left: {left_eye_video}, right: {right_eye_video}")

    # Step 2: Run YOLO independently on each eye video. scale=1.0 because
    # the inference scale was already applied during video splitting.
    left_detections = detector.detect_humans_in_video_first_frame(left_eye_video, scale=1.0)
    right_detections = detector.detect_humans_in_video_first_frame(right_eye_video, scale=1.0)
    logger.info(f"VR180 Separate Eyes: YOLO detections - left: {len(left_detections)}, right: {len(right_detections)}")

    # Prefer YOLO segmentation masks over bounding boxes when the model
    # supports them and any detection (either eye) actually carries a mask.
    has_yolo_masks = False
    if detector.supports_segmentation:
        has_yolo_masks = any(d.get('has_mask', False) for d in (left_detections + right_detections))
        if has_yolo_masks:
            logger.info(f"VR180 Separate Eyes: YOLO segmentation mode - using direct masks instead of bounding boxes")

    # Save eye-specific debug frames if enabled (best-effort).
    if config.get('advanced.save_yolo_debug_frames', False) and (left_detections or right_detections):
        _save_eye_debug_frames(segment_info, detector, left_eye_video, right_eye_video,
                               left_detections, right_detections)

    # Steps 3-4: Process each eye (only if it has detections or carried-over masks).
    left_masks = _process_one_eye(
        "Left", 'left', left_eye_video, left_detections, previous_left_masks,
        segment_info, detector, sam2_processor, frame_width, has_yolo_masks
    )
    right_masks = _process_one_eye(
        "Right", 'right', right_eye_video, right_detections, previous_right_masks,
        segment_info, detector, sam2_processor, frame_width, has_yolo_masks
    )

    # Step 5: If neither eye produced masks, fall back to greenscreen-only output.
    if not left_masks and not right_masks:
        logger.warning(f"VR180 Separate Eyes: Neither eye produced valid masks for segment {segment_idx}")
        if config.get('processing.enable_greenscreen_fallback', True):
            logger.info(f"VR180 Separate Eyes: Using greenscreen fallback for segment {segment_idx}")
            success = _greenscreen_fallback(mask_processor, config, segment_info)
            return success, None, None
        logger.error(f"VR180 Separate Eyes: No masks generated and greenscreen fallback disabled")
        return False, None, None

    # Step 6: Combine per-eye masks back to full-frame format.
    try:
        logger.info(f"VR180 Separate Eyes: Combining eye masks for segment {segment_idx}")
        combined_masks = sam2_processor.eye_processor.combine_eye_masks(
            left_masks, right_masks, full_frame_shape
        )
        if not combined_masks:
            logger.error(f"VR180 Separate Eyes: Failed to combine eye masks for segment {segment_idx}")
            return False, left_masks, right_masks
        # Validate combined masks have reasonable content.
        total_mask_pixels = sum(
            np.sum(mask)
            for frame_masks in combined_masks.values()
            for mask in frame_masks.values()
            if mask is not None
        )
        if total_mask_pixels == 0:
            logger.warning(f"VR180 Separate Eyes: Combined masks are empty for segment {segment_idx}")
            if config.get('processing.enable_greenscreen_fallback', True):
                logger.info(f"VR180 Separate Eyes: Using greenscreen fallback due to empty masks")
                success = _greenscreen_fallback(mask_processor, config, segment_info)
                return success, left_masks, right_masks
        logger.info(f"VR180 Separate Eyes: Combined masks contain {total_mask_pixels} total pixels")
    except Exception as e:
        logger.error(f"VR180 Separate Eyes: Error combining eye masks for segment {segment_idx}: {e}")
        # Try greenscreen fallback if mask combination fails.
        if config.get('processing.enable_greenscreen_fallback', True):
            logger.info(f"VR180 Separate Eyes: Using greenscreen fallback due to mask combination error")
            success = _greenscreen_fallback(mask_processor, config, segment_info)
            return success, left_masks, right_masks
        return False, left_masks, right_masks

    # Step 7: Save combined masks for inspection.
    mask_path = os.path.join(segment_info['directory'], "mask.png")
    sam2_processor.save_final_masks(
        combined_masks,
        mask_path,
        green_color=config.get_green_color(),
        blue_color=config.get_blue_color()
    )

    # Step 8: Apply green screen and save the output video.
    success = mask_processor.process_segment(
        segment_info,
        combined_masks,
        use_nvenc=config.get_use_nvenc(),
        bitrate=config.get_output_bitrate()
    )
    if success:
        logger.info(f"VR180 Separate Eyes: Successfully processed segment {segment_idx}")
    else:
        logger.error(f"VR180 Separate Eyes: Failed to create output video for segment {segment_idx}")

    # Clean up temporary eye video files (best-effort).
    try:
        if os.path.exists(left_eye_video):
            os.remove(left_eye_video)
        if os.path.exists(right_eye_video):
            os.remove(right_eye_video)
        logger.debug(f"VR180 Separate Eyes: Cleaned up temporary eye videos for segment {segment_idx}")
    except Exception as e:
        logger.warning(f"VR180 Separate Eyes: Failed to clean up temporary eye videos: {e}")

    return success, left_masks, right_masks
def main():
"""Main processing pipeline."""
args = parse_arguments()
@@ -275,10 +562,23 @@ def main():
)
logger.info("Step 3: Initializing SAM2 processor")
# Check if separate eye processing is enabled
separate_eye_processing = config.get('processing.separate_eye_processing', False)
eye_overlap_pixels = config.get('processing.eye_overlap_pixels', 0)
enable_greenscreen_fallback = config.get('processing.enable_greenscreen_fallback', True)
if separate_eye_processing:
logger.info("VR180 Separate Eye Processing: ENABLED")
logger.info(f"Eye overlap pixels: {eye_overlap_pixels}")
logger.info(f"Greenscreen fallback: {enable_greenscreen_fallback}")
sam2_processor = SAM2Processor(
checkpoint_path=config.get_sam2_checkpoint(),
config_path=config.get_sam2_config(),
vos_optimized=config.get('models.sam2_vos_optimized', False)
vos_optimized=config.get('models.sam2_vos_optimized', False),
separate_eye_processing=separate_eye_processing,
eye_overlap_pixels=eye_overlap_pixels
)
# Initialize mask processor with quality enhancements
@@ -293,6 +593,10 @@ def main():
logger.info("Step 4: Processing segments sequentially")
total_humans_detected = 0
# Initialize previous masks for separate eye processing
previous_left_masks = None
previous_right_masks = None
for i, segment_info in enumerate(segments_info):
segment_idx = segment_info['index']
@@ -307,6 +611,25 @@ def main():
logger.info(f"Segment {segment_idx} already processed, skipping")
continue
# Branch based on processing mode
if separate_eye_processing:
# Use separate eye processing mode
success, left_masks, right_masks = process_segment_with_separate_eyes(
segment_info, detector, sam2_processor, mask_processor, config,
previous_left_masks, previous_right_masks
)
# Update previous masks for next segment
previous_left_masks = left_masks
previous_right_masks = right_masks
if success:
logger.info(f"Successfully processed segment {segment_idx} with separate eye processing")
else:
logger.error(f"Failed to process segment {segment_idx} with separate eye processing")
continue # Skip the original processing logic
# Determine if we should use YOLO detections or previous masks
use_detections = segment_idx in detect_segments