stereo mask working
test-separate-eyes-config.yaml (new file, 122 lines)
@@ -0,0 +1,122 @@
# YOLO + SAM2 Video Processing Configuration with VR180 Separate Eye Processing

input:
  video_path: "./input/regrets_full.mp4"

output:
  directory: "./output/"
  filename: "vr180_processed_both_eyes.mp4"

processing:
  # Duration of each video segment in seconds
  segment_duration: 5

  # Scale factor for SAM2 inference (0.5 = half resolution)
  inference_scale: 0.4

  # YOLO detection confidence threshold (lowered for better VR180 detection)
  yolo_confidence: 0.4

  # Which segments to run YOLO detection on
  detect_segments: "all"
  # VR180 separate eye processing mode
  separate_eye_processing: false

  # IoU threshold for pairing person masks between the left and right eyes.
  # A value of 0.5 means masks must overlap by 50% to be considered a pair.
  stereo_iou_threshold: 0.5
  # Factor to reduce YOLO confidence by if no stereo pairs are found on the first try (e.g., 0.8 = 20% reduction).
  confidence_reduction_factor: 0.8

  # If no humans are detected in a segment, create a full green screen video.
  # Only used when separate_eye_processing is true.
  enable_greenscreen_fallback: true

  # Pixel overlap between left/right eyes for blending (0 = no overlap)
  eye_overlap_pixels: 0
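For reference, a minimal sketch of how the separate-eye settings above could fit together: split the side-by-side frame into eye views (widened by eye_overlap_pixels), then pair per-eye person masks by IoU against stereo_iou_threshold. The function names are illustrative, not the project's actual code; when no pairs are found, detection would be retried with yolo_confidence scaled by confidence_reduction_factor.

import numpy as np

def split_sbs_frame(frame, overlap_px=0):
    """Split a VR180 side-by-side frame into left/right eye views.

    overlap_px extends each half across the centre seam so masks can be
    blended back together later (0 = a clean split).
    """
    h, w = frame.shape[:2]
    half = w // 2
    left = frame[:, : half + overlap_px]
    right = frame[:, half - overlap_px :]
    return left, right

def mask_iou(a, b):
    """Intersection-over-union of two boolean masks of equal shape."""
    inter = np.logical_and(a, b).sum()
    union = np.logical_or(a, b).sum()
    return float(inter) / float(union) if union else 0.0

def pair_stereo_masks(left_masks, right_masks, iou_threshold=0.5):
    """Greedily pair left/right-eye masks whose IoU clears the threshold."""
    pairs, used = [], set()
    for li, lm in enumerate(left_masks):
        best, best_iou = None, iou_threshold
        for ri, rm in enumerate(right_masks):
            if ri in used:
                continue
            iou = mask_iou(lm, rm)
            if iou >= best_iou:
                best, best_iou = ri, iou
        if best is not None:
            used.add(best)
            pairs.append((li, best, best_iou))
    return pairs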
models:
  # YOLO detection mode: "detection" (bounding boxes) or "segmentation" (direct masks)
  yolo_mode: "segmentation"  # Options: "detection", "segmentation" (the default keeps existing behavior)

  # YOLO model paths for different modes
  yolo_detection_model: "models/yolo/yolo11l.pt"         # Regular YOLO for detection mode
  yolo_segmentation_model: "models/yolo/yolo11x-seg.pt"  # Segmentation YOLO for segmentation mode

  # SAM2 model configuration
  sam2_checkpoint: "models/sam2/checkpoints/sam2.1_hiera_small.pt"
  sam2_config: "models/sam2/configs/sam2.1/sam2.1_hiera_s.yaml"
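These paths map onto the usual Ultralytics and SAM2 entry points. A sketch of how they might be consumed is shown below; the loader function is hypothetical, and the SAM2 call assumes the official sam2 package (which normally resolves the config as a Hydra config name), so the real project may handle the config path differently.

from ultralytics import YOLO
from sam2.build_sam import build_sam2_video_predictor

def load_models(cfg):
    """Hypothetical loader for the "models" section above."""
    m = cfg["models"]
    yolo_path = (m["yolo_segmentation_model"]
                 if m["yolo_mode"] == "segmentation"
                 else m["yolo_detection_model"])
    yolo = YOLO(yolo_path)                     # Ultralytics loads .pt weights directly
    predictor = build_sam2_video_predictor(    # SAM2 video predictor: config + checkpoint
        m["sam2_config"], m["sam2_checkpoint"])
    return yolo, predictor

Per-frame detection would then look something like yolo(frame, conf=0.4, classes=[0]), with class 0 matching human_class_id further down.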
video:
  # Use NVIDIA hardware encoding (requires NVENC-capable GPU)
  use_nvenc: true

  # Output video bitrate
  output_bitrate: "25M"

  # Preserve original audio track
  preserve_audio: true

  # Force keyframes for better segment boundaries
  force_keyframes: true
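As a rough illustration only, the video settings translate into an FFmpeg invocation along these lines; the helper and the exact flag layout are assumptions, not the project's actual command.

import subprocess

def encode(src, dst, bitrate="25M", use_nvenc=True,
           preserve_audio=True, segment_duration=5):
    """Encode a processed segment with NVENC (or libx264 as a software fallback)."""
    audio = ["-c:a", "copy"] if preserve_audio else ["-an"]
    cmd = ["ffmpeg", "-y", "-i", src,
           "-c:v", "h264_nvenc" if use_nvenc else "libx264",
           "-b:v", bitrate,
           # force_keyframes: place a keyframe at every segment boundary
           "-force_key_frames", f"expr:gte(t,n_forced*{segment_duration})",
           *audio, dst]
    subprocess.run(cmd, check=True)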
advanced:
  # Green screen color ([0, 255, 0] is green in both RGB and BGR order)
  green_color: [0, 255, 0]

  # Blue screen color for second object ([255, 0, 0] is blue in OpenCV's BGR order; as RGB it would be red)
  blue_color: [255, 0, 0]

  # YOLO human class ID (0 for COCO person class)
  human_class_id: 0
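A minimal sketch of how these key colours would be applied with the person masks, assuming frames are handled as OpenCV-style BGR arrays (the function names are illustrative):

import numpy as np

def apply_keying(frame_bgr, person_mask, key_color=(0, 255, 0)):
    """Replace everything outside the person mask with the key colour."""
    out = frame_bgr.copy()
    out[~person_mask.astype(bool)] = key_color
    return out

def greenscreen_fallback(frame_shape, key_color=(0, 255, 0)):
    """Full key-colour frame, used when a segment contains no detected humans."""
    return np.full(frame_shape, key_color, dtype=np.uint8)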
  # GPU memory management
  cleanup_intermediate_files: true

  # Logging level (DEBUG, INFO, WARNING, ERROR)
  log_level: "INFO"

  # Save debug frames with YOLO detections visualized (ENABLED FOR TESTING)
  save_yolo_debug_frames: true

  # --- Mid-Segment Re-detection ---
  # Re-run YOLO at intervals within a segment to correct tracking drift.
  enable_mid_segment_detection: false
  redetection_interval: 30          # Frames between re-detections.
  max_redetections_per_segment: 10
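The re-detection cadence described above reduces to a simple per-frame check; a hypothetical sketch:

def should_redetect(frame_idx, redetections_done,
                    enabled=False, interval=30, max_per_segment=10):
    """Re-run YOLO on this frame only if the feature is on, the frame falls
    on the interval, and the per-segment budget has not been used up."""
    if not enabled or frame_idx == 0:
        return False
    return frame_idx % interval == 0 and redetections_done < max_per_segment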
  # Parallel Processing Optimizations
  enable_background_lowres_generation: false  # Enable async low-res video pre-generation (temporarily disabled pending a syntax fix)
  max_concurrent_lowres: 2                    # Max parallel FFmpeg processes for low-res creation
  lowres_segments_ahead: 2                    # How many segments to prepare in advance
  use_ffmpeg_lowres: true                     # Use FFmpeg instead of OpenCV for low-res creation

# Mask Quality Enhancement Settings - Optimized for Performance
mask_processing:
  # Edge feathering and blurring (REDUCED for performance)
  enable_edge_blur: true   # Enable Gaussian blur on mask edges for smooth transitions
  edge_blur_radius: 3      # Reduced from 10 to 3 for better performance
  edge_blur_sigma: 0.5     # Gaussian blur standard deviation
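Edge feathering of this kind typically amounts to Gaussian-blurring the hard mask into a soft alpha matte; a small sketch under that assumption (OpenCV kernel sizes must be odd, hence 2 * radius + 1):

import cv2
import numpy as np

def feather_mask(mask, radius=3, sigma=0.5):
    """Blur a hard 0/1 mask into a soft alpha matte for smoother edges."""
    alpha = mask.astype(np.float32)
    ksize = 2 * radius + 1            # OpenCV requires an odd kernel size
    return cv2.GaussianBlur(alpha, (ksize, ksize), sigma)

def linear_blend(frame_bgr, background_bgr, alpha):
    """Linear alpha blend, matching the "linear" alpha_blending_mode below."""
    a = alpha[..., None]
    return (a * frame_bgr + (1.0 - a) * background_bgr).astype(np.uint8)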
  # Temporal smoothing between frames
  enable_temporal_smoothing: false  # Enable frame-to-frame mask blending
  temporal_blend_weight: 0.2        # Weight for previous frame (0.0-1.0, higher = more smoothing)
  temporal_history_frames: 2        # Number of previous frames to consider
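Read literally, temporal_blend_weight is the share of the previous frame's matte mixed into the current one; a one-function sketch (names are illustrative):

def smooth_temporally(current_alpha, previous_alpha, blend_weight=0.2):
    """Blend the previous frame's matte into the current one to reduce flicker."""
    if previous_alpha is None:        # first frame of a segment
        return current_alpha
    return (1.0 - blend_weight) * current_alpha + blend_weight * previous_alpha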
  # Morphological mask cleaning (DISABLED for VR180 - SAM2 masks are already high quality)
  enable_morphological_cleaning: false  # Disabled for performance - SAM2 produces clean masks
  morphology_kernel_size: 5             # Kernel size for opening/closing operations
  min_component_size: 500               # Minimum pixel area for connected components

  # Alpha blending mode (OPTIMIZED)
  alpha_blending_mode: "linear"   # Linear is fastest - keep as-is
  alpha_transition_width: 1       # Width of transition zone in pixels

  # Advanced options
  enable_bilateral_filter: false  # Edge-preserving smoothing (slower but higher quality)
  bilateral_d: 9                  # Bilateral filter diameter
  bilateral_sigma_color: 75       # Bilateral filter color sigma
  bilateral_sigma_space: 75       # Bilateral filter space sigma
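Finally, the file is plain YAML, so it can be loaded with PyYAML; the nesting assumed below matches the reconstruction above (processing, models, video, advanced, mask_processing as sections).

import yaml

with open("test-separate-eyes-config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["processing"]["stereo_iou_threshold"])    # 0.5
print(cfg["models"]["yolo_mode"])                   # segmentation
print(cfg["mask_processing"]["edge_blur_radius"])   # 3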