---
# YOLO + SAM2 Video Processing Configuration with VR180 Separate Eye Processing

# Input section: path of the video file to process.
input:
  video_path: "./input/regrets_full.mp4"

# Output section: directory and filename for the processed video.
output:
  directory: "./output/"
  filename: "vr180_processed_both_eyes.mp4"

# Segmenting, detection, and VR180 stereo-handling settings.
processing:
  # Duration of each video segment in seconds
  segment_duration: 5

  # Scale factor for SAM2 inference (e.g. 0.5 = half resolution)
  inference_scale: 0.4

  # YOLO detection confidence threshold (lowered for better VR180 detection)
  yolo_confidence: 0.4

  # Which segments to run YOLO detection on
  detect_segments: "all"

  # VR180 separate eye processing mode.
  # NOTE(review): this was previously annotated "ENABLED FOR TESTING", but the
  # value is false — confirm which of the two is intended.
  separate_eye_processing: false

  # Overlap (IoU) threshold for treating masks as a stereo pair.
  # A value of 0.5 means masks must overlap by 50% to be considered a pair.
  stereo_iou_threshold: 0.5

  # Factor to reduce YOLO confidence by if no stereo pairs are found on the
  # first try (e.g., 0.8 = 20% reduction).
  confidence_reduction_factor: 0.8

  # If no humans are detected in a segment, create a full green screen video.
  # Only used when separate_eye_processing is true.
  enable_greenscreen_fallback: true

  # Pixel overlap between left/right eyes for blending (0 = no overlap)
  eye_overlap_pixels: 0

# Model selection: YOLO mode and weights, plus the SAM2 checkpoint/config pair.
models:
  # YOLO detection mode. Options:
  #   "detection"    - bounding boxes
  #   "segmentation" - direct masks
  yolo_mode: "segmentation"

  # YOLO model paths for different modes
  # Regular YOLO for detection mode
  yolo_detection_model: "models/yolo/yolo11l.pt"
  # Segmentation YOLO for segmentation mode
  yolo_segmentation_model: "models/yolo/yolo11x-seg.pt"

  # SAM2 model configuration
  sam2_checkpoint: "models/sam2/checkpoints/sam2.1_hiera_small.pt"
  sam2_config: "models/sam2/configs/sam2.1/sam2.1_hiera_s.yaml"

# Output video encoding settings.
video:
  # Use NVIDIA hardware encoding (requires NVENC-capable GPU)
  use_nvenc: true

  # Output video bitrate
  output_bitrate: "25M"

  # Preserve original audio track
  preserve_audio: true

  # Force keyframes for better segment boundaries
  force_keyframes: true

# Advanced settings: screen colors, class ID, cleanup, logging, debugging,
# mid-segment re-detection, and parallel low-res generation.
advanced:
  # Screen colors, given as three channel values.
  # NOTE(review): these were originally described as RGB, but [255, 0, 0] is
  # blue only in BGR order (the OpenCV convention); in RGB it is red. Confirm
  # which channel order the consumer expects before changing either value.

  # Green screen color ([0, 255, 0] is green in both RGB and BGR)
  green_color: [0, 255, 0]

  # Blue screen color for second object (see the channel-order note above)
  blue_color: [255, 0, 0]

  # YOLO human class ID (0 for COCO person class)
  human_class_id: 0

  # Clean up intermediate files.
  # NOTE(review): originally labelled "GPU memory management"; the key name
  # refers to files — confirm what this flag actually releases.
  cleanup_intermediate_files: true

  # Logging level (DEBUG, INFO, WARNING, ERROR)
  log_level: "INFO"

  # Save debug frames with YOLO detections visualized (ENABLED FOR TESTING)
  save_yolo_debug_frames: true

  # --- Mid-Segment Re-detection ---
  # Re-run YOLO at intervals within a segment to correct tracking drift.
  enable_mid_segment_detection: false
  redetection_interval: 30  # Frames between re-detections.
  max_redetections_per_segment: 10

  # --- Parallel Processing Optimizations ---
  # Enable async low-res video pre-generation
  # (temporarily disabled due to syntax fix needed)
  enable_background_lowres_generation: false
  # Max parallel FFmpeg processes for low-res creation
  max_concurrent_lowres: 2
  # How many segments to prepare in advance
  lowres_segments_ahead: 2
  # Use FFmpeg instead of OpenCV for low-res creation
  use_ffmpeg_lowres: true

# Mask Quality Enhancement Settings - Optimized for Performance
mask_processing:
  # Edge feathering and blurring (REDUCED for performance)
  # Enable Gaussian blur on mask edges for smooth transitions
  enable_edge_blur: true
  # Reduced from 10 to 3 for better performance
  edge_blur_radius: 3
  # Gaussian blur standard deviation
  edge_blur_sigma: 0.5

  # Temporal smoothing between frames
  # Enable frame-to-frame mask blending
  enable_temporal_smoothing: false
  # Weight for previous frame (0.0-1.0, higher = more smoothing)
  temporal_blend_weight: 0.2
  # Number of previous frames to consider
  temporal_history_frames: 2

  # Morphological mask cleaning
  # (DISABLED for VR180 - SAM2 masks are already high quality)
  # Disabled for performance - SAM2 produces clean masks
  enable_morphological_cleaning: false
  # Kernel size for opening/closing operations
  morphology_kernel_size: 5
  # Minimum pixel area for connected components
  min_component_size: 500

  # Alpha blending mode (OPTIMIZED)
  # Linear is fastest - keep as-is
  alpha_blending_mode: "linear"
  # Width of transition zone in pixels
  alpha_transition_width: 1

  # Advanced options
  # Edge-preserving smoothing (slower but higher quality)
  enable_bilateral_filter: false
  # Bilateral filter diameter
  bilateral_d: 9
  # Bilateral filter color sigma
  bilateral_sigma_color: 75
  # Bilateral filter space sigma
  bilateral_sigma_space: 75