stereo mask working

config.yaml

# YOLO + SAM2 Video Processing Configuration
# This file serves as a complete reference for all available settings.

input:
  # Full path to the input video file.
  video_path: "/path/to/input/video.mp4"

output:
  # Directory where all output files and segments will be stored.
  directory: "/path/to/output/"

  # Filename for the final assembled video.
  filename: "processed_video.mp4"

processing:
  # Duration of each video segment in seconds. Shorter segments use less memory.
  segment_duration: 5
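  # Illustration: at segment_duration: 5, a 60-second video is split into
  # twelve 5-second segments, which are processed individually and then
  # reassembled into the final output.
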
  # Scale factor for SAM2 inference (e.g., 0.5 = half resolution).
  # Lower values are faster but may reduce mask quality.
  inference_scale: 0.5
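  # Illustration: for a 3840x2160 source frame, inference_scale: 0.5 runs
  # SAM2 on 1920x1080 frames; 0.25 would use 960x540.
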
  # YOLO detection confidence threshold (0.0 to 1.0).
  yolo_confidence: 0.6

  # Which segments to run YOLO detection on.
  # Options: "all", a list of specific segment indices (e.g., [0, 10, 20]), or [] for default ("all").
  detect_segments: "all"

  # --- VR180 Stereo Processing ---
  # Enables special logic for VR180 side-by-side (SBS) video, where the left
  # and right eye views occupy the left and right halves of each frame.
  # When false, the video is treated as a single view.
  separate_eye_processing: false

  # Threshold for stereo mask agreement (Intersection over Union).
  # A value of 0.5 means masks must overlap by 50% to be considered a pair.
  stereo_iou_threshold: 0.5
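  # For reference: IoU = (area of intersection) / (area of union) of the
  # left-eye and right-eye masks; 1.0 means identical masks, 0.0 means no overlap.
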
  # Factor to reduce YOLO confidence by if no stereo pairs are found on the
  # first try (e.g., 0.8 = 20% reduction).
  confidence_reduction_factor: 0.8
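  # Worked example: with yolo_confidence: 0.6 and a factor of 0.8, the retry
  # would run at a threshold of 0.6 * 0.8 = 0.48.
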
  # If no humans are detected in a segment, create a full green screen video.
  # Only used when separate_eye_processing is true.
  enable_greenscreen_fallback: true

  # Pixel overlap between the left and right eyes, for smoother blending at
  # the center seam.
  eye_overlap_pixels: 0

models:
  # YOLO mode: "detection" (for bounding boxes) or "segmentation" (for direct masks).
  # "segmentation" is generally recommended, as it provides initial masks to SAM2.
  yolo_mode: "segmentation"

  # Path to the YOLO model for "detection" mode.
  yolo_detection_model: "models/yolo/yolo11l.pt"

  # Path to the YOLO model for "segmentation" mode.
  yolo_segmentation_model: "models/yolo/yolo11x-seg.pt"

  # --- SAM2 Model Configuration ---
  sam2_checkpoint: "models/sam2/checkpoints/sam2.1_hiera_small.pt"
  sam2_config: "models/sam2/configs/sam2.1/sam2.1_hiera_s.yaml"

  # (Experimental) Use the optimized VOS predictor for a significant speedup.
  # Requires PyTorch 2.5.1+.
  sam2_vos_optimized: false

video:
  # Use NVIDIA's NVENC for hardware-accelerated video encoding (requires an
  # NVENC-capable GPU).
  use_nvenc: true

  # Bitrate for the output video (e.g., "25M", "50M").
  output_bitrate: "50M"
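  # Sketch of the assumed encode step (illustrative only; the actual command
  # is assembled by the pipeline):
  #   ffmpeg -i segment.mp4 -c:v h264_nvenc -b:v 50M output.mp4
  # With use_nvenc: false, a software encoder such as libx264 would be used instead.
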
  # If true, the audio track from the input video will be copied to the final output.
  preserve_audio: true

  # Force keyframes at the start of each segment for clean cuts. Recommended to keep true.
  force_keyframes: true

advanced:
  # RGB color for the green screen background.
  green_color: [0, 255, 0]

  # Color for the second object's mask (typically the right eye in VR180).
  # Note: the value is likely in OpenCV's BGR channel order, where
  # [255, 0, 0] is blue.
  blue_color: [255, 0, 0]

  # The class ID for humans in the YOLO model (COCO default is 0 for "person").
  human_class_id: 0

  # If true, deletes intermediate files like segment videos after processing.
  cleanup_intermediate_files: true

  # Logging level: DEBUG, INFO, WARNING, or ERROR.
  log_level: "INFO"

  # If true, saves debug frames with YOLO detections visualized.
  save_yolo_debug_frames: true

  # --- Mid-Segment Re-detection ---
  # Re-run YOLO at intervals within a segment to correct tracking drift.
  enable_mid_segment_detection: false
  redetection_interval: 30 # Frames between re-detections.
  max_redetections_per_segment: 10
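  # Worked example (assuming a 30 fps source): a 5-second segment spans 150
  # frames, so redetection_interval: 30 re-runs YOLO up to 4 times within
  # the segment, well under max_redetections_per_segment: 10.
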
  # --- Parallel Processing Optimizations ---
  # (Experimental) Generate low-res videos for upcoming segments in the background.
  enable_background_lowres_generation: false
  max_concurrent_lowres: 2 # Max parallel FFmpeg processes.
  lowres_segments_ahead: 2 # How many segments to prepare in advance.
  use_ffmpeg_lowres: true # Use FFmpeg (faster) instead of OpenCV for low-res creation.
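  # Illustration: with lowres_segments_ahead: 2, while segment N is being
  # processed, low-res proxies for segments N+1 and N+2 are generated in the
  # background, capped at max_concurrent_lowres FFmpeg processes.
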
# --- Mask Quality Enhancement Settings ---
# These settings allow fine-tuning of the final mask appearance.
# Enabling these may increase processing time.
mask_processing:
  # Edge feathering and blurring for smoother transitions.
  enable_edge_blur: true
  edge_blur_radius: 3
  edge_blur_sigma: 0.5
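  # These presumably feather the mask boundary with a Gaussian blur (e.g.,
  # OpenCV's cv2.GaussianBlur); larger radius and sigma values give softer,
  # wider transitions.
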
  # Temporal smoothing to reduce mask flickering between frames.
  enable_temporal_smoothing: false
  temporal_blend_weight: 0.2
  temporal_history_frames: 2
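  # Assumed blending rule (illustrative): with w = temporal_blend_weight,
  # smoothed = (1 - w) * current_mask + w * average(masks from the last
  # temporal_history_frames frames).
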
  # Clean up small noise and holes in the mask.
  # Generally not needed when using SAM2, as its masks are high quality.
  enable_morphological_cleaning: false
  morphology_kernel_size: 5
  min_component_size: 500
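  # Presumably morphological open/close with a morphology_kernel_size x
  # morphology_kernel_size structuring element, plus removal of connected
  # components smaller than min_component_size pixels.
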
  # Method for blending the mask edge with the background.
  # Options: "linear" (fastest), "gaussian", "sigmoid".
  alpha_blending_mode: "linear"
  alpha_transition_width: 1
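  # Illustrative reading: alpha ramps from 0 (background) to 1 (subject)
  # across alpha_transition_width pixels at the mask edge; "linear" is a
  # straight ramp, "sigmoid" an S-shaped curve, "gaussian" a smooth falloff.
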
  # Advanced edge-preserving smoothing filter. Slower, but can produce
  # higher-quality edges.
  enable_bilateral_filter: false
  bilateral_d: 9
  bilateral_sigma_color: 75
  bilateral_sigma_space: 75
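  # The parameter names appear to mirror OpenCV's
  # cv2.bilateralFilter(src, d, sigmaColor, sigmaSpace): d is the pixel
  # neighborhood diameter, while the sigma values control how strongly
  # color similarity and spatial distance are weighted when smoothing.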