---
# VR180 Streaming Configuration for RunPod
# Optimized for 48 GB GPUs (A6000 / L40) or similar cloud GPUs

input:
  video_path: "/workspace/input_video.mp4"  # Update with your input path
  start_frame: 0  # Resume from checkpoint if auto_resume is enabled
  max_frames: null  # null = process entire video, or set a number for testing

streaming:
  mode: true  # True streaming - no chunking!
  buffer_frames: 10  # Small buffer for correction lookahead
  write_interval: 1  # Write every frame immediately

processing:
  scale_factor: 0.5  # 0.5 = 4K processing for 8K input (good balance)
  adaptive_scaling: true  # Dynamically adjust scale based on GPU load
  target_gpu_usage: 0.7  # Target 70% GPU utilization
  min_scale: 0.25  # Never go below 25% scale
  max_scale: 1.0  # Can go up to full resolution if GPU allows

detection:
  confidence_threshold: 0.7  # Person detection confidence
  model: "yolov8n"  # Fast model suitable for streaming (n/s/m/l/x)
  device: "cuda"

matting:
  sam2_model_cfg: "sam2.1_hiera_l"  # Use large model for best quality
  sam2_checkpoint: "segment-anything-2/checkpoints/sam2.1_hiera_large.pt"
  memory_offload: true  # Critical for streaming - offload to CPU when needed
  fp16: false  # Disable FP16 to avoid type mismatch with compiled models
  continuous_correction: true  # Periodically refine tracking
  correction_interval: 30  # Correct every 0.5 seconds at 60fps (for testing)

stereo:
  mode: "master_slave"  # Left eye detects, right eye follows
  master_eye: "left"  # Which eye leads detection
  disparity_correction: true  # Adjust for stereo parallax
  consistency_threshold: 0.3  # Max allowed difference between eyes
  baseline: 65.0  # Interpupillary distance in mm
  focal_length: 1000.0  # Camera focal length in pixels

output:
  path: "/workspace/output_video.mp4"  # Update with your output path
  format: "greenscreen"  # "greenscreen" or "alpha"
  background_color: [0, 255, 0]  # RGB for green screen
  video_codec: "h264_nvenc"  # GPU encoding (fallback to CPU if not available)
  quality_preset: "p4"  # NVENC preset (p1=fastest, p7=slowest/best quality)
  crf: 18  # Quality (0-51, lower = better, 18 = high quality)
  maintain_sbs: true  # Keep side-by-side format with audio

hardware:
  device: "cuda"
  max_vram_gb: 44.0  # Conservative limit for a 48 GB GPU
  max_ram_gb: 48.0  # RunPod container RAM limit

recovery:
  enable_checkpoints: true  # Save progress for resume
  checkpoint_interval: 1000  # Save every ~16 seconds at 60fps
  auto_resume: true  # Automatically resume from last checkpoint
  checkpoint_dir: "./checkpoints"

performance:
  profile_enabled: true  # Track performance metrics
  log_interval: 100  # Log progress every 100 frames
  memory_monitor: true  # Monitor RAM/VRAM usage

# Usage:
# 1. Update input.video_path and output.path
# 2. Adjust scale_factor based on your GPU (0.25 for faster, 1.0 for quality)
# 3. Run: python -m vr180_streaming config-streaming-runpod.yaml