working a bit faster

2025-07-31 09:09:22 -07:00
parent 70044e1b10
commit 0057017ac4
5 changed files with 585 additions and 137 deletions


@@ -8,6 +8,7 @@ import cv2
import numpy as np
import torch
import logging
import subprocess
import gc
from typing import Dict, List, Any, Optional, Tuple
from sam2.build_sam import build_sam2_video_predictor
@@ -19,7 +20,8 @@ class SAM2Processor:
"""Handles SAM2-based video segmentation for human tracking."""
def __init__(self, checkpoint_path: str, config_path: str, vos_optimized: bool = False,
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0):
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0,
async_preprocessor=None):
"""
Initialize SAM2 processor.
@@ -29,11 +31,13 @@ class SAM2Processor:
vos_optimized: Enable VOS optimization for speedup (requires PyTorch 2.5.1+)
separate_eye_processing: Enable VR180 separate eye processing mode
eye_overlap_pixels: Pixel overlap between eyes for blending
async_preprocessor: Optional async preprocessor for background low-res video generation
"""
self.checkpoint_path = checkpoint_path
self.config_path = config_path
self.vos_optimized = vos_optimized
self.separate_eye_processing = separate_eye_processing
self.async_preprocessor = async_preprocessor
self.predictor = None
# Initialize eye processor if separate eye processing is enabled
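The new async_preprocessor collaborator is duck-typed: the only method this class calls on it is is_segment_ready(segment_idx) (see ensure_low_res_video below). A minimal sketch of the interface the processor appears to assume — the class name, locking scheme, and mark_ready hook are illustrative, not taken from this commit:

    import threading

    class AsyncLowResPreprocessor:
        """Hypothetical background preprocessor matching the calls SAM2Processor makes."""

        def __init__(self):
            self._ready = set()        # segment indices whose low-res video is on disk
            self._lock = threading.Lock()

        def mark_ready(self, segment_idx: int):
            # Called by the background worker once a segment's low-res video is written
            with self._lock:
                self._ready.add(segment_idx)

        def is_segment_ready(self, segment_idx: int) -> bool:
            # The only method SAM2Processor relies on
            with self._lock:
                return segment_idx in self._ready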
@@ -120,13 +124,64 @@ class SAM2Processor:
def create_low_res_video(self, input_video_path: str, output_video_path: str, scale: float):
"""
Create a low-resolution version of the input video for inference.
Create a low-resolution version of the input video for inference using FFmpeg
with hardware acceleration for improved performance.
Args:
input_video_path: Path to input video
output_video_path: Path to output low-res video
scale: Scale factor for resolution reduction
"""
try:
# Get video properties using OpenCV
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
raise ValueError(f"Could not open video: {input_video_path}")
original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
cap.release()
target_width = int(original_width * scale)
target_height = int(original_height * scale)
# Ensure dimensions are even, as required by many codecs
target_width = target_width if target_width % 2 == 0 else target_width + 1
target_height = target_height if target_height % 2 == 0 else target_height + 1
# Construct FFmpeg command with hardware acceleration
command = [
'ffmpeg',
'-y',
'-hwaccel', 'auto', # Auto-detect hardware acceleration
'-i', input_video_path,
'-vf', f'scale={target_width}:{target_height}',
'-c:v', 'h264_nvenc', # Use NVIDIA's hardware encoder
'-preset', 'fast',
'-cq', '23',  # h264_nvenc ignores -crf; -cq is its constant-quality control
output_video_path
]
logger.info(f"Executing FFmpeg command: {' '.join(command)}")
# Execute FFmpeg; check=True raises CalledProcessError on a non-zero
# exit code, which the fallback handler below catches
subprocess.run(command, check=True, capture_output=True, text=True)
logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
except (subprocess.CalledProcessError, FileNotFoundError) as e:
logger.warning(f"Hardware-accelerated FFmpeg failed: {e}. Falling back to OpenCV.")
# Fallback to original OpenCV implementation if FFmpeg fails
self._create_low_res_video_opencv(input_video_path, output_video_path, scale)
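Because the command hard-codes h264_nvenc, every call on a machine without an NVIDIA encoder pays for a failed FFmpeg launch before reaching the OpenCV fallback. One way to avoid that repeated cost (a sketch, not part of this commit) is to probe the encoder list once and cache the answer:

    import subprocess
    from functools import lru_cache

    @lru_cache(maxsize=1)
    def nvenc_available() -> bool:
        """Return True if this FFmpeg build lists the h264_nvenc encoder."""
        # Note: a listed encoder only proves build-time support; a missing or
        # busy GPU can still make the actual encode fail, so keep the fallback.
        try:
            result = subprocess.run(
                ['ffmpeg', '-hide_banner', '-encoders'],
                capture_output=True, text=True, check=True,
            )
            return 'h264_nvenc' in result.stdout
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False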
def _create_low_res_video_opencv(self, input_video_path: str, output_video_path: str, scale: float):
"""Original OpenCV-based implementation for creating low-resolution video."""
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
raise ValueError(f"Could not open video: {input_video_path}")
@@ -151,7 +206,42 @@ class SAM2Processor:
cap.release()
out.release()
logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
logger.info(f"Created low-res video with {frame_count} frames using OpenCV: {output_video_path}")
def ensure_low_res_video(self, input_video_path: str, output_video_path: str,
scale: float, segment_idx: Optional[int] = None) -> bool:
"""
Ensure low-resolution video exists, using async preprocessor if available.
Args:
input_video_path: Path to input video
output_video_path: Path to output low-res video
scale: Scale factor for resolution reduction
segment_idx: Optional segment index for async coordination
Returns:
True if low-res video is ready
"""
# Check if already exists
if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
return True
# Use async preprocessor if available and segment index provided
if self.async_preprocessor and segment_idx is not None:
if self.async_preprocessor.is_segment_ready(segment_idx):
if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
logger.debug(f"Async preprocessor provided segment {segment_idx}")
return True
else:
logger.debug(f"Async preprocessor hasn't completed segment {segment_idx} yet")
# Fallback to synchronous creation
try:
self.create_low_res_video(input_video_path, output_video_path, scale)
return os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0
except Exception as e:
logger.error(f"Failed to create low-res video {output_video_path}: {e}")
return False
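The method is self-contained enough to call directly; a hypothetical invocation, where the checkpoint, config, paths, scale, and segment index are all illustrative and the preprocessor is the sketch class from above:

    processor = SAM2Processor(
        checkpoint_path='checkpoints/sam2_hiera_large.pt',  # assumed path
        config_path='sam2_hiera_l.yaml',                    # assumed path
        async_preprocessor=AsyncLowResPreprocessor(),       # sketch class from above
    )
    ok = processor.ensure_low_res_video(
        'segments/seg_003/video.mp4',
        'segments/seg_003/low_res_video.mp4',
        scale=0.25,
        segment_idx=3,
    )
    if not ok:
        raise RuntimeError('low-res video could not be produced')

If the async preprocessor has already produced the file, the call returns immediately; otherwise it degrades to the synchronous FFmpeg/OpenCV path.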
def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]]) -> bool:
"""
@@ -341,14 +431,11 @@ class SAM2Processor:
logger.info(f"Processing segment {segment_idx} with SAM2")
# Create low-resolution video for inference
# Create low-resolution video for inference (async-aware)
low_res_video_path = os.path.join(segment_dir, "low_res_video.mp4")
if not os.path.exists(low_res_video_path):
try:
self.create_low_res_video(video_path, low_res_video_path, inference_scale)
except Exception as e:
logger.error(f"Failed to create low-res video for segment {segment_idx}: {e}")
return None
if not self.ensure_low_res_video(video_path, low_res_video_path, inference_scale, segment_idx):
logger.error(f"Failed to create low-res video for segment {segment_idx}")
return None
try:
# Initialize inference state
@@ -387,13 +474,7 @@ class SAM2Processor:
except Exception as e:
logger.warning(f"Could not remove low-res video: {e}")
# Mark segment as completed (for resume capability)
try:
with open(output_done_file, 'w') as f:
f.write(f"Segment {segment_idx} completed successfully\n")
logger.debug(f"Marked segment {segment_idx} as completed")
except Exception as e:
logger.warning(f"Could not create completion marker: {e}")
return video_segments
@@ -698,14 +779,11 @@ class SAM2Processor:
logger.error(f"Eye video not found: {eye_video_path}")
return None
# Create low-resolution eye video for inference
# Create low-resolution eye video for inference (async-aware)
low_res_eye_video_path = os.path.join(segment_dir, f"low_res_{eye_side}_eye_video.mp4")
if not os.path.exists(low_res_eye_video_path):
try:
self.create_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale)
except Exception as e:
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}: {e}")
return None
if not self.ensure_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale, segment_idx):
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}")
return None
try:
# Initialize inference state with eye-specific video