working a bit faster

2025-07-31 09:09:22 -07:00
parent 70044e1b10
commit 0057017ac4
5 changed files with 585 additions and 137 deletions


@@ -8,6 +8,7 @@ import cv2
import numpy as np
import torch
import logging
import subprocess
import gc
from typing import Dict, List, Any, Optional, Tuple
from sam2.build_sam import build_sam2_video_predictor
@@ -19,7 +20,8 @@ class SAM2Processor:
"""Handles SAM2-based video segmentation for human tracking."""
def __init__(self, checkpoint_path: str, config_path: str, vos_optimized: bool = False,
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0):
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0,
async_preprocessor=None):
"""
Initialize SAM2 processor.
@@ -29,11 +31,13 @@ class SAM2Processor:
vos_optimized: Enable VOS optimization for speedup (requires PyTorch 2.5.1+)
separate_eye_processing: Enable VR180 separate eye processing mode
eye_overlap_pixels: Pixel overlap between eyes for blending
async_preprocessor: Optional async preprocessor for background low-res video generation
"""
self.checkpoint_path = checkpoint_path
self.config_path = config_path
self.vos_optimized = vos_optimized
self.separate_eye_processing = separate_eye_processing
self.async_preprocessor = async_preprocessor
self.predictor = None
# Initialize eye processor if separate eye processing is enabled
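The new async_preprocessor collaborator is duck-typed: the only method this class calls on it is is_segment_ready(segment_idx) (see ensure_low_res_video below). A minimal sketch of the interface the processor appears to assume — the class name, locking scheme, and mark_ready hook are illustrative, not taken from this commit:

    import threading

    class AsyncLowResPreprocessor:
        """Hypothetical background preprocessor matching the calls SAM2Processor makes."""

        def __init__(self):
            self._ready = set()        # segment indices whose low-res video is on disk
            self._lock = threading.Lock()

        def mark_ready(self, segment_idx: int):
            # Called by the background worker once a segment's low-res video is written
            with self._lock:
                self._ready.add(segment_idx)

        def is_segment_ready(self, segment_idx: int) -> bool:
            # The only method SAM2Processor relies on
            with self._lock:
                return segment_idx in self._ready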
@@ -120,13 +124,64 @@ class SAM2Processor:
def create_low_res_video(self, input_video_path: str, output_video_path: str, scale: float):
"""
Create a low-resolution version of the input video for inference.
Create a low-resolution version of the input video for inference using FFmpeg
with hardware acceleration for improved performance.
Args:
input_video_path: Path to input video
output_video_path: Path to output low-res video
scale: Scale factor for resolution reduction
"""
try:
# Get video properties using OpenCV
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
raise ValueError(f"Could not open video: {input_video_path}")
original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
cap.release()
target_width = int(original_width * scale)
target_height = int(original_height * scale)
# Ensure dimensions are even, as required by many codecs
target_width = target_width if target_width % 2 == 0 else target_width + 1
target_height = target_height if target_height % 2 == 0 else target_height + 1
# Construct FFmpeg command with hardware acceleration
command = [
'ffmpeg',
'-y',
'-hwaccel', 'auto', # Auto-detect hardware acceleration
'-i', input_video_path,
'-vf', f'scale={target_width}:{target_height}',
'-c:v', 'h264_nvenc', # Use NVIDIA's hardware encoder
'-preset', 'fast',
'-cq', '23',  # h264_nvenc ignores -crf; -cq is its constant-quality control
output_video_path
]
logger.info(f"Executing FFmpeg command: {' '.join(command)}")
# Execute FFmpeg; check=True raises CalledProcessError on a non-zero
# exit code, which the fallback handler below catches
subprocess.run(command, check=True, capture_output=True, text=True)
logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
except (subprocess.CalledProcessError, FileNotFoundError) as e:
logger.warning(f"Hardware-accelerated FFmpeg failed: {e}. Falling back to OpenCV.")
# Fallback to original OpenCV implementation if FFmpeg fails
self._create_low_res_video_opencv(input_video_path, output_video_path, scale)
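Because the command hard-codes h264_nvenc, every call on a machine without an NVIDIA encoder pays for a failed FFmpeg launch before reaching the OpenCV fallback. One way to avoid that repeated cost (a sketch, not part of this commit) is to probe the encoder list once and cache the answer:

    import subprocess
    from functools import lru_cache

    @lru_cache(maxsize=1)
    def nvenc_available() -> bool:
        """Return True if this FFmpeg build lists the h264_nvenc encoder."""
        # Note: a listed encoder only proves build-time support; a missing or
        # busy GPU can still make the actual encode fail, so keep the fallback.
        try:
            result = subprocess.run(
                ['ffmpeg', '-hide_banner', '-encoders'],
                capture_output=True, text=True, check=True,
            )
            return 'h264_nvenc' in result.stdout
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False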
def _create_low_res_video_opencv(self, input_video_path: str, output_video_path: str, scale: float):
"""Original OpenCV-based implementation for creating low-resolution video."""
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
raise ValueError(f"Could not open video: {input_video_path}")
@@ -151,7 +206,42 @@ class SAM2Processor:
cap.release()
out.release()
logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
logger.info(f"Created low-res video with {frame_count} frames using OpenCV: {output_video_path}")
def ensure_low_res_video(self, input_video_path: str, output_video_path: str,
scale: float, segment_idx: Optional[int] = None) -> bool:
"""
Ensure low-resolution video exists, using async preprocessor if available.
Args:
input_video_path: Path to input video
output_video_path: Path to output low-res video
scale: Scale factor for resolution reduction
segment_idx: Optional segment index for async coordination
Returns:
True if low-res video is ready
"""
# Check if already exists
if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
return True
# Use async preprocessor if available and segment index provided
if self.async_preprocessor and segment_idx is not None:
if self.async_preprocessor.is_segment_ready(segment_idx):
if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
logger.debug(f"Async preprocessor provided segment {segment_idx}")
return True
else:
logger.debug(f"Async preprocessor hasn't completed segment {segment_idx} yet")
# Fallback to synchronous creation
try:
self.create_low_res_video(input_video_path, output_video_path, scale)
return os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0
except Exception as e:
logger.error(f"Failed to create low-res video {output_video_path}: {e}")
return False
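The method is self-contained enough to call directly; a hypothetical invocation, where the checkpoint, config, paths, scale, and segment index are all illustrative and the preprocessor is the sketch class from above:

    processor = SAM2Processor(
        checkpoint_path='checkpoints/sam2_hiera_large.pt',  # assumed path
        config_path='sam2_hiera_l.yaml',                    # assumed path
        async_preprocessor=AsyncLowResPreprocessor(),       # sketch class from above
    )
    ok = processor.ensure_low_res_video(
        'segments/seg_003/video.mp4',
        'segments/seg_003/low_res_video.mp4',
        scale=0.25,
        segment_idx=3,
    )
    if not ok:
        raise RuntimeError('low-res video could not be produced')

If the async preprocessor has already produced the file, the call returns immediately; otherwise it degrades to the synchronous FFmpeg/OpenCV path.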
def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]]) -> bool:
"""
@@ -341,14 +431,11 @@ class SAM2Processor:
logger.info(f"Processing segment {segment_idx} with SAM2")
# Create low-resolution video for inference
# Create low-resolution video for inference (async-aware)
low_res_video_path = os.path.join(segment_dir, "low_res_video.mp4")
if not os.path.exists(low_res_video_path):
try:
self.create_low_res_video(video_path, low_res_video_path, inference_scale)
except Exception as e:
logger.error(f"Failed to create low-res video for segment {segment_idx}: {e}")
return None
if not self.ensure_low_res_video(video_path, low_res_video_path, inference_scale, segment_idx):
logger.error(f"Failed to create low-res video for segment {segment_idx}")
return None
try:
# Initialize inference state
@@ -387,13 +474,7 @@ class SAM2Processor:
except Exception as e:
logger.warning(f"Could not remove low-res video: {e}")
# Mark segment as completed (for resume capability)
try:
with open(output_done_file, 'w') as f:
f.write(f"Segment {segment_idx} completed successfully\n")
logger.debug(f"Marked segment {segment_idx} as completed")
except Exception as e:
logger.warning(f"Could not create completion marker: {e}")
return video_segments
@@ -698,14 +779,11 @@ class SAM2Processor:
logger.error(f"Eye video not found: {eye_video_path}")
return None
# Create low-resolution eye video for inference
# Create low-resolution eye video for inference (async-aware)
low_res_eye_video_path = os.path.join(segment_dir, f"low_res_{eye_side}_eye_video.mp4")
if not os.path.exists(low_res_eye_video_path):
try:
self.create_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale)
except Exception as e:
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}: {e}")
return None
if not self.ensure_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale, segment_idx):
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}")
return None
try:
# Initialize inference state with eye-specific video