working a bit faster
This commit is contained in:
@@ -8,6 +8,7 @@ import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
import logging
|
||||
import subprocess
|
||||
import gc
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from sam2.build_sam import build_sam2_video_predictor
|
||||
@@ -19,7 +20,8 @@ class SAM2Processor:
|
||||
"""Handles SAM2-based video segmentation for human tracking."""
|
||||
|
||||
def __init__(self, checkpoint_path: str, config_path: str, vos_optimized: bool = False,
|
||||
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0):
|
||||
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0,
|
||||
async_preprocessor=None):
|
||||
"""
|
||||
Initialize SAM2 processor.
|
||||
|
||||
@@ -29,11 +31,13 @@ class SAM2Processor:
|
||||
vos_optimized: Enable VOS optimization for speedup (requires PyTorch 2.5.1+)
|
||||
separate_eye_processing: Enable VR180 separate eye processing mode
|
||||
eye_overlap_pixels: Pixel overlap between eyes for blending
|
||||
async_preprocessor: Optional async preprocessor for background low-res video generation
|
||||
"""
|
||||
self.checkpoint_path = checkpoint_path
|
||||
self.config_path = config_path
|
||||
self.vos_optimized = vos_optimized
|
||||
self.separate_eye_processing = separate_eye_processing
|
||||
self.async_preprocessor = async_preprocessor
|
||||
self.predictor = None
|
||||
|
||||
# Initialize eye processor if separate eye processing is enabled
|
||||
@@ -120,13 +124,64 @@ class SAM2Processor:
|
||||
|
||||
def create_low_res_video(self, input_video_path: str, output_video_path: str, scale: float):
    """
    Create a low-resolution version of the input video for inference using FFmpeg
    with hardware acceleration for improved performance.

    The source dimensions are probed with OpenCV, multiplied by ``scale`` and
    rounded up to even numbers. FFmpeg (``-hwaccel auto`` decode, ``h264_nvenc``
    encode) then writes the resized video. If the ``ffmpeg`` executable is missing
    or exits with a non-zero status, the work is delegated to
    ``_create_low_res_video_opencv`` instead.

    Args:
        input_video_path: Path to input video
        output_video_path: Path to output low-res video
        scale: Scale factor for resolution reduction

    Raises:
        ValueError: If OpenCV cannot open ``input_video_path``.
    """
    # Probe the source before touching FFmpeg; an unreadable input is a hard
    # error (ValueError) and must not trigger the OpenCV fallback.
    cap = cv2.VideoCapture(input_video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {input_video_path}")
        original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture on every path, including the open-failure path.
        cap.release()

    # Round up to even dimensions, as required by many codecs. Clamp to at
    # least 2 so a very small scale never yields 0, which FFmpeg's scale
    # filter interprets as "keep the input size".
    target_width = max(2, int(original_width * scale))
    target_height = max(2, int(original_height * scale))
    target_width += target_width % 2
    target_height += target_height % 2

    command = [
        'ffmpeg',
        '-y',
        '-hwaccel', 'auto',  # Auto-detect hardware acceleration
        '-i', input_video_path,
        '-vf', f'scale={target_width}:{target_height}',
        '-c:v', 'h264_nvenc',  # Use NVIDIA's hardware encoder
        '-preset', 'fast',
        # NVENC has no CRF mode ("-crf" is an x264/x265 option); constant
        # quality is requested through VBR rate control with "-cq".
        '-rc', 'vbr',
        '-cq', '23',
        '-b:v', '0',
        output_video_path,
    ]

    logger.info(f"Executing FFmpeg command: {' '.join(command)}")

    try:
        # check=True raises CalledProcessError on a non-zero exit status, so no
        # separate returncode test is needed afterwards.
        subprocess.run(command, check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # Only CalledProcessError carries captured stderr; FileNotFoundError
        # (ffmpeg not installed) does not.
        stderr = getattr(e, "stderr", None)
        if stderr:
            logger.error(f"FFmpeg failed with error: {stderr}")
        logger.warning(f"Hardware-accelerated FFmpeg failed: {e}. Falling back to OpenCV.")
        # Fallback to original OpenCV implementation if FFmpeg fails
        self._create_low_res_video_opencv(input_video_path, output_video_path, scale)
        return

    logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
|
||||
|
||||
def _create_low_res_video_opencv(self, input_video_path: str, output_video_path: str, scale: float):
|
||||
"""Original OpenCV-based implementation for creating low-resolution video."""
|
||||
cap = cv2.VideoCapture(input_video_path)
|
||||
if not cap.isOpened():
|
||||
raise ValueError(f"Could not open video: {input_video_path}")
|
||||
@@ -151,7 +206,42 @@ class SAM2Processor:
|
||||
cap.release()
|
||||
out.release()
|
||||
|
||||
logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
|
||||
logger.info(f"Created low-res video with {frame_count} frames using OpenCV: {output_video_path}")
|
||||
|
||||
def ensure_low_res_video(self, input_video_path: str, output_video_path: str,
                         scale: float, segment_idx: Optional[int] = None) -> bool:
    """
    Make sure a non-empty low-resolution video exists at ``output_video_path``.

    An existing non-empty file is accepted as-is. Otherwise, when an async
    preprocessor is configured and ``segment_idx`` is given, the preprocessor is
    asked whether that segment is ready and the file is checked again. If the
    file is still missing or empty, it is created synchronously through
    ``create_low_res_video``.

    Args:
        input_video_path: Path to input video
        output_video_path: Path to output low-res video
        scale: Scale factor for resolution reduction
        segment_idx: Optional segment index for async coordination

    Returns:
        True if low-res video is ready, False if synchronous creation raised
        or did not leave a non-empty file behind
    """
    def _output_ready() -> bool:
        # "Ready" means the file exists and has at least one byte.
        return os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0

    # Fast path: nothing to do when the file is already there.
    if _output_ready():
        return True

    # Consult the async preprocessor only when it can be asked about a segment.
    preprocessor = self.async_preprocessor
    if preprocessor and segment_idx is not None:
        if not preprocessor.is_segment_ready(segment_idx):
            logger.debug(f"Async preprocessor hasn't completed segment {segment_idx} yet")
        elif _output_ready():
            logger.debug(f"Async preprocessor provided segment {segment_idx}")
            return True

    # Synchronous fallback.
    # NOTE(review): if the async preprocessor is still writing this same path,
    # the fallback may write it concurrently — confirm against the
    # preprocessor's contract.
    try:
        self.create_low_res_video(input_video_path, output_video_path, scale)
        return _output_ready()
    except Exception as e:
        logger.error(f"Failed to create low-res video {output_video_path}: {e}")
        return False
|
||||
|
||||
def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]]) -> bool:
|
||||
"""
|
||||
@@ -341,14 +431,11 @@ class SAM2Processor:
|
||||
|
||||
logger.info(f"Processing segment {segment_idx} with SAM2")
|
||||
|
||||
# Create low-resolution video for inference
|
||||
# Create low-resolution video for inference (async-aware)
|
||||
low_res_video_path = os.path.join(segment_dir, "low_res_video.mp4")
|
||||
if not os.path.exists(low_res_video_path):
|
||||
try:
|
||||
self.create_low_res_video(video_path, low_res_video_path, inference_scale)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create low-res video for segment {segment_idx}: {e}")
|
||||
return None
|
||||
if not self.ensure_low_res_video(video_path, low_res_video_path, inference_scale, segment_idx):
|
||||
logger.error(f"Failed to create low-res video for segment {segment_idx}")
|
||||
return None
|
||||
|
||||
try:
|
||||
# Initialize inference state
|
||||
@@ -387,13 +474,7 @@ class SAM2Processor:
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not remove low-res video: {e}")
|
||||
|
||||
# Mark segment as completed (for resume capability)
|
||||
try:
|
||||
with open(output_done_file, 'w') as f:
|
||||
f.write(f"Segment {segment_idx} completed successfully\n")
|
||||
logger.debug(f"Marked segment {segment_idx} as completed")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not create completion marker: {e}")
|
||||
|
||||
|
||||
return video_segments
|
||||
|
||||
@@ -698,14 +779,11 @@ class SAM2Processor:
|
||||
logger.error(f"Eye video not found: {eye_video_path}")
|
||||
return None
|
||||
|
||||
# Create low-resolution eye video for inference
|
||||
# Create low-resolution eye video for inference (async-aware)
|
||||
low_res_eye_video_path = os.path.join(segment_dir, f"low_res_{eye_side}_eye_video.mp4")
|
||||
if not os.path.exists(low_res_eye_video_path):
|
||||
try:
|
||||
self.create_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}: {e}")
|
||||
return None
|
||||
if not self.ensure_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale, segment_idx):
|
||||
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}")
|
||||
return None
|
||||
|
||||
try:
|
||||
# Initialize inference state with eye-specific video
|
||||
|
||||
Reference in New Issue
Block a user