working a bit faster

2025-07-31 09:09:22 -07:00
parent 70044e1b10
commit 0057017ac4
5 changed files with 585 additions and 137 deletions

View File

@@ -0,0 +1,337 @@
"""
Async low-resolution video preprocessor for parallel processing optimization.
Creates low-resolution videos in background while main pipeline processes other segments.
"""
import os
import asyncio
import subprocess
import logging
import threading
from pathlib import Path
from typing import List, Dict, Any, Optional
from concurrent.futures import ThreadPoolExecutor
logger = logging.getLogger(__name__)
class AsyncLowResPreprocessor:
    """
    Handles async pre-generation of low-resolution videos for SAM2 inference.
    Uses FFmpeg subprocesses to bypass Python GIL limitations.
    """

    def __init__(self, max_concurrent: int = 3, segments_ahead: int = 3, use_ffmpeg: bool = True):
        """
        Initialize async preprocessor.

        Args:
            max_concurrent: Maximum number of concurrent FFmpeg processes
            segments_ahead: How many segments to prepare in advance
            use_ffmpeg: Use FFmpeg instead of OpenCV for better performance
        """
        self.max_concurrent = max_concurrent
        self.segments_ahead = segments_ahead
        self.use_ffmpeg = use_ffmpeg
        self.preparation_tasks = {}  # segment_idx -> threading.Thread
        self.completed_segments = set()  # Track completed preparations
        self.active_threads = []  # Track active background threads
        logger.info(f"AsyncLowResPreprocessor initialized: max_concurrent={max_concurrent}, "
                    f"segments_ahead={segments_ahead}, use_ffmpeg={use_ffmpeg}")
    async def create_lowres_ffmpeg(self, input_path: str, output_path: str, scale: float, semaphore: asyncio.Semaphore) -> bool:
        """
        Create low-resolution video using FFmpeg (bypasses Python GIL).

        Args:
            input_path: Path to input video
            output_path: Path to output low-res video
            scale: Scale factor for resolution reduction
            semaphore: Asyncio semaphore for limiting concurrent processes

        Returns:
            True if successful
        """
        async with semaphore:  # Limit concurrent FFmpeg processes
            try:
                # Ensure output directory exists
                os.makedirs(os.path.dirname(output_path), exist_ok=True)

                # FFmpeg command for fast low-res video creation
                cmd = [
                    'ffmpeg', '-y',  # Overwrite output
                    '-i', input_path,
                    '-vf', f'scale=iw*{scale}:ih*{scale}',
                    '-c:v', 'libx264',
                    '-preset', 'ultrafast',  # Fastest encoding
                    '-crf', '28',  # Lower quality OK for inference
                    '-an',  # No audio needed for inference
                    output_path
                ]
                logger.debug(f"Starting FFmpeg low-res creation: {os.path.basename(input_path)} -> {os.path.basename(output_path)}")

                # Run FFmpeg asynchronously
                proc = await asyncio.create_subprocess_exec(
                    *cmd,
                    stdout=asyncio.subprocess.DEVNULL,
                    stderr=asyncio.subprocess.PIPE
                )
                # communicate() waits for the process and returns (stdout, stderr)
                _, stderr = await proc.communicate()
                if proc.returncode != 0:
                    stderr_text = stderr.decode() if stderr else "Unknown error"
                    logger.error(f"FFmpeg failed for {input_path}: {stderr_text}")
                    return False

                # Verify output file was created
                if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
                    logger.error(f"FFmpeg output file missing or empty: {output_path}")
                    return False

                logger.debug(f"FFmpeg low-res creation completed: {os.path.basename(output_path)}")
                return True
            except Exception as e:
                logger.error(f"Error in FFmpeg low-res creation for {input_path}: {e}")
                return False
    def create_lowres_opencv(self, input_path: str, output_path: str, scale: float) -> bool:
        """
        Fallback: Create low-resolution video using OpenCV (blocking operation).
        Used when FFmpeg is not available or fails.

        Args:
            input_path: Path to input video
            output_path: Path to output low-res video
            scale: Scale factor for resolution reduction

        Returns:
            True if successful
        """
        try:
            import cv2
            logger.debug(f"Creating low-res video with OpenCV: {os.path.basename(input_path)}")
            cap = cv2.VideoCapture(input_path)
            if not cap.isOpened():
                logger.error(f"Could not open video with OpenCV: {input_path}")
                return False

            # Get video properties
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * scale)
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * scale)
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

            # Ensure output directory exists
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            # Create video writer
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
            if not out.isOpened():
                logger.error(f"Could not create video writer for: {output_path}")
                cap.release()
                return False

            # Process frames
            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # Resize frame
                low_res_frame = cv2.resize(frame, (frame_width, frame_height),
                                           interpolation=cv2.INTER_LINEAR)
                out.write(low_res_frame)
                frame_count += 1

            # Cleanup
            cap.release()
            out.release()
            logger.debug(f"OpenCV low-res creation completed: {frame_count} frames -> {os.path.basename(output_path)}")
            return True
        except Exception as e:
            logger.error(f"Error in OpenCV low-res creation for {input_path}: {e}")
            return False
    async def create_lowres_video_async(self, input_path: str, output_path: str, scale: float, semaphore: asyncio.Semaphore) -> bool:
        """
        Create low-resolution video using the configured method (FFmpeg or OpenCV).

        Args:
            input_path: Path to input video
            output_path: Path to output low-res video
            scale: Scale factor for resolution reduction
            semaphore: Asyncio semaphore for limiting concurrent processes

        Returns:
            True if successful
        """
        # Skip if already exists
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            logger.debug(f"Low-res video already exists: {os.path.basename(output_path)}")
            return True

        if self.use_ffmpeg:
            # Try FFmpeg first
            success = await self.create_lowres_ffmpeg(input_path, output_path, scale, semaphore)
            if success:
                return True
            logger.warning(f"FFmpeg failed for {input_path}, falling back to OpenCV")

        # Fallback to OpenCV (run in thread pool to avoid blocking the event loop)
        loop = asyncio.get_running_loop()
        with ThreadPoolExecutor(max_workers=1) as executor:
            success = await loop.run_in_executor(
                executor, self.create_lowres_opencv, input_path, output_path, scale
            )
        return success
    async def prepare_segment_lowres(self, segment_info: Dict[str, Any], scale: float,
                                     separate_eye_processing: bool = False,
                                     semaphore: Optional[asyncio.Semaphore] = None) -> bool:
        """
        Prepare low-resolution videos for a segment (regular or eye-specific).

        Args:
            segment_info: Segment information dictionary
            scale: Scale factor for resolution reduction
            separate_eye_processing: Whether to prepare eye-specific videos
            semaphore: Asyncio semaphore for limiting concurrent processes

        Returns:
            True if all videos were prepared successfully
        """
        segment_idx = segment_info['index']
        segment_dir = segment_info['directory']
        try:
            if separate_eye_processing:
                # Prepare low-res videos for left and right eyes
                success_left = success_right = True
                left_eye_path = os.path.join(segment_dir, "left_eye.mp4")
                right_eye_path = os.path.join(segment_dir, "right_eye.mp4")
                if os.path.exists(left_eye_path):
                    lowres_left_path = os.path.join(segment_dir, "low_res_left_eye_video.mp4")
                    success_left = await self.create_lowres_video_async(left_eye_path, lowres_left_path, scale, semaphore)
                if os.path.exists(right_eye_path):
                    lowres_right_path = os.path.join(segment_dir, "low_res_right_eye_video.mp4")
                    success_right = await self.create_lowres_video_async(right_eye_path, lowres_right_path, scale, semaphore)
                success = success_left and success_right
                if success:
                    logger.info(f"Pre-generated low-res eye videos for segment {segment_idx}")
                else:
                    logger.warning(f"Failed to pre-generate some eye videos for segment {segment_idx}")
            else:
                # Prepare regular low-res video
                input_path = segment_info['video_file']
                lowres_path = os.path.join(segment_dir, "low_res_video.mp4")
                success = await self.create_lowres_video_async(input_path, lowres_path, scale, semaphore)
                if success:
                    logger.info(f"Pre-generated low-res video for segment {segment_idx}")
                else:
                    logger.warning(f"Failed to pre-generate low-res video for segment {segment_idx}")
            if success:
                self.completed_segments.add(segment_idx)
            return success
        except Exception as e:
            logger.error(f"Error preparing low-res videos for segment {segment_idx}: {e}")
            return False
    def start_background_preparation(self, segments_info: List[Dict[str, Any]], scale: float,
                                     separate_eye_processing: bool = False, current_segment: int = 0):
        """
        Start preparing upcoming segments in background using threads.

        Args:
            segments_info: List of all segment information
            scale: Scale factor for resolution reduction
            separate_eye_processing: Whether to prepare eye-specific videos
            current_segment: Index of currently processing segment
        """
        def background_worker():
            """Background thread worker that prepares upcoming segments."""
            try:
                # Prepare segments ahead of current processing
                start_idx = current_segment + 1
                end_idx = min(len(segments_info), start_idx + self.segments_ahead)
                segments_to_prepare = []
                for i in range(start_idx, end_idx):
                    if i not in self.completed_segments and i not in self.preparation_tasks:
                        segments_to_prepare.append((i, segments_info[i]))
                if segments_to_prepare:
                    logger.info(f"Starting background preparation for {len(segments_to_prepare)} segments (indices {start_idx}-{end_idx - 1})")
                    # Run async work in a new event loop owned by this thread
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
                    try:
                        # Create semaphore in this event loop
                        semaphore = asyncio.Semaphore(self.max_concurrent)
                        tasks = []
                        for segment_idx, segment_info in segments_to_prepare:
                            task = self.prepare_segment_lowres(segment_info, scale, separate_eye_processing, semaphore)
                            tasks.append(task)
                        # Run all preparation tasks
                        results = loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True))
                        # Mark completed segments
                        for i, (segment_idx, _) in enumerate(segments_to_prepare):
                            if i < len(results) and results[i] is True:
                                self.completed_segments.add(segment_idx)
                                logger.debug(f"Background preparation completed for segment {segment_idx}")
                    finally:
                        loop.close()
                else:
                    logger.debug(f"No segments need preparation (current: {current_segment})")
            except Exception as e:
                logger.error(f"Error in background preparation worker: {e}")

        # Start background thread
        thread = threading.Thread(target=background_worker, daemon=True)
        thread.start()
        self.active_threads.append(thread)
    def is_segment_ready(self, segment_idx: int) -> bool:
        """
        Check if low-res videos for a segment are ready.

        Args:
            segment_idx: Index of segment to check

        Returns:
            True if segment is ready
        """
        return segment_idx in self.completed_segments

    def cleanup(self):
        """Clean up any running threads."""
        # Note: daemon threads will be cleaned up automatically when the main process exits;
        # we just clear our tracking structures.
        self.active_threads.clear()
        self.preparation_tasks.clear()
        logger.debug("AsyncLowResPreprocessor cleanup completed")
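Usage sketch (illustrative, not part of this commit; segments_info and the 0.25 scale are placeholders mirroring the main.py wiring further down):

preprocessor = AsyncLowResPreprocessor(max_concurrent=3, segments_ahead=3)
# Kick off background preparation before the sequential loop starts
preprocessor.start_background_preparation(segments_info, 0.25, current_segment=0)
for i, segment in enumerate(segments_info):
    if preprocessor.is_segment_ready(segment['index']):
        pass  # low-res video already on disk; skip synchronous creation
    # keep the pipeline a few segments ahead of the current one
    preprocessor.start_background_preparation(segments_info, 0.25, current_segment=i)
preprocessor.cleanup()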

View File

@@ -459,41 +459,12 @@ class MaskProcessor:
        return colored_mask

-    def _precompute_upscaled_masks(self, video_segments: Dict[int, Dict[int, np.ndarray]],
-                                   target_width: int, target_height: int) -> Dict[int, Dict[int, np.ndarray]]:
-        """
-        Pre-compute all upscaled masks to avoid per-frame upscaling.
-
-        Args:
-            video_segments: Dictionary of frame masks from SAM2
-            target_width: Target frame width
-            target_height: Target frame height
-
-        Returns:
-            Dictionary with pre-upscaled masks
-        """
-        logger.info(f"Pre-computing upscaled masks for {len(video_segments)} frames")
-        upscaled_segments = {}
-        for frame_idx, frame_masks in video_segments.items():
-            upscaled_frame_masks = {}
-            for obj_id, mask in frame_masks.items():
-                mask = mask.squeeze()
-                if mask.shape != (target_height, target_width):
-                    upscaled_mask = cv2.resize(mask.astype(np.uint8),
-                                               (target_width, target_height),
-                                               interpolation=cv2.INTER_NEAREST)
-                    upscaled_frame_masks[obj_id] = upscaled_mask
-                else:
-                    upscaled_frame_masks[obj_id] = mask.astype(np.uint8)
-            upscaled_segments[frame_idx] = upscaled_frame_masks
-        logger.info(f"Pre-computed upscaled masks for {len(upscaled_segments)} frames")
-        return upscaled_segments
-
    def process_and_save_output_video(self, video_path: str, output_video_path: str,
                                      video_segments: Dict[int, Dict[int, np.ndarray]],
-                                     use_nvenc: bool = False, bitrate: str = "50M") -> bool:
+                                     use_nvenc: bool = False, bitrate: str = "50M",
+                                     batch_size: int = 16) -> bool:
        """
        Process high-resolution frames, apply upscaled masks, and save the output video.
@@ -503,6 +474,7 @@ class MaskProcessor:
            video_segments: Dictionary of frame masks
            use_nvenc: Whether to use NVIDIA hardware encoding
            bitrate: Output video bitrate
+            batch_size: Number of frames to process in a single batch

        Returns:
            True if successful
@@ -520,18 +492,15 @@ class MaskProcessor:
logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames") logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")
# Pre-compute all upscaled masks (Phase 1 optimization)
upscaled_segments = self._precompute_upscaled_masks(video_segments, frame_width, frame_height)
# Setup VideoWriter # Setup VideoWriter
out_writer = None
if self.output_mode == "alpha_channel": if self.output_mode == "alpha_channel":
# For alpha channel, we need a codec that supports transparency
success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate) success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
if not success: if not success:
logger.error("Failed to setup alpha channel encoder") logger.error("Failed to setup alpha channel encoder")
cap.release() cap.release()
return False return False
use_nvenc = False # Override NVENC for alpha channel use_nvenc = False
elif use_nvenc: elif use_nvenc:
success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate) success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
if not success: if not success:
@@ -539,106 +508,101 @@ class MaskProcessor:
                    use_nvenc = False

            if not use_nvenc and self.output_mode != "alpha_channel":
-                # Use OpenCV VideoWriter
-                fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use mp4v for better compatibility
-                out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
-                if not out.isOpened():
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
+                if not out_writer.isOpened():
                    logger.error("Failed to create output video writer")
                    cap.release()
                    return False

-            # Process frames with batch reading (Phase 1 optimization)
+            # Process frames in batches
            frame_idx = 0
            processed_frames = 0
-            batch_size = 10  # Process frames in batches for better I/O performance
-            frame_buffer = []
-
-            # Pre-fill frame buffer
-            for _ in range(min(batch_size, len(upscaled_segments))):
-                ret, frame = cap.read()
-                if ret:
-                    frame_buffer.append(frame)
-                else:
-                    break
-
-            buffer_idx = 0
-            while frame_idx < len(upscaled_segments) and buffer_idx < len(frame_buffer):
-                frame = frame_buffer[buffer_idx]
-                if frame_idx in upscaled_segments:
-                    # Get pre-computed upscaled masks for this frame (Phase 1 optimization)
-                    upscaled_masks = [upscaled_segments[frame_idx][obj_id]
-                                      for obj_id in upscaled_segments[frame_idx]]
-
-                    # Apply mask based on output mode (no upscaling needed - already done)
-                    if self.output_mode == "alpha_channel":
-                        result_frame = self.apply_mask_with_alpha(frame, upscaled_masks)
-                    else:
-                        result_frame = self.apply_green_mask(frame, upscaled_masks)
-                else:
-                    # No mask for this frame
-                    if self.output_mode == "alpha_channel":
-                        # Create fully transparent frame for alpha channel mode
-                        bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
-                        bgra_frame[:, :, 3] = 0  # Fully transparent
-                        result_frame = bgra_frame
-                        logger.warning(f"No mask for frame {frame_idx}, using transparent frame")
-                    else:
-                        # Use original frame for green screen mode
-                        logger.warning(f"No mask for frame {frame_idx}, using original")
-                        result_frame = frame
-
-                # Write frame
-                if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
-                    self.alpha_process.stdin.write(result_frame.tobytes())
-                elif use_nvenc and hasattr(self, 'nvenc_process'):
-                    self.nvenc_process.stdin.write(result_frame.tobytes())
-                else:
-                    out.write(result_frame)
-                processed_frames += 1
-                frame_idx += 1
-                buffer_idx += 1
-
-                # Refill buffer when needed
-                if buffer_idx >= len(frame_buffer) and frame_idx < len(upscaled_segments):
-                    frame_buffer.clear()
-                    buffer_idx = 0
-                    # Read next batch
-                    for _ in range(min(batch_size, len(upscaled_segments) - frame_idx)):
-                        ret, frame = cap.read()
-                        if ret:
-                            frame_buffer.append(frame)
-                        else:
-                            break
-
-                # Progress logging
-                if processed_frames % 100 == 0:
+            while frame_idx < total_frames:
+                batch_frames = []
+                batch_masks = []
+
+                # Read a batch of frames
+                for _ in range(batch_size):
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    batch_frames.append(frame)
+                if not batch_frames:
+                    break
+
+                # Get masks for the current batch and perform just-in-time upscaling
+                for i in range(len(batch_frames)):
+                    current_frame_idx = frame_idx + i
+                    if current_frame_idx in video_segments:
+                        frame_masks = video_segments[current_frame_idx]
+                        upscaled_masks = []
+                        for obj_id, mask in frame_masks.items():
+                            mask = mask.squeeze()
+                            if mask.shape != (frame_height, frame_width):
+                                upscaled_mask = cv2.resize(mask.astype(np.uint8),
+                                                           (frame_width, frame_height),
+                                                           interpolation=cv2.INTER_NEAREST)
+                                upscaled_masks.append(upscaled_mask)
+                            else:
+                                upscaled_masks.append(mask.astype(np.uint8))
+                        batch_masks.append(upscaled_masks)
+                    else:
+                        batch_masks.append([])  # No masks for this frame
+
+                # Process the batch
+                result_batch = []
+                for i, frame in enumerate(batch_frames):
+                    masks = batch_masks[i]
+                    if masks:
+                        if self.output_mode == "alpha_channel":
+                            result_frame = self.apply_mask_with_alpha(frame, masks)
+                        else:
+                            result_frame = self.apply_green_mask(frame, masks)
+                    else:
+                        # No mask for this frame
+                        if self.output_mode == "alpha_channel":
+                            bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
+                            bgra_frame[:, :, 3] = 0
+                            result_frame = bgra_frame
+                        else:
+                            result_frame = frame
+                    result_batch.append(result_frame)
+
+                # Write the processed batch
+                for result_frame in result_batch:
+                    if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
+                        self.alpha_process.stdin.write(result_frame.tobytes())
+                    elif use_nvenc and hasattr(self, 'nvenc_process'):
+                        self.nvenc_process.stdin.write(result_frame.tobytes())
+                    else:
+                        out_writer.write(result_frame)
+
+                processed_frames += len(batch_frames)
+                frame_idx += len(batch_frames)
+                if processed_frames % 100 < batch_size:
                    logger.info(f"Processed {processed_frames}/{total_frames} frames")

            # Cleanup
            cap.release()
            if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                self.alpha_process.stdin.close()
                self.alpha_process.wait()
+                if self.alpha_process.returncode != 0:
+                    logger.error("Alpha channel encoding failed")
+                    return False
            elif use_nvenc and hasattr(self, 'nvenc_process'):
                self.nvenc_process.stdin.close()
                self.nvenc_process.wait()
+                if self.nvenc_process.returncode != 0:
+                    logger.error("NVENC encoding failed")
+                    return False
            else:
-                out.release()
+                if out_writer:
+                    out_writer.release()

            logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}")
            return True
        except Exception as e:
-            logger.error(f"Error processing video: {e}")
+            logger.error(f"Error processing video: {e}", exc_info=True)
            return False

    def _setup_nvenc_encoder(self, output_path: str, width: int, height: int,
@@ -751,6 +715,14 @@ class MaskProcessor:
                if success:
                    logger.info(f"Successfully created {self.output_mode} video: {output_video}")
+
+                    # Mark segment as completed only after video is successfully written
+                    try:
+                        output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
+                        with open(output_done_file, 'w') as f:
+                            f.write(f"Segment {segment_info['index']} processed and saved successfully.")
+                        logger.debug(f"Created completion marker for segment {segment_info['index']}")
+                    except Exception as e:
+                        logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
                else:
                    logger.error(f"Failed to process segment {segment_info['index']}")

View File

@@ -8,6 +8,7 @@ import cv2
import numpy as np
import torch
import logging
+import subprocess
import gc
from typing import Dict, List, Any, Optional, Tuple
from sam2.build_sam import build_sam2_video_predictor
@@ -19,7 +20,8 @@ class SAM2Processor:
"""Handles SAM2-based video segmentation for human tracking.""" """Handles SAM2-based video segmentation for human tracking."""
def __init__(self, checkpoint_path: str, config_path: str, vos_optimized: bool = False, def __init__(self, checkpoint_path: str, config_path: str, vos_optimized: bool = False,
separate_eye_processing: bool = False, eye_overlap_pixels: int = 0): separate_eye_processing: bool = False, eye_overlap_pixels: int = 0,
async_preprocessor=None):
""" """
Initialize SAM2 processor. Initialize SAM2 processor.
@@ -29,11 +31,13 @@ class SAM2Processor:
            vos_optimized: Enable VOS optimization for speedup (requires PyTorch 2.5.1+)
            separate_eye_processing: Enable VR180 separate eye processing mode
            eye_overlap_pixels: Pixel overlap between eyes for blending
+            async_preprocessor: Optional async preprocessor for background low-res video generation
        """
        self.checkpoint_path = checkpoint_path
        self.config_path = config_path
        self.vos_optimized = vos_optimized
        self.separate_eye_processing = separate_eye_processing
+        self.async_preprocessor = async_preprocessor
        self.predictor = None

        # Initialize eye processor if separate eye processing is enabled
@@ -120,13 +124,64 @@ class SAM2Processor:
    def create_low_res_video(self, input_video_path: str, output_video_path: str, scale: float):
        """
-        Create a low-resolution version of the input video for inference.
+        Create a low-resolution version of the input video for inference using FFmpeg
+        with hardware acceleration for improved performance.

        Args:
            input_video_path: Path to input video
            output_video_path: Path to output low-res video
            scale: Scale factor for resolution reduction
        """
+        try:
+            # Get video properties using OpenCV
+            cap = cv2.VideoCapture(input_video_path)
+            if not cap.isOpened():
+                raise ValueError(f"Could not open video: {input_video_path}")
+            original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            cap.release()
+
+            target_width = int(original_width * scale)
+            target_height = int(original_height * scale)
+            # Ensure dimensions are even, as required by many codecs
+            target_width = target_width if target_width % 2 == 0 else target_width + 1
+            target_height = target_height if target_height % 2 == 0 else target_height + 1
+
+            # Construct FFmpeg command with hardware acceleration
+            command = [
+                'ffmpeg',
+                '-y',
+                '-hwaccel', 'auto',  # Auto-detect hardware acceleration
+                '-i', input_video_path,
+                '-vf', f'scale={target_width}:{target_height}',
+                '-c:v', 'h264_nvenc',  # Use NVIDIA's hardware encoder
+                '-preset', 'fast',
+                '-cq', '23',  # NVENC constant-quality level (h264_nvenc does not support libx264's -crf)
+                output_video_path
+            ]
+            logger.info(f"Executing FFmpeg command: {' '.join(command)}")
+
+            # Execute FFmpeg; with check=True a non-zero exit raises CalledProcessError,
+            # so no manual returncode check is needed here
+            subprocess.run(command, check=True, capture_output=True, text=True)
+            logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            logger.warning(f"Hardware-accelerated FFmpeg failed: {e}. Falling back to OpenCV.")
+            # Fallback to original OpenCV implementation if FFmpeg fails
+            self._create_low_res_video_opencv(input_video_path, output_video_path, scale)
+
+    def _create_low_res_video_opencv(self, input_video_path: str, output_video_path: str, scale: float):
+        """Original OpenCV-based implementation for creating low-resolution video."""
        cap = cv2.VideoCapture(input_video_path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {input_video_path}")
@@ -151,7 +206,42 @@ class SAM2Processor:
        cap.release()
        out.release()
-        logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
+        logger.info(f"Created low-res video with {frame_count} frames using OpenCV: {output_video_path}")
+
+    def ensure_low_res_video(self, input_video_path: str, output_video_path: str,
+                             scale: float, segment_idx: Optional[int] = None) -> bool:
+        """
+        Ensure low-resolution video exists, using async preprocessor if available.
+
+        Args:
+            input_video_path: Path to input video
+            output_video_path: Path to output low-res video
+            scale: Scale factor for resolution reduction
+            segment_idx: Optional segment index for async coordination
+
+        Returns:
+            True if low-res video is ready
+        """
+        # Check if already exists
+        if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
+            return True
+
+        # Use async preprocessor if available and segment index provided
+        if self.async_preprocessor and segment_idx is not None:
+            if self.async_preprocessor.is_segment_ready(segment_idx):
+                if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
+                    logger.debug(f"Async preprocessor provided segment {segment_idx}")
+                    return True
+            else:
+                logger.debug(f"Async preprocessor hasn't completed segment {segment_idx} yet")
+
+        # Fallback to synchronous creation
+        try:
+            self.create_low_res_video(input_video_path, output_video_path, scale)
+            return os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0
+        except Exception as e:
+            logger.error(f"Failed to create low-res video {output_video_path}: {e}")
+            return False

    def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]]) -> bool:
        """
@@ -341,14 +431,11 @@ class SAM2Processor:
logger.info(f"Processing segment {segment_idx} with SAM2") logger.info(f"Processing segment {segment_idx} with SAM2")
# Create low-resolution video for inference # Create low-resolution video for inference (async-aware)
low_res_video_path = os.path.join(segment_dir, "low_res_video.mp4") low_res_video_path = os.path.join(segment_dir, "low_res_video.mp4")
if not os.path.exists(low_res_video_path): if not self.ensure_low_res_video(video_path, low_res_video_path, inference_scale, segment_idx):
try: logger.error(f"Failed to create low-res video for segment {segment_idx}")
self.create_low_res_video(video_path, low_res_video_path, inference_scale) return None
except Exception as e:
logger.error(f"Failed to create low-res video for segment {segment_idx}: {e}")
return None
try: try:
# Initialize inference state # Initialize inference state
@@ -387,13 +474,7 @@ class SAM2Processor:
            except Exception as e:
                logger.warning(f"Could not remove low-res video: {e}")

-        # Mark segment as completed (for resume capability)
-        try:
-            with open(output_done_file, 'w') as f:
-                f.write(f"Segment {segment_idx} completed successfully\n")
-            logger.debug(f"Marked segment {segment_idx} as completed")
-        except Exception as e:
-            logger.warning(f"Could not create completion marker: {e}")
-
        return video_segments
@@ -698,14 +779,11 @@ class SAM2Processor:
logger.error(f"Eye video not found: {eye_video_path}") logger.error(f"Eye video not found: {eye_video_path}")
return None return None
# Create low-resolution eye video for inference # Create low-resolution eye video for inference (async-aware)
low_res_eye_video_path = os.path.join(segment_dir, f"low_res_{eye_side}_eye_video.mp4") low_res_eye_video_path = os.path.join(segment_dir, f"low_res_{eye_side}_eye_video.mp4")
if not os.path.exists(low_res_eye_video_path): if not self.ensure_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale, segment_idx):
try: logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}")
self.create_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale) return None
except Exception as e:
logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}: {e}")
return None
try: try:
# Initialize inference state with eye-specific video # Initialize inference state with eye-specific video
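For reference, a sketch of how the new async-aware helper might be exercised standalone (paths and the 0.25 scale are placeholders; constructor arguments mirror the diff above):

sam2 = SAM2Processor(checkpoint_path="sam2.pt", config_path="sam2.yaml",
                     async_preprocessor=preprocessor)  # preprocessor may be None
ready = sam2.ensure_low_res_video("segment_3/video.mp4",
                                  "segment_3/low_res_video.mp4",
                                  0.25, segment_idx=3)
if not ready:
    raise RuntimeError("low-res video could not be prepared")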

View File

@@ -44,6 +44,14 @@ class VideoSplitter:
        segments_dir = os.path.join(output_dir, f"{video_name}_segments")
        ensure_directory(segments_dir)

+        # Check for completion marker to avoid re-splitting
+        completion_marker = os.path.join(segments_dir, ".splitting_done")
+        if os.path.exists(completion_marker):
+            logger.info(f"Video already split, skipping splitting process. Found completion marker: {completion_marker}")
+            segment_dirs = [d for d in os.listdir(segments_dir) if os.path.isdir(os.path.join(segments_dir, d)) and d.startswith("segment_")]
+            segment_dirs.sort(key=lambda x: int(x.split("_")[1]))
+            return segments_dir, segment_dirs
+
        logger.info(f"Splitting video {input_video} into {self.segment_duration}s segments")

        # Split video using ffmpeg
@@ -83,6 +91,11 @@ class VideoSplitter:
        # Create file list for later concatenation
        self._create_file_list(segments_dir, segment_dirs)

+        # Create completion marker
+        completion_marker = os.path.join(segments_dir, ".splitting_done")
+        with open(completion_marker, 'w') as f:
+            f.write("Video splitting completed successfully.")
+
        logger.info(f"Successfully split video into {len(segment_dirs)} segments")
        return segments_dir, segment_dirs

main.py
View File

@@ -475,8 +475,8 @@ def process_segment_with_separate_eyes(segment_info, detector, sam2_processor, m
    return success, left_masks, right_masks

-def main():
-    """Main processing pipeline."""
+async def main_async():
+    """Main processing pipeline with async optimizations."""
+    async_preprocessor = None  # Bound before the try so the finally block added below can always reference it
    args = parse_arguments()

    try:
@@ -568,6 +568,24 @@ def main():
        eye_overlap_pixels = config.get('processing.eye_overlap_pixels', 0)
        enable_greenscreen_fallback = config.get('processing.enable_greenscreen_fallback', True)

+        # Initialize async preprocessor if enabled
+        async_preprocessor = None
+        if config.get('advanced.enable_background_lowres_generation', False):
+            from core.async_lowres_preprocessor import AsyncLowResPreprocessor
+            max_concurrent = config.get('advanced.max_concurrent_lowres', 3)
+            segments_ahead = config.get('advanced.lowres_segments_ahead', 3)
+            use_ffmpeg = config.get('advanced.use_ffmpeg_lowres', True)
+            async_preprocessor = AsyncLowResPreprocessor(
+                max_concurrent=max_concurrent,
+                segments_ahead=segments_ahead,
+                use_ffmpeg=use_ffmpeg
+            )
+            logger.info(f"Async low-res preprocessing: ENABLED (max_concurrent={max_concurrent}, segments_ahead={segments_ahead})")
+        else:
+            logger.info("Async low-res preprocessing: DISABLED")
+
        if separate_eye_processing:
            logger.info("VR180 Separate Eye Processing: ENABLED")
            logger.info(f"Eye overlap pixels: {eye_overlap_pixels}")
@@ -578,7 +596,8 @@ def main():
            config_path=config.get_sam2_config(),
            vos_optimized=config.get('models.sam2_vos_optimized', False),
            separate_eye_processing=separate_eye_processing,
-            eye_overlap_pixels=eye_overlap_pixels
+            eye_overlap_pixels=eye_overlap_pixels,
+            async_preprocessor=async_preprocessor
        )

        # Initialize mask processor with quality enhancements
@@ -593,6 +612,16 @@ def main():
logger.info("Step 4: Processing segments sequentially") logger.info("Step 4: Processing segments sequentially")
total_humans_detected = 0 total_humans_detected = 0
# Start background low-res video preprocessing if enabled
if async_preprocessor:
logger.info("Starting background low-res video preprocessing")
async_preprocessor.start_background_preparation(
segments_info,
config.get_inference_scale(),
separate_eye_processing,
current_segment=0
)
# Initialize previous masks for separate eye processing # Initialize previous masks for separate eye processing
previous_left_masks = None previous_left_masks = None
previous_right_masks = None previous_right_masks = None
@@ -602,6 +631,15 @@ def main():
logger.info(f"Processing segment {segment_idx}/{len(segments_info)-1}") logger.info(f"Processing segment {segment_idx}/{len(segments_info)-1}")
# Start background preparation for upcoming segments
if async_preprocessor and i < len(segments_info) - 1:
async_preprocessor.start_background_preparation(
segments_info,
config.get_inference_scale(),
separate_eye_processing,
current_segment=i
)
# Reset temporal history for new segment # Reset temporal history for new segment
mask_processor.reset_temporal_history() mask_processor.reset_temporal_history()
@@ -1012,6 +1050,16 @@ def main():
    except Exception as e:
        logger.error(f"Pipeline failed: {e}", exc_info=True)
        return 1
+    finally:
+        # Cleanup async preprocessor if it was used
+        if async_preprocessor:
+            async_preprocessor.cleanup()
+            logger.debug("Async preprocessor cleanup completed")
+
+def main():
+    """Main entry point - wrapper for async main."""
+    import asyncio
+    return asyncio.run(main_async())

if __name__ == "__main__":
    exit_code = main()
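For reference, the entry-point pattern used above, reduced to a self-contained sketch (pipeline and entry_point are hypothetical names):

import asyncio

async def pipeline() -> int:
    ...  # pipeline work, returning an exit code
    return 0

def entry_point() -> int:
    # asyncio.run creates the event loop, runs the coroutine to completion,
    # and closes the loop, so the synchronous caller just forwards the exit code
    return asyncio.run(pipeline())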