working a bit faster

core/async_lowres_preprocessor.py (new file, 337 lines)
@@ -0,0 +1,337 @@
"""
Async low-resolution video preprocessor for parallel processing optimization.
Creates low-resolution videos in the background while the main pipeline processes other segments.
"""

import os
import asyncio
import subprocess
import logging
import threading
from pathlib import Path
from typing import List, Dict, Any, Optional
from concurrent.futures import ThreadPoolExecutor


logger = logging.getLogger(__name__)


class AsyncLowResPreprocessor:
    """
    Handles async pre-generation of low-resolution videos for SAM2 inference.
    Uses FFmpeg subprocesses to bypass Python GIL limitations.
    """

    def __init__(self, max_concurrent: int = 3, segments_ahead: int = 3, use_ffmpeg: bool = True):
        """
        Initialize async preprocessor.

        Args:
            max_concurrent: Maximum number of concurrent FFmpeg processes
            segments_ahead: How many segments to prepare in advance
            use_ffmpeg: Use FFmpeg instead of OpenCV for better performance
        """
        self.max_concurrent = max_concurrent
        self.segments_ahead = segments_ahead
        self.use_ffmpeg = use_ffmpeg
        self.preparation_tasks = {}  # segment_idx -> threading.Thread
        self.completed_segments = set()  # Track completed preparations
        self.active_threads = []  # Track active background threads

        logger.info(f"AsyncLowResPreprocessor initialized: max_concurrent={max_concurrent}, "
                    f"segments_ahead={segments_ahead}, use_ffmpeg={use_ffmpeg}")

    async def create_lowres_ffmpeg(self, input_path: str, output_path: str, scale: float,
                                   semaphore: asyncio.Semaphore) -> bool:
        """
        Create low-resolution video using FFmpeg (bypasses Python GIL).

        Args:
            input_path: Path to input video
            output_path: Path to output low-res video
            scale: Scale factor for resolution reduction
            semaphore: Asyncio semaphore for limiting concurrent processes

        Returns:
            True if successful
        """
        async with semaphore:  # Limit concurrent FFmpeg processes
            try:
                # Ensure output directory exists
                os.makedirs(os.path.dirname(output_path), exist_ok=True)

                # FFmpeg command for fast low-res video creation
                cmd = [
                    'ffmpeg', '-y',  # Overwrite output
                    '-i', input_path,
                    '-vf', f'scale=iw*{scale}:ih*{scale}',
                    '-c:v', 'libx264',
                    '-preset', 'ultrafast',  # Fastest encoding
                    '-crf', '28',  # Lower quality OK for inference
                    '-an',  # No audio needed for inference
                    output_path
                ]

                logger.debug(f"Starting FFmpeg low-res creation: {os.path.basename(input_path)} -> {os.path.basename(output_path)}")

                # Run FFmpeg asynchronously
                proc = await asyncio.create_subprocess_exec(
                    *cmd,
                    stdout=asyncio.subprocess.DEVNULL,
                    stderr=asyncio.subprocess.PIPE
                )
                # communicate() waits for the process and drains stderr,
                # avoiding the pipe deadlock that wait()-before-read can cause
                _, stderr = await proc.communicate()

                if proc.returncode != 0:
                    stderr_text = stderr.decode(errors="replace") if stderr else "Unknown error"
                    logger.error(f"FFmpeg failed for {input_path}: {stderr_text}")
                    return False

                # Verify output file was created
                if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
                    logger.error(f"FFmpeg output file missing or empty: {output_path}")
                    return False

                logger.debug(f"FFmpeg low-res creation completed: {os.path.basename(output_path)}")
                return True

            except Exception as e:
                logger.error(f"Error in FFmpeg low-res creation for {input_path}: {e}")
                return False

    def create_lowres_opencv(self, input_path: str, output_path: str, scale: float) -> bool:
        """
        Fallback: Create low-resolution video using OpenCV (blocking operation).
        Used when FFmpeg is not available or fails.

        Args:
            input_path: Path to input video
            output_path: Path to output low-res video
            scale: Scale factor for resolution reduction

        Returns:
            True if successful
        """
        try:
            import cv2

            logger.debug(f"Creating low-res video with OpenCV: {os.path.basename(input_path)}")

            cap = cv2.VideoCapture(input_path)
            if not cap.isOpened():
                logger.error(f"Could not open video with OpenCV: {input_path}")
                return False

            # Get video properties
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * scale)
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * scale)
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

            # Ensure output directory exists
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            # Create video writer
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

            if not out.isOpened():
                logger.error(f"Could not create video writer for: {output_path}")
                cap.release()
                return False

            # Process frames
            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Resize frame
                low_res_frame = cv2.resize(frame, (frame_width, frame_height),
                                           interpolation=cv2.INTER_LINEAR)
                out.write(low_res_frame)
                frame_count += 1

            # Cleanup
            cap.release()
            out.release()

            logger.debug(f"OpenCV low-res creation completed: {frame_count} frames -> {os.path.basename(output_path)}")
            return True

        except Exception as e:
            logger.error(f"Error in OpenCV low-res creation for {input_path}: {e}")
            return False

    async def create_lowres_video_async(self, input_path: str, output_path: str, scale: float,
                                        semaphore: asyncio.Semaphore) -> bool:
        """
        Create low-resolution video using the configured method (FFmpeg or OpenCV).

        Args:
            input_path: Path to input video
            output_path: Path to output low-res video
            scale: Scale factor for resolution reduction
            semaphore: Asyncio semaphore for limiting concurrent processes

        Returns:
            True if successful
        """
        # Skip if already exists
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            logger.debug(f"Low-res video already exists: {os.path.basename(output_path)}")
            return True

        if self.use_ffmpeg:
            # Try FFmpeg first
            success = await self.create_lowres_ffmpeg(input_path, output_path, scale, semaphore)
            if success:
                return True

            logger.warning(f"FFmpeg failed for {input_path}, falling back to OpenCV")

        # Fallback to OpenCV (run in thread pool to avoid blocking)
        loop = asyncio.get_running_loop()
        with ThreadPoolExecutor(max_workers=1) as executor:
            success = await loop.run_in_executor(
                executor, self.create_lowres_opencv, input_path, output_path, scale
            )

        return success
    async def prepare_segment_lowres(self, segment_info: Dict[str, Any], scale: float,
                                     separate_eye_processing: bool = False,
                                     semaphore: Optional[asyncio.Semaphore] = None) -> bool:
        """
        Prepare low-resolution videos for a segment (regular or eye-specific).

        Args:
            segment_info: Segment information dictionary
            scale: Scale factor for resolution reduction
            separate_eye_processing: Whether to prepare eye-specific videos
            semaphore: Asyncio semaphore for limiting concurrent processes

        Returns:
            True if all videos were prepared successfully
        """
        segment_idx = segment_info['index']
        segment_dir = segment_info['directory']

        try:
            if separate_eye_processing:
                # Prepare low-res videos for left and right eyes
                success_left = success_right = True

                left_eye_path = os.path.join(segment_dir, "left_eye.mp4")
                right_eye_path = os.path.join(segment_dir, "right_eye.mp4")

                if os.path.exists(left_eye_path):
                    lowres_left_path = os.path.join(segment_dir, "low_res_left_eye_video.mp4")
                    success_left = await self.create_lowres_video_async(left_eye_path, lowres_left_path, scale, semaphore)

                if os.path.exists(right_eye_path):
                    lowres_right_path = os.path.join(segment_dir, "low_res_right_eye_video.mp4")
                    success_right = await self.create_lowres_video_async(right_eye_path, lowres_right_path, scale, semaphore)

                success = success_left and success_right
                if success:
                    logger.info(f"Pre-generated low-res eye videos for segment {segment_idx}")
                else:
                    logger.warning(f"Failed to pre-generate some eye videos for segment {segment_idx}")
            else:
                # Prepare regular low-res video
                input_path = segment_info['video_file']
                lowres_path = os.path.join(segment_dir, "low_res_video.mp4")

                success = await self.create_lowres_video_async(input_path, lowres_path, scale, semaphore)
                if success:
                    logger.info(f"Pre-generated low-res video for segment {segment_idx}")
                else:
                    logger.warning(f"Failed to pre-generate low-res video for segment {segment_idx}")

            if success:
                self.completed_segments.add(segment_idx)

            return success

        except Exception as e:
            logger.error(f"Error preparing low-res videos for segment {segment_idx}: {e}")
            return False

    def start_background_preparation(self, segments_info: List[Dict[str, Any]], scale: float,
                                     separate_eye_processing: bool = False, current_segment: int = 0):
        """
        Start preparing upcoming segments in background using threads.

        Args:
            segments_info: List of all segment information
            scale: Scale factor for resolution reduction
            separate_eye_processing: Whether to prepare eye-specific videos
            current_segment: Index of currently processing segment
        """
        def background_worker():
            """Background thread worker that prepares upcoming segments."""
            try:
                # Prepare segments ahead of current processing
                start_idx = current_segment + 1
                end_idx = min(len(segments_info), start_idx + self.segments_ahead)

                segments_to_prepare = []
                for i in range(start_idx, end_idx):
                    if i not in self.completed_segments and i not in self.preparation_tasks:
                        segments_to_prepare.append((i, segments_info[i]))

                if segments_to_prepare:
                    logger.info(f"Starting background preparation for {len(segments_to_prepare)} segments (indices {start_idx}-{end_idx-1})")

                    # Run async work in new event loop
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)

                    try:
                        # Create semaphore in this event loop
                        semaphore = asyncio.Semaphore(self.max_concurrent)

                        tasks = []
                        for segment_idx, segment_info in segments_to_prepare:
                            task = self.prepare_segment_lowres(segment_info, scale, separate_eye_processing, semaphore)
                            tasks.append(task)

                        # Run all preparation tasks
                        results = loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True))

                        # Mark completed segments
                        for i, (segment_idx, _) in enumerate(segments_to_prepare):
                            if i < len(results) and results[i] is True:
                                self.completed_segments.add(segment_idx)
                                logger.debug(f"Background preparation completed for segment {segment_idx}")

                    finally:
                        loop.close()
                else:
                    logger.debug(f"No segments need preparation (current: {current_segment})")

            except Exception as e:
                logger.error(f"Error in background preparation worker: {e}")

        # Start background thread
        thread = threading.Thread(target=background_worker, daemon=True)
        thread.start()
        self.active_threads.append(thread)

    def is_segment_ready(self, segment_idx: int) -> bool:
        """
        Check if low-res videos for a segment are ready.

        Args:
            segment_idx: Index of segment to check

        Returns:
            True if segment is ready
        """
        return segment_idx in self.completed_segments

    def cleanup(self):
        """Clean up any running threads."""
        # Note: daemon threads will be cleaned up automatically when main process exits
        # We just clear our tracking structures
        self.active_threads.clear()
        self.preparation_tasks.clear()

        logger.debug("AsyncLowResPreprocessor cleanup completed")
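For orientation, a minimal, hypothetical driver for the class above. Only the method names come from the file itself; the segment dictionaries, paths, and scale value are illustrative, shaped after the keys prepare_segment_lowres() reads ('index', 'directory', 'video_file'):

pre = AsyncLowResPreprocessor(max_concurrent=2, segments_ahead=2)

segments_info = [
    {"index": i, "directory": f"/tmp/seg_{i}", "video_file": f"/tmp/seg_{i}/video.mp4"}
    for i in range(4)
]

for i, seg in enumerate(segments_info):
    # Kick off preparation of the next few segments while segment i is processed
    pre.start_background_preparation(segments_info, scale=0.25,
                                     separate_eye_processing=False, current_segment=i)
    if pre.is_segment_ready(i):
        print(f"segment {i}: low-res video was pre-generated in the background")
    # ... run SAM2 inference on segment i here ...

pre.cleanup()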
@@ -459,41 +459,12 @@ class MaskProcessor:
 
         return colored_mask
 
-    def _precompute_upscaled_masks(self, video_segments: Dict[int, Dict[int, np.ndarray]],
-                                   target_width: int, target_height: int) -> Dict[int, Dict[int, np.ndarray]]:
-        """
-        Pre-compute all upscaled masks to avoid per-frame upscaling.
-
-        Args:
-            video_segments: Dictionary of frame masks from SAM2
-            target_width: Target frame width
-            target_height: Target frame height
-
-        Returns:
-            Dictionary with pre-upscaled masks
-        """
-        logger.info(f"Pre-computing upscaled masks for {len(video_segments)} frames")
-        upscaled_segments = {}
-
-        for frame_idx, frame_masks in video_segments.items():
-            upscaled_frame_masks = {}
-            for obj_id, mask in frame_masks.items():
-                mask = mask.squeeze()
-                if mask.shape != (target_height, target_width):
-                    upscaled_mask = cv2.resize(mask.astype(np.uint8),
-                                               (target_width, target_height),
-                                               interpolation=cv2.INTER_NEAREST)
-                    upscaled_frame_masks[obj_id] = upscaled_mask
-                else:
-                    upscaled_frame_masks[obj_id] = mask.astype(np.uint8)
-            upscaled_segments[frame_idx] = upscaled_frame_masks
-
-        logger.info(f"Pre-computed upscaled masks for {len(upscaled_segments)} frames")
-        return upscaled_segments
-
-
     def process_and_save_output_video(self, video_path: str, output_video_path: str,
                                       video_segments: Dict[int, Dict[int, np.ndarray]],
-                                      use_nvenc: bool = False, bitrate: str = "50M") -> bool:
+                                      use_nvenc: bool = False, bitrate: str = "50M",
+                                      batch_size: int = 16) -> bool:
         """
         Process high-resolution frames, apply upscaled masks, and save the output video.
 
@@ -503,6 +474,7 @@ class MaskProcessor:
             video_segments: Dictionary of frame masks
             use_nvenc: Whether to use NVIDIA hardware encoding
             bitrate: Output video bitrate
+            batch_size: Number of frames to process in a single batch
 
         Returns:
             True if successful
@@ -520,18 +492,15 @@ class MaskProcessor:
 
             logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")
 
-            # Pre-compute all upscaled masks (Phase 1 optimization)
-            upscaled_segments = self._precompute_upscaled_masks(video_segments, frame_width, frame_height)
-
             # Setup VideoWriter
+            out_writer = None
             if self.output_mode == "alpha_channel":
                 # For alpha channel, we need a codec that supports transparency
                 success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                 if not success:
                     logger.error("Failed to setup alpha channel encoder")
                     cap.release()
                     return False
-                use_nvenc = False  # Override NVENC for alpha channel
+                use_nvenc = False
             elif use_nvenc:
                 success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                 if not success:
@@ -539,106 +508,101 @@ class MaskProcessor:
                     use_nvenc = False
 
             if not use_nvenc and self.output_mode != "alpha_channel":
                 # Use OpenCV VideoWriter
-                fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use mp4v for better compatibility
-                out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
-                if not out.isOpened():
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
+                if not out_writer.isOpened():
                     logger.error("Failed to create output video writer")
                     cap.release()
                     return False
 
-            # Process frames with batch reading (Phase 1 optimization)
+            # Process frames in batches
             frame_idx = 0
            processed_frames = 0
-            batch_size = 10  # Process frames in batches for better I/O performance
-            frame_buffer = []
-
-            # Pre-fill frame buffer
-            for _ in range(min(batch_size, len(upscaled_segments))):
-                ret, frame = cap.read()
-                if ret:
-                    frame_buffer.append(frame)
-                else:
-                    break
-
-            buffer_idx = 0
-            while frame_idx < len(upscaled_segments) and buffer_idx < len(frame_buffer):
-                frame = frame_buffer[buffer_idx]
-
-                if frame_idx in upscaled_segments:
-                    # Get pre-computed upscaled masks for this frame (Phase 1 optimization)
-                    upscaled_masks = [upscaled_segments[frame_idx][obj_id]
-                                      for obj_id in upscaled_segments[frame_idx]]
-
-                    # Apply mask based on output mode (no upscaling needed - already done)
-                    if self.output_mode == "alpha_channel":
-                        result_frame = self.apply_mask_with_alpha(frame, upscaled_masks)
-                    else:
-                        result_frame = self.apply_green_mask(frame, upscaled_masks)
-                else:
-                    # No mask for this frame
-                    if self.output_mode == "alpha_channel":
-                        # Create fully transparent frame for alpha channel mode
-                        bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
-                        bgra_frame[:, :, 3] = 0  # Fully transparent
-                        result_frame = bgra_frame
-                        logger.warning(f"No mask for frame {frame_idx}, using transparent frame")
-                    else:
-                        # Use original frame for green screen mode
-                        logger.warning(f"No mask for frame {frame_idx}, using original")
-                        result_frame = frame
-
-                # Write frame
-                if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
-                    self.alpha_process.stdin.write(result_frame.tobytes())
-                elif use_nvenc and hasattr(self, 'nvenc_process'):
-                    self.nvenc_process.stdin.write(result_frame.tobytes())
-                else:
-                    out.write(result_frame)
-
-                processed_frames += 1
-                frame_idx += 1
-                buffer_idx += 1
-
-                # Refill buffer when needed
-                if buffer_idx >= len(frame_buffer) and frame_idx < len(upscaled_segments):
-                    frame_buffer.clear()
-                    buffer_idx = 0
-                    # Read next batch
-                    for _ in range(min(batch_size, len(upscaled_segments) - frame_idx)):
-                        ret, frame = cap.read()
-                        if ret:
-                            frame_buffer.append(frame)
-                        else:
-                            break
-
-                # Progress logging
-                if processed_frames % 100 == 0:
-                    logger.info(f"Processed {processed_frames}/{total_frames} frames")
+            while frame_idx < total_frames:
+                batch_frames = []
+                batch_masks = []
+
+                # Read a batch of frames
+                for _ in range(batch_size):
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    batch_frames.append(frame)
+
+                if not batch_frames:
+                    break
+
+                # Get masks for the current batch and perform just-in-time upscaling
+                for i in range(len(batch_frames)):
+                    current_frame_idx = frame_idx + i
+                    if current_frame_idx in video_segments:
+                        frame_masks = video_segments[current_frame_idx]
+                        upscaled_masks = []
+                        for obj_id, mask in frame_masks.items():
+                            mask = mask.squeeze()
+                            if mask.shape != (frame_height, frame_width):
+                                upscaled_mask = cv2.resize(mask.astype(np.uint8),
+                                                           (frame_width, frame_height),
+                                                           interpolation=cv2.INTER_NEAREST)
+                                upscaled_masks.append(upscaled_mask)
+                            else:
+                                upscaled_masks.append(mask.astype(np.uint8))
+                        batch_masks.append(upscaled_masks)
+                    else:
+                        batch_masks.append([])  # No masks for this frame
+
+                # Process the batch
+                result_batch = []
+                for i, frame in enumerate(batch_frames):
+                    masks = batch_masks[i]
+                    if masks:
+                        if self.output_mode == "alpha_channel":
+                            result_frame = self.apply_mask_with_alpha(frame, masks)
+                        else:
+                            result_frame = self.apply_green_mask(frame, masks)
+                    else:
+                        # No mask for this frame
+                        if self.output_mode == "alpha_channel":
+                            bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
+                            bgra_frame[:, :, 3] = 0
+                            result_frame = bgra_frame
+                        else:
+                            result_frame = frame
+                    result_batch.append(result_frame)
+
+                # Write the processed batch
+                for result_frame in result_batch:
+                    if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
+                        self.alpha_process.stdin.write(result_frame.tobytes())
+                    elif use_nvenc and hasattr(self, 'nvenc_process'):
+                        self.nvenc_process.stdin.write(result_frame.tobytes())
+                    else:
+                        out_writer.write(result_frame)
+
+                processed_frames += len(batch_frames)
+                frame_idx += len(batch_frames)
+
+                if processed_frames % 100 < batch_size:
+                    logger.info(f"Processed {processed_frames}/{total_frames} frames")
 
             # Cleanup
             cap.release()
 
             if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                 self.alpha_process.stdin.close()
                 self.alpha_process.wait()
                 if self.alpha_process.returncode != 0:
                     logger.error("Alpha channel encoding failed")
                     return False
             elif use_nvenc and hasattr(self, 'nvenc_process'):
                 self.nvenc_process.stdin.close()
                 self.nvenc_process.wait()
                 if self.nvenc_process.returncode != 0:
                     logger.error("NVENC encoding failed")
                     return False
-            else:
-                out.release()
+            if out_writer:
+                out_writer.release()
 
             logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}")
             return True
 
         except Exception as e:
-            logger.error(f"Error processing video: {e}")
+            logger.error(f"Error processing video: {e}", exc_info=True)
             return False
 
     def _setup_nvenc_encoder(self, output_path: str, width: int, height: int,
@@ -751,6 +715,14 @@ class MaskProcessor:
 
         if success:
             logger.info(f"Successfully created {self.output_mode} video: {output_video}")
+            # Mark segment as completed only after video is successfully written
+            try:
+                output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
+                with open(output_done_file, 'w') as f:
+                    f.write(f"Segment {segment_info['index']} processed and saved successfully.")
+                logger.debug(f"Created completion marker for segment {segment_info['index']}")
+            except Exception as e:
+                logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
         else:
             logger.error(f"Failed to process segment {segment_info['index']}")
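The net effect of the hunks above: instead of upscaling every mask for the whole clip up front, masks are now upscaled just in time, one batch of frames at a time, so peak memory is bounded by the batch size. A minimal standalone sketch of that pattern (the function name and shapes are illustrative, not the project's exact code):

import cv2
import numpy as np

def upscale_batch(batch_frames, video_segments, start_idx, width, height):
    """Upscale only the masks needed for this batch of frames."""
    batch_masks = []
    for i in range(len(batch_frames)):
        masks = []
        # video_segments maps frame index -> {obj_id: mask}, as in the diff above
        for mask in video_segments.get(start_idx + i, {}).values():
            m = mask.squeeze().astype(np.uint8)
            if m.shape != (height, width):
                # Nearest-neighbor keeps the mask binary while resizing
                m = cv2.resize(m, (width, height), interpolation=cv2.INTER_NEAREST)
            masks.append(m)
        batch_masks.append(masks)
    return batch_masks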
@@ -8,6 +8,7 @@ import cv2
 import numpy as np
 import torch
 import logging
+import subprocess
 import gc
 from typing import Dict, List, Any, Optional, Tuple
 from sam2.build_sam import build_sam2_video_predictor
@@ -19,7 +20,8 @@ class SAM2Processor:
     """Handles SAM2-based video segmentation for human tracking."""
 
     def __init__(self, checkpoint_path: str, config_path: str, vos_optimized: bool = False,
-                 separate_eye_processing: bool = False, eye_overlap_pixels: int = 0):
+                 separate_eye_processing: bool = False, eye_overlap_pixels: int = 0,
+                 async_preprocessor=None):
         """
         Initialize SAM2 processor.
 
@@ -29,11 +31,13 @@ class SAM2Processor:
             vos_optimized: Enable VOS optimization for speedup (requires PyTorch 2.5.1+)
             separate_eye_processing: Enable VR180 separate eye processing mode
             eye_overlap_pixels: Pixel overlap between eyes for blending
+            async_preprocessor: Optional async preprocessor for background low-res video generation
         """
         self.checkpoint_path = checkpoint_path
         self.config_path = config_path
         self.vos_optimized = vos_optimized
         self.separate_eye_processing = separate_eye_processing
+        self.async_preprocessor = async_preprocessor
         self.predictor = None
 
         # Initialize eye processor if separate eye processing is enabled
@@ -120,13 +124,64 @@ class SAM2Processor:
 
     def create_low_res_video(self, input_video_path: str, output_video_path: str, scale: float):
         """
-        Create a low-resolution version of the input video for inference.
+        Create a low-resolution version of the input video for inference using FFmpeg
+        with hardware acceleration for improved performance.
 
         Args:
             input_video_path: Path to input video
             output_video_path: Path to output low-res video
             scale: Scale factor for resolution reduction
         """
+        try:
+            # Get video properties using OpenCV
+            cap = cv2.VideoCapture(input_video_path)
+            if not cap.isOpened():
+                raise ValueError(f"Could not open video: {input_video_path}")
+
+            original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            cap.release()
+
+            target_width = int(original_width * scale)
+            target_height = int(original_height * scale)
+
+            # Ensure dimensions are even, as required by many codecs
+            target_width = target_width if target_width % 2 == 0 else target_width + 1
+            target_height = target_height if target_height % 2 == 0 else target_height + 1
+
+            # Construct FFmpeg command with hardware acceleration
+            command = [
+                'ffmpeg',
+                '-y',
+                '-hwaccel', 'auto',  # Auto-detect hardware acceleration
+                '-i', input_video_path,
+                '-vf', f'scale={target_width}:{target_height}',
+                '-c:v', 'h264_nvenc',  # Use NVIDIA's hardware encoder
+                '-preset', 'fast',
+                '-crf', '23',
+                output_video_path
+            ]
+
+            logger.info(f"Executing FFmpeg command: {' '.join(command)}")
+
+            # Execute FFmpeg command
+            process = subprocess.run(command, check=True, capture_output=True, text=True)
+
+            if process.returncode != 0:
+                logger.error(f"FFmpeg failed with error: {process.stderr}")
+                raise RuntimeError(f"FFmpeg process failed: {process.stderr}")
+
+            logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
+
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            logger.warning(f"Hardware-accelerated FFmpeg failed: {e}. Falling back to OpenCV.")
+            # Fallback to original OpenCV implementation if FFmpeg fails
+            self._create_low_res_video_opencv(input_video_path, output_video_path, scale)
+
+    def _create_low_res_video_opencv(self, input_video_path: str, output_video_path: str, scale: float):
+        """Original OpenCV-based implementation for creating low-resolution video."""
         cap = cv2.VideoCapture(input_video_path)
         if not cap.isOpened():
             raise ValueError(f"Could not open video: {input_video_path}")
@@ -151,7 +206,42 @@ class SAM2Processor:
         cap.release()
         out.release()
 
-        logger.info(f"Created low-res video with {frame_count} frames: {output_video_path}")
+        logger.info(f"Created low-res video with {frame_count} frames using OpenCV: {output_video_path}")
+
+    def ensure_low_res_video(self, input_video_path: str, output_video_path: str,
+                             scale: float, segment_idx: Optional[int] = None) -> bool:
+        """
+        Ensure low-resolution video exists, using async preprocessor if available.
+
+        Args:
+            input_video_path: Path to input video
+            output_video_path: Path to output low-res video
+            scale: Scale factor for resolution reduction
+            segment_idx: Optional segment index for async coordination
+
+        Returns:
+            True if low-res video is ready
+        """
+        # Check if already exists
+        if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
+            return True
+
+        # Use async preprocessor if available and segment index provided
+        if self.async_preprocessor and segment_idx is not None:
+            if self.async_preprocessor.is_segment_ready(segment_idx):
+                if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
+                    logger.debug(f"Async preprocessor provided segment {segment_idx}")
+                    return True
+            else:
+                logger.debug(f"Async preprocessor hasn't completed segment {segment_idx} yet")
+
+        # Fallback to synchronous creation
+        try:
+            self.create_low_res_video(input_video_path, output_video_path, scale)
+            return os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0
+        except Exception as e:
+            logger.error(f"Failed to create low-res video {output_video_path}: {e}")
+            return False
 
     def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]]) -> bool:
         """
@@ -341,14 +431,11 @@ class SAM2Processor:
 
         logger.info(f"Processing segment {segment_idx} with SAM2")
 
-        # Create low-resolution video for inference
+        # Create low-resolution video for inference (async-aware)
         low_res_video_path = os.path.join(segment_dir, "low_res_video.mp4")
-        if not os.path.exists(low_res_video_path):
-            try:
-                self.create_low_res_video(video_path, low_res_video_path, inference_scale)
-            except Exception as e:
-                logger.error(f"Failed to create low-res video for segment {segment_idx}: {e}")
-                return None
+        if not self.ensure_low_res_video(video_path, low_res_video_path, inference_scale, segment_idx):
+            logger.error(f"Failed to create low-res video for segment {segment_idx}")
+            return None
 
         try:
             # Initialize inference state
@@ -387,13 +474,7 @@ class SAM2Processor:
         except Exception as e:
             logger.warning(f"Could not remove low-res video: {e}")
 
-        # Mark segment as completed (for resume capability)
-        try:
-            with open(output_done_file, 'w') as f:
-                f.write(f"Segment {segment_idx} completed successfully\n")
-            logger.debug(f"Marked segment {segment_idx} as completed")
-        except Exception as e:
-            logger.warning(f"Could not create completion marker: {e}")
-
 
         return video_segments
 
@@ -698,14 +779,11 @@ class SAM2Processor:
             logger.error(f"Eye video not found: {eye_video_path}")
             return None
 
-        # Create low-resolution eye video for inference
+        # Create low-resolution eye video for inference (async-aware)
         low_res_eye_video_path = os.path.join(segment_dir, f"low_res_{eye_side}_eye_video.mp4")
-        if not os.path.exists(low_res_eye_video_path):
-            try:
-                self.create_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale)
-            except Exception as e:
-                logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}: {e}")
-                return None
+        if not self.ensure_low_res_video(eye_video_path, low_res_eye_video_path, inference_scale, segment_idx):
+            logger.error(f"Failed to create low-res {eye_side} eye video for segment {segment_idx}")
+            return None
 
         try:
             # Initialize inference state with eye-specific video
@@ -44,6 +44,14 @@ class VideoSplitter:
         segments_dir = os.path.join(output_dir, f"{video_name}_segments")
         ensure_directory(segments_dir)
 
+        # Check for completion marker to avoid re-splitting
+        completion_marker = os.path.join(segments_dir, ".splitting_done")
+        if os.path.exists(completion_marker):
+            logger.info(f"Video already split, skipping splitting process. Found completion marker: {completion_marker}")
+            segment_dirs = [d for d in os.listdir(segments_dir) if os.path.isdir(os.path.join(segments_dir, d)) and d.startswith("segment_")]
+            segment_dirs.sort(key=lambda x: int(x.split("_")[1]))
+            return segments_dir, segment_dirs
+
         logger.info(f"Splitting video {input_video} into {self.segment_duration}s segments")
 
         # Split video using ffmpeg
@@ -83,6 +91,11 @@ class VideoSplitter:
         # Create file list for later concatenation
         self._create_file_list(segments_dir, segment_dirs)
 
+        # Create completion marker
+        completion_marker = os.path.join(segments_dir, ".splitting_done")
+        with open(completion_marker, 'w') as f:
+            f.write("Video splitting completed successfully.")
+
         logger.info(f"Successfully split video into {len(segment_dirs)} segments")
         return segments_dir, segment_dirs
main.py (54 lines changed)

@@ -475,8 +475,8 @@ def process_segment_with_separate_eyes(segment_info, detector, sam2_processor, m
 
     return success, left_masks, right_masks
 
-def main():
-    """Main processing pipeline."""
+async def main_async():
+    """Main processing pipeline with async optimizations."""
     args = parse_arguments()
 
     try:
@@ -568,6 +568,24 @@ def main():
         eye_overlap_pixels = config.get('processing.eye_overlap_pixels', 0)
         enable_greenscreen_fallback = config.get('processing.enable_greenscreen_fallback', True)
 
+        # Initialize async preprocessor if enabled
+        async_preprocessor = None
+        if config.get('advanced.enable_background_lowres_generation', False):
+            from core.async_lowres_preprocessor import AsyncLowResPreprocessor
+
+            max_concurrent = config.get('advanced.max_concurrent_lowres', 3)
+            segments_ahead = config.get('advanced.lowres_segments_ahead', 3)
+            use_ffmpeg = config.get('advanced.use_ffmpeg_lowres', True)
+
+            async_preprocessor = AsyncLowResPreprocessor(
+                max_concurrent=max_concurrent,
+                segments_ahead=segments_ahead,
+                use_ffmpeg=use_ffmpeg
+            )
+            logger.info(f"Async low-res preprocessing: ENABLED (max_concurrent={max_concurrent}, segments_ahead={segments_ahead})")
+        else:
+            logger.info("Async low-res preprocessing: DISABLED")
+
         if separate_eye_processing:
             logger.info("VR180 Separate Eye Processing: ENABLED")
             logger.info(f"Eye overlap pixels: {eye_overlap_pixels}")
@@ -578,7 +596,8 @@ def main():
             config_path=config.get_sam2_config(),
             vos_optimized=config.get('models.sam2_vos_optimized', False),
             separate_eye_processing=separate_eye_processing,
-            eye_overlap_pixels=eye_overlap_pixels
+            eye_overlap_pixels=eye_overlap_pixels,
+            async_preprocessor=async_preprocessor
         )
 
         # Initialize mask processor with quality enhancements
@@ -593,6 +612,16 @@ def main():
         logger.info("Step 4: Processing segments sequentially")
         total_humans_detected = 0
 
+        # Start background low-res video preprocessing if enabled
+        if async_preprocessor:
+            logger.info("Starting background low-res video preprocessing")
+            async_preprocessor.start_background_preparation(
+                segments_info,
+                config.get_inference_scale(),
+                separate_eye_processing,
+                current_segment=0
+            )
+
         # Initialize previous masks for separate eye processing
         previous_left_masks = None
         previous_right_masks = None
@@ -602,6 +631,15 @@ def main():
 
             logger.info(f"Processing segment {segment_idx}/{len(segments_info)-1}")
 
+            # Start background preparation for upcoming segments
+            if async_preprocessor and i < len(segments_info) - 1:
+                async_preprocessor.start_background_preparation(
+                    segments_info,
+                    config.get_inference_scale(),
+                    separate_eye_processing,
+                    current_segment=i
+                )
+
             # Reset temporal history for new segment
             mask_processor.reset_temporal_history()
 
@@ -1012,6 +1050,16 @@ def main():
     except Exception as e:
         logger.error(f"Pipeline failed: {e}", exc_info=True)
         return 1
+    finally:
+        # Cleanup async preprocessor if it was used
+        if async_preprocessor:
+            async_preprocessor.cleanup()
+            logger.debug("Async preprocessor cleanup completed")
+
+def main():
+    """Main entry point - wrapper for async main."""
+    import asyncio
+    return asyncio.run(main_async())
 
 if __name__ == "__main__":
     exit_code = main()
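For reference, the feature is gated by the advanced.* keys read in main_async() above. A hypothetical sketch of the settings involved; only the key names and defaults come from the diff, the nested-dict layout is an assumption about this project's config format:

# Assumed shape of the config consulted via config.get('advanced....', default);
# the dict nesting here is illustrative, not the project's actual config file.
example_config = {
    "advanced": {
        "enable_background_lowres_generation": True,  # turn the async preprocessor on
        "max_concurrent_lowres": 3,   # concurrent FFmpeg processes (default 3)
        "lowres_segments_ahead": 3,   # segments prepared in advance (default 3)
        "use_ffmpeg_lowres": True,    # prefer FFmpeg over the OpenCV fallback
    }
}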