"""
|
|
Mask processor module for applying green screen effects.
|
|
Handles applying masks to video frames to create green screen output.
|
|
"""
|
|
|
|
import os
|
|
import cv2
|
|
import numpy as np
|
|
import cupy as cp
|
|
import subprocess
|
|
import sys
|
|
import logging
|
|
from typing import Dict, List, Any, Optional, Tuple
|
|
from collections import deque
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MaskProcessor:
    """Handles mask application and green screen processing with quality enhancements."""

    def __init__(self, green_color: Optional[List[int]] = None, blue_color: Optional[List[int]] = None,
                 mask_quality_config: Optional[Dict[str, Any]] = None,
                 output_mode: str = "green_screen"):
        """
        Initialize mask processor with quality enhancement options.

        Args:
            green_color: BGR color for the green screen background (defaults to [0, 255, 0])
            blue_color: BGR color for a second object, if needed (defaults to [255, 0, 0])
            mask_quality_config: Configuration dictionary for mask quality improvements
            output_mode: Output mode - "green_screen" or "alpha_channel"
        """
        # Resolve colors here to avoid mutable default arguments
        self.green_color = green_color if green_color is not None else [0, 255, 0]
        self.blue_color = blue_color if blue_color is not None else [255, 0, 0]
        self.output_mode = output_mode
        self.use_gpu = self._check_gpu_availability()

        # Mask quality configuration with defaults
        if mask_quality_config is None:
            mask_quality_config = {}

        self.enable_edge_blur = mask_quality_config.get('enable_edge_blur', False)
        self.edge_blur_radius = mask_quality_config.get('edge_blur_radius', 3)
        self.edge_blur_sigma = mask_quality_config.get('edge_blur_sigma', 1.5)

        self.enable_temporal_smoothing = mask_quality_config.get('enable_temporal_smoothing', False)
        self.temporal_blend_weight = mask_quality_config.get('temporal_blend_weight', 0.3)
        self.temporal_history_frames = mask_quality_config.get('temporal_history_frames', 3)

        self.enable_morphological_cleaning = mask_quality_config.get('enable_morphological_cleaning', False)
        self.morphology_kernel_size = mask_quality_config.get('morphology_kernel_size', 5)
        self.min_component_size = mask_quality_config.get('min_component_size', 500)

        self.alpha_blending_mode = mask_quality_config.get('alpha_blending_mode', 'gaussian')
        self.alpha_transition_width = mask_quality_config.get('alpha_transition_width', 10)

        self.enable_bilateral_filter = mask_quality_config.get('enable_bilateral_filter', False)
        self.bilateral_d = mask_quality_config.get('bilateral_d', 9)
        self.bilateral_sigma_color = mask_quality_config.get('bilateral_sigma_color', 75)
        self.bilateral_sigma_space = mask_quality_config.get('bilateral_sigma_space', 75)

        # Temporal history buffer for mask smoothing
        self.mask_history = deque(maxlen=self.temporal_history_frames)

        # Log configuration
        if any([self.enable_edge_blur, self.enable_temporal_smoothing, self.enable_morphological_cleaning]):
            logger.info("Mask quality enhancements enabled:")
            if self.enable_edge_blur:
                logger.info(f"  Edge blur: radius={self.edge_blur_radius}, sigma={self.edge_blur_sigma}")
            if self.enable_temporal_smoothing:
                logger.info(f"  Temporal smoothing: weight={self.temporal_blend_weight}, history={self.temporal_history_frames}")
            if self.enable_morphological_cleaning:
                logger.info(f"  Morphological cleaning: kernel={self.morphology_kernel_size}, min_size={self.min_component_size}")
            logger.info(f"  Alpha blending: mode={self.alpha_blending_mode}, width={self.alpha_transition_width}")
        else:
            logger.info("Mask quality enhancements disabled - using standard binary masking")

        logger.info(f"Output mode: {self.output_mode}")

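    # Example: constructing a processor with the quality pipeline enabled
    # (illustrative values; the keys mirror the defaults documented above):
    #
    #   processor = MaskProcessor(mask_quality_config={
    #       'enable_edge_blur': True,
    #       'edge_blur_radius': 3,
    #       'enable_temporal_smoothing': True,
    #       'temporal_blend_weight': 0.3,
    #       'enable_morphological_cleaning': True,
    #       'min_component_size': 500,
    #   })
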
    def _check_gpu_availability(self) -> bool:
        """Check if CuPy GPU acceleration is available."""
        if cp is None:
            logger.warning("CuPy not installed, using CPU")
            return False
        try:
            # Run a trivial computation to confirm a working GPU device
            test_array = cp.array([1, 2, 3])
            _ = test_array * 2
            logger.info("GPU acceleration available via CuPy")
            return True
        except Exception as e:
            logger.warning(f"GPU acceleration not available, using CPU: {e}")
            return False

    def enhance_mask_quality(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply all enabled mask quality enhancements.

        Args:
            mask: Input binary mask

        Returns:
            Enhanced mask with quality improvements applied
        """
        enhanced_mask = mask.copy()

        # 1. Morphological cleaning
        if self.enable_morphological_cleaning:
            enhanced_mask = self._clean_mask_morphologically(enhanced_mask)

        # 2. Temporal smoothing
        if self.enable_temporal_smoothing:
            enhanced_mask = self._apply_temporal_smoothing(enhanced_mask)

        # 3. Edge enhancement and blurring
        if self.enable_edge_blur:
            enhanced_mask = self._apply_edge_blur(enhanced_mask)

        # 4. Bilateral filtering (if enabled)
        if self.enable_bilateral_filter:
            enhanced_mask = self._apply_bilateral_filter(enhanced_mask)

        return enhanced_mask

    def _clean_mask_morphologically(self, mask: np.ndarray) -> np.ndarray:
        """
        Clean mask using morphological operations to remove noise and small artifacts.

        Args:
            mask: Input binary mask

        Returns:
            Cleaned mask
        """
        # Convert to uint8 for OpenCV operations
        mask_uint8 = (mask * 255).astype(np.uint8)

        # Create morphological kernel
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                           (self.morphology_kernel_size, self.morphology_kernel_size))

        # Opening operation (erosion followed by dilation) to remove small noise
        cleaned = cv2.morphologyEx(mask_uint8, cv2.MORPH_OPEN, kernel)

        # Closing operation (dilation followed by erosion) to fill small holes
        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)

        # Remove small connected components
        if self.min_component_size > 0:
            cleaned = self._remove_small_components(cleaned)

        return (cleaned / 255.0).astype(np.float32)

    def _remove_small_components(self, mask: np.ndarray) -> np.ndarray:
        """
        Remove connected components smaller than the minimum size.

        Args:
            mask: Input binary mask (uint8)

        Returns:
            Mask with small components removed
        """
        # Find connected components
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, connectivity=8)

        # Create output mask
        output_mask = np.zeros_like(mask)

        # Keep components larger than minimum size (skip background label 0)
        for i in range(1, num_labels):
            component_size = stats[i, cv2.CC_STAT_AREA]
            if component_size >= self.min_component_size:
                output_mask[labels == i] = 255

        return output_mask

    def _apply_temporal_smoothing(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply temporal smoothing using mask history.

        Args:
            mask: Current frame mask

        Returns:
            Temporally smoothed mask
        """
        if len(self.mask_history) == 0:
            # First frame, no history to blend with
            self.mask_history.append(mask.copy())
            return mask

        # Blend with previous frames using a weighted average
        smoothed_mask = mask.astype(np.float32)
        total_weight = 1.0

        for i, hist_mask in enumerate(reversed(self.mask_history)):
            # Exponential decay: more recent frames have higher weight
            frame_weight = self.temporal_blend_weight * (0.8 ** i)
            smoothed_mask += hist_mask.astype(np.float32) * frame_weight
            total_weight += frame_weight

        # Normalize by total weight
        smoothed_mask /= total_weight

        # Update history with the raw (unsmoothed) mask to avoid feedback drift
        self.mask_history.append(mask.copy())

        return smoothed_mask

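    # Worked example of the weighting above (illustrative numbers, assuming
    # temporal_blend_weight=0.3 and a full 3-frame history): the blend uses
    # weights 1.0 (current), 0.3, 0.3*0.8=0.24, and 0.3*0.8^2=0.192, then
    # divides by their sum 1.732, so the current frame keeps ~58% influence.
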
    def _apply_edge_blur(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply Gaussian blur to mask edges for smooth transitions.

        Args:
            mask: Input mask

        Returns:
            Mask with blurred edges
        """
        # Kernel size must be odd: 2 * radius + 1
        kernel_size = 2 * self.edge_blur_radius + 1
        blurred_mask = cv2.GaussianBlur(mask.astype(np.float32),
                                        (kernel_size, kernel_size),
                                        self.edge_blur_sigma)

        return blurred_mask

    def _apply_bilateral_filter(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply bilateral filtering for edge-preserving smoothing.

        Args:
            mask: Input mask

        Returns:
            Filtered mask
        """
        # Convert to uint8 for the bilateral filter
        mask_uint8 = (mask * 255).astype(np.uint8)

        # Apply bilateral filter
        filtered = cv2.bilateralFilter(mask_uint8, self.bilateral_d,
                                       self.bilateral_sigma_color,
                                       self.bilateral_sigma_space)

        return (filtered / 255.0).astype(np.float32)

    def _create_alpha_mask(self, mask: np.ndarray) -> np.ndarray:
        """
        Create alpha mask with smooth transitions based on blending mode.

        Args:
            mask: Input binary/float mask

        Returns:
            Alpha mask with smooth transitions
        """
        if self.alpha_blending_mode == "linear":
            return mask
        elif self.alpha_blending_mode == "gaussian":
            # Use distance transforms for a smooth falloff across the mask edge
            binary_mask = (mask > 0.5).astype(np.uint8)

            # Distance to the mask edge, measured inside and outside the mask
            dist_inside = cv2.distanceTransform(binary_mask, cv2.DIST_L2, 5)
            dist_outside = cv2.distanceTransform(1 - binary_mask, cv2.DIST_L2, 5)

            # Signed distance: positive inside the mask, negative outside.
            # Ramp alpha from 0 to 1 over alpha_transition_width pixels on each
            # side of the boundary, pinned to 0.5 at the boundary itself so the
            # transition is continuous (ramping each side independently from 0
            # and from 1 would leave a jump right at the edge).
            signed_dist = dist_inside.astype(np.float32) - dist_outside.astype(np.float32)
            transition_width = float(self.alpha_transition_width)
            alpha = np.clip(0.5 + signed_dist / (2.0 * transition_width), 0.0, 1.0)

            return alpha.astype(np.float32)
        elif self.alpha_blending_mode == "sigmoid":
            # Sigmoid-based smooth transition centered at mask value 0.5
            return 1.0 / (1.0 + np.exp(-10 * (mask - 0.5)))
        else:
            return mask

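    # Numeric sketch of the gaussian mode (assumed example values): with
    # alpha_transition_width=10, a pixel 5 px inside the edge gets
    # alpha = 0.5 + 5/20 = 0.75, a pixel 5 px outside gets 0.25, and pixels
    # more than 10 px from the edge clamp to 1.0 (inside) or 0.0 (outside).
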
    def apply_green_mask(self, frame: np.ndarray, masks: List[np.ndarray]) -> np.ndarray:
        """
        Apply green screen mask to a frame with quality enhancements.

        Args:
            frame: Input video frame (BGR format)
            masks: List of object masks to apply

        Returns:
            Frame with green screen background and enhanced mask quality
        """
        # Combine all masks into a single mask
        combined_mask = self._combine_masks(masks)

        # Apply quality enhancements
        enhanced_mask = self.enhance_mask_quality(combined_mask)

        # Create alpha mask for smooth blending
        alpha_mask = self._create_alpha_mask(enhanced_mask)

        # Apply mask using alpha blending
        if self.use_gpu:
            return self._apply_green_mask_gpu_enhanced(frame, alpha_mask)
        else:
            return self._apply_green_mask_cpu_enhanced(frame, alpha_mask)

    def apply_mask_with_alpha(self, frame: np.ndarray, masks: List[np.ndarray]) -> np.ndarray:
        """
        Apply mask to create a BGRA frame with an alpha channel.

        Args:
            frame: Input video frame (BGR format)
            masks: List of object masks to apply

        Returns:
            BGRA frame with alpha channel
        """
        # Combine all masks into a single mask
        combined_mask = self._combine_masks(masks)

        # Apply quality enhancements
        enhanced_mask = self.enhance_mask_quality(combined_mask)

        # Create alpha mask for smooth blending
        alpha_mask = self._create_alpha_mask(enhanced_mask)

        # Resize alpha mask to match frame if needed
        if alpha_mask.shape != frame.shape[:2]:
            alpha_mask = cv2.resize(alpha_mask, (frame.shape[1], frame.shape[0]))

        # Convert BGR to BGRA
        bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)

        # Set alpha channel
        bgra_frame[:, :, 3] = (alpha_mask * 255).astype(np.uint8)

        return bgra_frame

    def _combine_masks(self, masks: List[np.ndarray]) -> np.ndarray:
        """
        Combine multiple object masks into a single mask.

        Args:
            masks: List of object masks

        Returns:
            Combined mask
        """
        if not masks:
            return np.zeros((0, 0), dtype=np.float32)

        # Start with the first mask
        combined_mask = masks[0].squeeze().astype(np.float32)

        # Combine with remaining masks via element-wise maximum
        # (equivalent to logical OR for binary masks)
        for mask in masks[1:]:
            mask_squeezed = mask.squeeze().astype(np.float32)
            if mask_squeezed.shape != combined_mask.shape:
                # Resize mask to match the combined mask
                mask_squeezed = cv2.resize(mask_squeezed,
                                           (combined_mask.shape[1], combined_mask.shape[0]),
                                           interpolation=cv2.INTER_NEAREST)
            combined_mask = np.maximum(combined_mask, mask_squeezed)

        return combined_mask

    def reset_temporal_history(self):
        """Reset the temporal history buffer. Call this when starting a new segment."""
        self.mask_history.clear()
        logger.debug("Temporal history buffer reset")

    def _apply_green_mask_gpu_enhanced(self, frame: np.ndarray, alpha_mask: np.ndarray) -> np.ndarray:
        """GPU-accelerated green mask application with alpha blending using CuPy (Phase 1 optimized)."""
        try:
            # Convert to CuPy arrays
            frame_gpu = cp.asarray(frame, dtype=cp.uint8)
            alpha_gpu = cp.asarray(alpha_mask, dtype=cp.float32)

            # Resize alpha mask to match frame if needed. This round-trips
            # through the CPU because the resize itself is done by OpenCV.
            if alpha_gpu.shape != frame_gpu.shape[:2]:
                alpha_gpu = cp.asarray(cv2.resize(cp.asnumpy(alpha_gpu),
                                                  (frame_gpu.shape[1], frame_gpu.shape[0])))

            # Create green background via broadcasting (no full allocation)
            green_color_gpu = cp.array(self.green_color, dtype=cp.uint8)
            green_background = cp.broadcast_to(green_color_gpu, frame_gpu.shape)

            # Expand alpha to (H, W, 1) so it broadcasts across the color channels
            alpha_3d = cp.expand_dims(alpha_gpu, axis=2)

            # Blend in float32 to avoid uint8 overflow
            frame_float = frame_gpu.astype(cp.float32)
            green_float = green_background.astype(cp.float32)

            # Vectorized alpha blending: alpha * frame + (1 - alpha) * green
            result_frame = cp.clip(alpha_3d * frame_float + (1.0 - alpha_3d) * green_float, 0, 255)

            return cp.asnumpy(result_frame.astype(cp.uint8))

        except Exception as e:
            logger.error(f"GPU enhanced processing failed, falling back to CPU: {e}")
            return self._apply_green_mask_cpu_enhanced(frame, alpha_mask)

    def _apply_green_mask_cpu_enhanced(self, frame: np.ndarray, alpha_mask: np.ndarray) -> np.ndarray:
        """CPU-based green mask application with alpha blending (Phase 1 optimized)."""
        # Resize alpha mask to match frame if needed
        if alpha_mask.shape != frame.shape[:2]:
            alpha_mask = cv2.resize(alpha_mask, (frame.shape[1], frame.shape[0]))

        # Create green background via broadcasting (no full allocation)
        green_color = np.array(self.green_color, dtype=np.uint8)
        green_background = np.broadcast_to(green_color, frame.shape)

        # Expand alpha to (H, W, 1) so it broadcasts across the color channels
        alpha_3d = np.expand_dims(alpha_mask.astype(np.float32), axis=2)

        # Blend in float32 to avoid uint8 overflow
        frame_float = frame.astype(np.float32)
        green_float = green_background.astype(np.float32)

        result_frame = np.clip(alpha_3d * frame_float + (1.0 - alpha_3d) * green_float, 0, 255)

        return result_frame.astype(np.uint8)

    def apply_colored_mask(self, frame: np.ndarray, masks_a: List[np.ndarray],
                           masks_b: List[np.ndarray]) -> np.ndarray:
        """
        Apply colored masks for visualization (green and blue).

        Args:
            frame: Input video frame
            masks_a: Masks for object A (green)
            masks_b: Masks for object B (blue)

        Returns:
            Frame with colored masks applied
        """
        colored_mask = np.zeros_like(frame)

        # Apply green color to masks_a
        for mask in masks_a:
            # Threshold and cast to uint8 so cv2.resize accepts boolean/float masks
            mask = (mask.squeeze() > 0).astype(np.uint8)
            if mask.shape != frame.shape[:2]:
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)
            colored_mask[mask > 0] = self.green_color

        # Apply blue color to masks_b
        for mask in masks_b:
            mask = (mask.squeeze() > 0).astype(np.uint8)
            if mask.shape != frame.shape[:2]:
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)
            colored_mask[mask > 0] = self.blue_color

        return colored_mask

    def process_and_save_output_video(self, video_path: str, output_video_path: str,
                                      video_segments: Dict[int, Dict[int, np.ndarray]],
                                      use_nvenc: bool = False, bitrate: str = "50M",
                                      batch_size: int = 16) -> bool:
        """
        Process high-resolution frames, apply upscaled masks, and save the output video.

        Args:
            video_path: Path to input video
            output_video_path: Path to save output video
            video_segments: Dictionary of frame masks
            use_nvenc: Whether to use NVIDIA hardware encoding
            bitrate: Output video bitrate
            batch_size: Number of frames to process in a single batch

        Returns:
            True if successful
        """
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {video_path}")
                return False

            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")

            # Set up the video writer
            out_writer = None
            if self.output_mode == "alpha_channel":
                success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.error("Failed to setup alpha channel encoder")
                    cap.release()
                    return False
                use_nvenc = False
            elif use_nvenc:
                success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.warning("NVENC setup failed, falling back to OpenCV")
                    use_nvenc = False

            if not use_nvenc and self.output_mode != "alpha_channel":
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
                if not out_writer.isOpened():
                    logger.error("Failed to create output video writer")
                    cap.release()
                    return False

            # Process frames in batches
            frame_idx = 0
            processed_frames = 0

            while frame_idx < total_frames:
                batch_frames = []
                batch_masks = []

                # Read a batch of frames
                for _ in range(batch_size):
                    ret, frame = cap.read()
                    if not ret:
                        break
                    batch_frames.append(frame)

                if not batch_frames:
                    break

                # Get masks for the current batch and perform just-in-time upscaling
                for i in range(len(batch_frames)):
                    current_frame_idx = frame_idx + i
                    if current_frame_idx in video_segments:
                        frame_masks = video_segments[current_frame_idx]
                        upscaled_masks = []
                        for obj_id, mask in frame_masks.items():
                            mask = mask.squeeze()
                            if mask.shape != (frame_height, frame_width):
                                upscaled_mask = cv2.resize(mask.astype(np.uint8),
                                                           (frame_width, frame_height),
                                                           interpolation=cv2.INTER_NEAREST)
                                upscaled_masks.append(upscaled_mask)
                            else:
                                upscaled_masks.append(mask.astype(np.uint8))
                        batch_masks.append(upscaled_masks)
                    else:
                        batch_masks.append([])  # No masks for this frame

                # Process the batch
                result_batch = []
                for i, frame in enumerate(batch_frames):
                    masks = batch_masks[i]
                    if masks:
                        if self.output_mode == "alpha_channel":
                            result_frame = self.apply_mask_with_alpha(frame, masks)
                        else:
                            result_frame = self.apply_green_mask(frame, masks)
                    else:
                        # No mask for this frame
                        if self.output_mode == "alpha_channel":
                            bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
                            bgra_frame[:, :, 3] = 0
                            result_frame = bgra_frame
                        else:
                            result_frame = frame
                    result_batch.append(result_frame)

                # Write the processed batch
                for result_frame in result_batch:
                    if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                        self.alpha_process.stdin.write(result_frame.tobytes())
                    elif use_nvenc and hasattr(self, 'nvenc_process'):
                        self.nvenc_process.stdin.write(result_frame.tobytes())
                    else:
                        out_writer.write(result_frame)

                processed_frames += len(batch_frames)
                frame_idx += len(batch_frames)

                # Log progress roughly every 100 frames
                if processed_frames % 100 < batch_size:
                    logger.info(f"Processed {processed_frames}/{total_frames} frames")

            # Cleanup
            cap.release()
            if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                self.alpha_process.stdin.close()
                self.alpha_process.wait()
            elif use_nvenc and hasattr(self, 'nvenc_process'):
                self.nvenc_process.stdin.close()
                self.nvenc_process.wait()
            else:
                if out_writer:
                    out_writer.release()

            logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}")
            return True

        except Exception as e:
            logger.error(f"Error processing video: {e}", exc_info=True)
            return False

    def _setup_nvenc_encoder(self, output_path: str, width: int, height: int,
                             fps: float, bitrate: str) -> bool:
        """Setup hardware encoder (NVENC, or VideoToolbox on macOS) using FFmpeg."""
        try:
            # Determine encoder based on platform
            if sys.platform == 'darwin':
                encoder = 'hevc_videotoolbox'
            else:
                encoder = 'hevc_nvenc'

            command = [
                'ffmpeg',
                '-y',  # Overwrite output file
                '-f', 'rawvideo',
                '-vcodec', 'rawvideo',
                '-pix_fmt', 'bgr24',
                '-s', f'{width}x{height}',
                '-r', str(fps),
                '-i', '-',  # Input from stdin
                '-an',  # No audio (will be added later)
                '-vcodec', encoder,
                '-pix_fmt', 'yuv420p',  # Changed from nv12 for better compatibility
                '-preset', 'slow',
                '-b:v', bitrate,
                output_path
            ]

            # Discard stderr: with stderr=PIPE and no reader, FFmpeg can block
            # once the pipe buffer fills on long encodes
            self.nvenc_process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                                  stderr=subprocess.DEVNULL)
            logger.info(f"Initialized {encoder} hardware encoder")
            return True

        except Exception as e:
            logger.error(f"Failed to setup NVENC encoder: {e}")
            return False

    def _setup_alpha_encoder(self, output_path: str, width: int, height: int,
                             fps: float, bitrate: str) -> bool:
        """Setup encoder for alpha channel video using FFmpeg with H.265."""
        try:
            # For VR180 SBS we use H.265 (HEVC). Note that standard H.264/H.265
            # streams do not carry an alpha channel: BGRA frames are accepted on
            # stdin, but the yuv420p output format flattens the alpha away.

            # Determine encoder based on platform
            if sys.platform == 'darwin':
                encoder = 'hevc_videotoolbox'
            else:
                encoder = 'hevc_nvenc'

            command = [
                'ffmpeg',
                '-y',  # Overwrite output file
                '-f', 'rawvideo',
                '-vcodec', 'rawvideo',
                '-pix_fmt', 'bgra',  # BGRA input for the alpha channel
                '-s', f'{width}x{height}',
                '-r', str(fps),
                '-i', '-',  # Input from stdin
                '-an',  # No audio (will be added later)
                '-c:v', encoder,
                '-pix_fmt', 'yuv420p',  # Standard pixel format (drops alpha)
                '-preset', 'slow',
                '-b:v', bitrate,
                '-tag:v', 'hvc1',  # Required for some players
                output_path
            ]

            # Discard stderr to avoid blocking on a full, unread pipe buffer
            self.alpha_process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                                  stderr=subprocess.DEVNULL)
            self.alpha_output_path = output_path
            logger.info(f"Initialized {encoder} for alpha channel output")
            return True

        except Exception as e:
            logger.error(f"Failed to setup alpha encoder: {e}")
            return False

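    # Note: if the alpha channel actually needs to survive in the output file,
    # a codec with alpha support would be required instead, e.g. ProRes 4444
    # ('-c:v prores_ks -pix_fmt yuva444p10le' into a .mov container). This is
    # a suggestion, not something the current pipeline does.
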
    def process_segment(self, segment_info: dict, video_segments: Dict[int, Dict[int, np.ndarray]],
                        use_nvenc: bool = False, bitrate: str = "50M") -> bool:
        """
        Process a single segment and save the output video.

        Args:
            segment_info: Segment information dictionary
            video_segments: Dictionary of frame masks from SAM2
            use_nvenc: Whether to use hardware encoding
            bitrate: Output video bitrate

        Returns:
            True if successful
        """
        input_video = segment_info['video_file']
        if self.output_mode == "alpha_channel":
            output_video = os.path.join(segment_info['directory'], f"output_{segment_info['index']}.mov")
        else:
            output_video = os.path.join(segment_info['directory'], f"output_{segment_info['index']}.mp4")

        logger.info(f"Processing segment {segment_info['index']} with {self.output_mode}")

        success = self.process_and_save_output_video(
            input_video,
            output_video,
            video_segments,
            use_nvenc,
            bitrate
        )

        if success:
            logger.info(f"Successfully created {self.output_mode} video: {output_video}")
            # Mark segment as completed only after the video is successfully written
            try:
                output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
                with open(output_done_file, 'w') as f:
                    f.write(f"Segment {segment_info['index']} processed and saved successfully.")
                logger.debug(f"Created completion marker for segment {segment_info['index']}")
            except Exception as e:
                logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
        else:
            logger.error(f"Failed to process segment {segment_info['index']}")

        return success

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Optional[List[int]] = None) -> np.ndarray:
        """
        Create a full greenscreen frame for fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for the green screen color (uses default if None)

        Returns:
            Full greenscreen frame
        """
        if green_color is None:
            green_color = self.green_color

        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame

    def process_greenscreen_only_segment(self, segment_info: dict,
                                         green_color: Optional[List[int]] = None,
                                         use_nvenc: bool = False, bitrate: str = "50M") -> bool:
        """
        Create a full greenscreen segment when no humans are detected.
        Used as a fallback in separate eye processing mode.

        Args:
            segment_info: Segment information dictionary
            green_color: BGR values for the green screen color (uses default if None)
            use_nvenc: Whether to use hardware encoding
            bitrate: Output video bitrate

        Returns:
            True if the greenscreen segment was created successfully
        """
        segment_dir = segment_info['directory']
        video_path = segment_info['video_file']
        segment_idx = segment_info['index']

        logger.info(f"Creating full greenscreen segment {segment_idx} (no humans detected)")

        try:
            # Get video properties
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {video_path}")
                return False

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()

            # Create output video path
            if self.output_mode == "alpha_channel":
                output_video_path = os.path.join(segment_dir, f"output_{segment_idx}.mov")
            else:
                output_video_path = os.path.join(segment_dir, f"output_{segment_idx}.mp4")

            # Create greenscreen frame
            if green_color is None:
                green_color = self.green_color

            greenscreen_frame = self.create_full_greenscreen_frame(
                (height, width, 3), green_color
            )

            # Setup video writer based on mode and hardware encoding preference
            if use_nvenc:
                success = self._write_greenscreen_with_nvenc(
                    output_video_path, greenscreen_frame, frame_count, fps, bitrate
                )
            else:
                success = self._write_greenscreen_with_opencv(
                    output_video_path, greenscreen_frame, frame_count, fps
                )

            if not success:
                logger.error(f"Failed to write greenscreen video for segment {segment_idx}")
                return False

            # Create an empty mask file (black mask, since no humans were detected)
            mask_output_path = os.path.join(segment_dir, "mask.png")
            black_mask = np.zeros((height, width, 3), dtype=np.uint8)
            cv2.imwrite(mask_output_path, black_mask)

            # Mark segment as completed
            output_done_file = os.path.join(segment_dir, "output_frames_done")
            with open(output_done_file, 'w') as f:
                f.write(f"Greenscreen segment {segment_idx} completed successfully\n")

            logger.info(f"Successfully created greenscreen segment {segment_idx}")
            return True

        except Exception as e:
            logger.error(f"Error creating greenscreen segment {segment_idx}: {e}")
            return False

    def _write_greenscreen_with_opencv(self, output_path: str, greenscreen_frame: np.ndarray,
                                       frame_count: int, fps: float) -> bool:
        """Write greenscreen video using the OpenCV VideoWriter."""
        try:
            if self.output_mode == "alpha_channel":
                # For alpha channel mode, create fully transparent frames
                bgra_frame = cv2.cvtColor(greenscreen_frame, cv2.COLOR_BGR2BGRA)
                bgra_frame[:, :, 3] = 0  # Fully transparent
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps,
                                      (greenscreen_frame.shape[1], greenscreen_frame.shape[0]), True)
                frame_to_write = bgra_frame[:, :, :3]  # OpenCV expects BGR for mp4v
            else:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps,
                                      (greenscreen_frame.shape[1], greenscreen_frame.shape[0]))
                frame_to_write = greenscreen_frame

            if not out.isOpened():
                logger.error(f"Failed to open video writer for {output_path}")
                return False

            # Write identical greenscreen frames
            for _ in range(frame_count):
                out.write(frame_to_write)

            out.release()
            logger.debug(f"Wrote {frame_count} greenscreen frames using OpenCV")
            return True

        except Exception as e:
            logger.error(f"Error writing greenscreen with OpenCV: {e}")
            return False

    def _write_greenscreen_with_nvenc(self, output_path: str, greenscreen_frame: np.ndarray,
                                      frame_count: int, fps: float, bitrate: str) -> bool:
        """Write greenscreen video using NVENC hardware encoding."""
        try:
            # Setup NVENC encoder
            if not self._setup_nvenc_encoder(output_path,
                                             greenscreen_frame.shape[1],
                                             greenscreen_frame.shape[0],
                                             fps, bitrate):
                logger.warning("NVENC setup failed for greenscreen, falling back to OpenCV")
                return self._write_greenscreen_with_opencv(output_path, greenscreen_frame, frame_count, fps)

            # Write identical greenscreen frames
            for _ in range(frame_count):
                self.nvenc_process.stdin.write(greenscreen_frame.tobytes())

            # Finalize encoding
            self.nvenc_process.stdin.close()
            self.nvenc_process.wait()

            if self.nvenc_process.returncode != 0:
                logger.error("NVENC encoding failed for greenscreen")
                return False

            logger.debug(f"Wrote {frame_count} greenscreen frames using NVENC")
            return True

        except Exception as e:
            logger.error(f"Error writing greenscreen with NVENC: {e}")
            return False

    def has_valid_masks(self, video_segments: Optional[Dict[int, Dict[int, np.ndarray]]]) -> bool:
        """
        Check if video segments contain valid masks.

        Args:
            video_segments: Video segments dictionary from SAM2

        Returns:
            True if valid masks are found
        """
        if not video_segments:
            return False

        # Check if any frame has non-empty masks
        for frame_idx, frame_masks in video_segments.items():
            for obj_id, mask in frame_masks.items():
                if mask is not None and np.any(mask):
                    return True

        return False
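

# Example usage (illustrative sketch only; the file paths and the shape of the
# SAM2 mask dictionary below are assumptions, not part of this module):
#
#   processor = MaskProcessor(
#       mask_quality_config={'enable_edge_blur': True, 'enable_temporal_smoothing': True},
#       output_mode="green_screen",
#   )
#   # video_segments maps frame index -> {object id -> mask array}
#   video_segments = {0: {1: np.ones((1080, 1920), dtype=bool)}}
#   if processor.has_valid_masks(video_segments):
#       processor.process_and_save_output_video(
#           "segment_0/input.mp4", "segment_0/output_0.mp4",
#           video_segments, use_nvenc=False, bitrate="50M",
#       )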