""" Mask processor module for applying green screen effects. Handles applying masks to video frames to create green screen output. """ import os import cv2 import numpy as np import cupy as cp import subprocess import sys import logging from typing import Dict, List, Any, Optional, Tuple from collections import deque logger = logging.getLogger(__name__) class MaskProcessor: """Handles mask application and green screen processing with quality enhancements.""" def __init__(self, green_color: List[int] = [0, 255, 0], blue_color: List[int] = [255, 0, 0], mask_quality_config: Optional[Dict[str, Any]] = None, output_mode: str = "green_screen"): """ Initialize mask processor with quality enhancement options. Args: green_color: RGB color for green screen background blue_color: RGB color for second object (if needed) mask_quality_config: Configuration dictionary for mask quality improvements output_mode: Output mode - "green_screen" or "alpha_channel" """ self.green_color = green_color self.blue_color = blue_color self.output_mode = output_mode self.use_gpu = self._check_gpu_availability() # Mask quality configuration with defaults if mask_quality_config is None: mask_quality_config = {} self.enable_edge_blur = mask_quality_config.get('enable_edge_blur', False) self.edge_blur_radius = mask_quality_config.get('edge_blur_radius', 3) self.edge_blur_sigma = mask_quality_config.get('edge_blur_sigma', 1.5) self.enable_temporal_smoothing = mask_quality_config.get('enable_temporal_smoothing', False) self.temporal_blend_weight = mask_quality_config.get('temporal_blend_weight', 0.3) self.temporal_history_frames = mask_quality_config.get('temporal_history_frames', 3) self.enable_morphological_cleaning = mask_quality_config.get('enable_morphological_cleaning', False) self.morphology_kernel_size = mask_quality_config.get('morphology_kernel_size', 5) self.min_component_size = mask_quality_config.get('min_component_size', 500) self.alpha_blending_mode = mask_quality_config.get('alpha_blending_mode', 'gaussian') self.alpha_transition_width = mask_quality_config.get('alpha_transition_width', 10) self.enable_bilateral_filter = mask_quality_config.get('enable_bilateral_filter', False) self.bilateral_d = mask_quality_config.get('bilateral_d', 9) self.bilateral_sigma_color = mask_quality_config.get('bilateral_sigma_color', 75) self.bilateral_sigma_space = mask_quality_config.get('bilateral_sigma_space', 75) # Temporal history buffer for mask smoothing self.mask_history = deque(maxlen=self.temporal_history_frames) # Log configuration if any([self.enable_edge_blur, self.enable_temporal_smoothing, self.enable_morphological_cleaning]): logger.info("Mask quality enhancements enabled:") if self.enable_edge_blur: logger.info(f" Edge blur: radius={self.edge_blur_radius}, sigma={self.edge_blur_sigma}") if self.enable_temporal_smoothing: logger.info(f" Temporal smoothing: weight={self.temporal_blend_weight}, history={self.temporal_history_frames}") if self.enable_morphological_cleaning: logger.info(f" Morphological cleaning: kernel={self.morphology_kernel_size}, min_size={self.min_component_size}") logger.info(f" Alpha blending: mode={self.alpha_blending_mode}, width={self.alpha_transition_width}") else: logger.info("Mask quality enhancements disabled - using standard binary masking") logger.info(f"Output mode: {self.output_mode}") def _check_gpu_availability(self) -> bool: """Check if CuPy GPU acceleration is available.""" try: import cupy as cp # Test GPU availability test_array = cp.array([1, 2, 3]) _ = test_array * 2 
logger.info("GPU acceleration available via CuPy") return True except Exception as e: logger.warning(f"GPU acceleration not available, using CPU: {e}") return False def enhance_mask_quality(self, mask: np.ndarray) -> np.ndarray: """ Apply all enabled mask quality enhancements. Args: mask: Input binary mask Returns: Enhanced mask with quality improvements applied """ enhanced_mask = mask.copy() # 1. Morphological cleaning if self.enable_morphological_cleaning: enhanced_mask = self._clean_mask_morphologically(enhanced_mask) # 2. Temporal smoothing if self.enable_temporal_smoothing: enhanced_mask = self._apply_temporal_smoothing(enhanced_mask) # 3. Edge enhancement and blurring if self.enable_edge_blur: enhanced_mask = self._apply_edge_blur(enhanced_mask) # 4. Bilateral filtering (if enabled) if self.enable_bilateral_filter: enhanced_mask = self._apply_bilateral_filter(enhanced_mask) return enhanced_mask def _clean_mask_morphologically(self, mask: np.ndarray) -> np.ndarray: """ Clean mask using morphological operations to remove noise and small artifacts. Args: mask: Input binary mask Returns: Cleaned mask """ # Convert to uint8 for OpenCV operations mask_uint8 = (mask * 255).astype(np.uint8) # Create morphological kernel kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (self.morphology_kernel_size, self.morphology_kernel_size)) # Opening operation (erosion followed by dilation) to remove small noise cleaned = cv2.morphologyEx(mask_uint8, cv2.MORPH_OPEN, kernel) # Closing operation (dilation followed by erosion) to fill small holes cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel) # Remove small connected components if self.min_component_size > 0: cleaned = self._remove_small_components(cleaned) return (cleaned / 255.0).astype(np.float32) def _remove_small_components(self, mask: np.ndarray) -> np.ndarray: """ Remove connected components smaller than minimum size. Args: mask: Input binary mask (uint8) Returns: Mask with small components removed """ # Find connected components num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, connectivity=8) # Create output mask output_mask = np.zeros_like(mask) # Keep components larger than minimum size (skip background label 0) for i in range(1, num_labels): component_size = stats[i, cv2.CC_STAT_AREA] if component_size >= self.min_component_size: output_mask[labels == i] = 255 return output_mask def _apply_temporal_smoothing(self, mask: np.ndarray) -> np.ndarray: """ Apply temporal smoothing using mask history. Args: mask: Current frame mask Returns: Temporally smoothed mask """ if len(self.mask_history) == 0: # First frame, no history to blend with self.mask_history.append(mask.copy()) return mask # Blend with previous frames using weighted average smoothed_mask = mask.astype(np.float32) total_weight = 1.0 for i, hist_mask in enumerate(reversed(self.mask_history)): # Exponential decay: more recent frames have higher weight frame_weight = self.temporal_blend_weight * (0.8 ** i) smoothed_mask += hist_mask.astype(np.float32) * frame_weight total_weight += frame_weight # Normalize by total weight smoothed_mask /= total_weight # Update history self.mask_history.append(mask.copy()) return smoothed_mask def _apply_edge_blur(self, mask: np.ndarray) -> np.ndarray: """ Apply Gaussian blur to mask edges for smooth transitions. 
    def _apply_bilateral_filter(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply bilateral filtering for edge-preserving smoothing.

        Args:
            mask: Input mask

        Returns:
            Filtered mask
        """
        # Convert to uint8 for bilateral filter
        mask_uint8 = (mask * 255).astype(np.uint8)

        # Apply bilateral filter
        filtered = cv2.bilateralFilter(mask_uint8,
                                       self.bilateral_d,
                                       self.bilateral_sigma_color,
                                       self.bilateral_sigma_space)

        return (filtered / 255.0).astype(np.float32)

    def _create_alpha_mask(self, mask: np.ndarray) -> np.ndarray:
        """
        Create alpha mask with smooth transitions based on blending mode.

        Args:
            mask: Input binary/float mask

        Returns:
            Alpha mask with smooth transitions
        """
        if self.alpha_blending_mode == "linear":
            return mask

        elif self.alpha_blending_mode == "gaussian":
            # Use distance transform for smooth falloff
            binary_mask = (mask > 0.5).astype(np.uint8)

            # Distance transform from mask edges
            dist_inside = cv2.distanceTransform(binary_mask, cv2.DIST_L2, 5)
            dist_outside = cv2.distanceTransform(1 - binary_mask, cv2.DIST_L2, 5)

            # Create smooth alpha based on distance
            alpha = np.zeros_like(mask, dtype=np.float32)
            transition_width = self.alpha_transition_width

            # Inside mask: fade in from the edge
            alpha[binary_mask > 0] = np.minimum(1.0, dist_inside[binary_mask > 0] / transition_width)

            # Outside mask: fade to zero
            alpha[binary_mask == 0] = np.maximum(0.0, 1.0 - dist_outside[binary_mask == 0] / transition_width)

            return alpha

        elif self.alpha_blending_mode == "sigmoid":
            # Sigmoid-based smooth transition
            return 1.0 / (1.0 + np.exp(-10 * (mask - 0.5)))

        else:
            return mask

    def apply_green_mask(self, frame: np.ndarray, masks: List[np.ndarray]) -> np.ndarray:
        """
        Apply green screen mask to a frame with quality enhancements.

        Args:
            frame: Input video frame (BGR format)
            masks: List of object masks to apply

        Returns:
            Frame with green screen background and enhanced mask quality
        """
        # Combine all masks into a single mask
        combined_mask = self._combine_masks(masks)

        # Apply quality enhancements
        enhanced_mask = self.enhance_mask_quality(combined_mask)

        # Create alpha mask for smooth blending
        alpha_mask = self._create_alpha_mask(enhanced_mask)

        # Apply mask using alpha blending
        if self.use_gpu:
            return self._apply_green_mask_gpu_enhanced(frame, alpha_mask)
        else:
            return self._apply_green_mask_cpu_enhanced(frame, alpha_mask)

    def apply_mask_with_alpha(self, frame: np.ndarray, masks: List[np.ndarray]) -> np.ndarray:
        """
        Apply mask to create a BGRA frame with alpha channel.

        Args:
            frame: Input video frame (BGR format)
            masks: List of object masks to apply

        Returns:
            BGRA frame with alpha channel
        """
        # Combine all masks into a single mask
        combined_mask = self._combine_masks(masks)

        # Apply quality enhancements
        enhanced_mask = self.enhance_mask_quality(combined_mask)

        # Create alpha mask for smooth blending
        alpha_mask = self._create_alpha_mask(enhanced_mask)

        # Resize alpha mask to match frame if needed
        if alpha_mask.shape != frame.shape[:2]:
            alpha_mask = cv2.resize(alpha_mask, (frame.shape[1], frame.shape[0]))

        # Convert BGR to BGRA
        bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)

        # Set alpha channel
        bgra_frame[:, :, 3] = (alpha_mask * 255).astype(np.uint8)

        return bgra_frame

    def _combine_masks(self, masks: List[np.ndarray]) -> np.ndarray:
        """
        Combine multiple object masks into a single mask.

        Args:
            masks: List of object masks

        Returns:
            Combined mask
        """
        if not masks:
            return np.zeros((0, 0), dtype=np.float32)

        # Start with first mask
        combined_mask = masks[0].squeeze().astype(np.float32)

        # Combine with remaining masks via element-wise maximum (a fuzzy OR)
        for mask in masks[1:]:
            mask_squeezed = mask.squeeze().astype(np.float32)
            if mask_squeezed.shape != combined_mask.shape:
                # Resize mask to match combined mask
                mask_squeezed = cv2.resize(mask_squeezed,
                                           (combined_mask.shape[1], combined_mask.shape[0]),
                                           interpolation=cv2.INTER_NEAREST)
            combined_mask = np.maximum(combined_mask, mask_squeezed)

        return combined_mask

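    # Worked example for the "gaussian" mode of _create_alpha_mask above,
    # with the default alpha_transition_width=10: a pixel 5 px inside the
    # mask edge gets alpha 5/10 = 0.5, a pixel 10+ px inside gets 1.0, and
    # a pixel 4 px outside gets 1 - 4/10 = 0.6, so opacity ramps roughly
    # linearly across a ~20 px band centred on the mask boundary.
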
    def reset_temporal_history(self):
        """Reset temporal history buffer. Call this when starting a new segment."""
        self.mask_history.clear()
        logger.debug("Temporal history buffer reset")

    def _apply_green_mask_gpu_enhanced(self, frame: np.ndarray, alpha_mask: np.ndarray) -> np.ndarray:
        """GPU-accelerated green mask application with alpha blending using CuPy (Phase 1 optimized)."""
        try:
            # Convert to CuPy arrays with optimized data transfer
            frame_gpu = cp.asarray(frame, dtype=cp.uint8)
            alpha_gpu = cp.asarray(alpha_mask, dtype=cp.float32)

            # Resize alpha mask to match frame if needed. OpenCV cannot
            # operate on device arrays, so round-trip through host memory
            # for the resize only.
            if alpha_gpu.shape != frame_gpu.shape[:2]:
                alpha_gpu = cp.asarray(cv2.resize(cp.asnumpy(alpha_gpu),
                                                  (frame_gpu.shape[1], frame_gpu.shape[0])))

            # Create green background by broadcasting the 3-element colour
            green_color_gpu = cp.array(self.green_color, dtype=cp.uint8)
            green_background = cp.broadcast_to(green_color_gpu, frame_gpu.shape)

            # Apply vectorized alpha blending with optimized memory access
            alpha_3d = cp.expand_dims(alpha_gpu, axis=2)

            # Use more efficient computation with explicit typing
            frame_float = frame_gpu.astype(cp.float32)
            green_float = green_background.astype(cp.float32)

            # Vectorized blending operation
            result_frame = cp.clip(alpha_3d * frame_float + (1.0 - alpha_3d) * green_float, 0, 255)

            return cp.asnumpy(result_frame.astype(cp.uint8))

        except Exception as e:
            logger.error(f"GPU enhanced processing failed, falling back to CPU: {e}")
            return self._apply_green_mask_cpu_enhanced(frame, alpha_mask)

    def _apply_green_mask_cpu_enhanced(self, frame: np.ndarray, alpha_mask: np.ndarray) -> np.ndarray:
        """CPU-based green mask application with alpha blending (Phase 1 optimized)."""
        # Resize alpha mask to match frame if needed
        if alpha_mask.shape != frame.shape[:2]:
            alpha_mask = cv2.resize(alpha_mask, (frame.shape[1], frame.shape[0]))

        # Create green background with broadcasting (more efficient)
        green_color = np.array(self.green_color, dtype=np.uint8)
        green_background = np.broadcast_to(green_color, frame.shape)

        # Apply optimized alpha blending with explicit data types
        alpha_3d = np.expand_dims(alpha_mask.astype(np.float32), axis=2)

        # Vectorized blending with optimized memory access
        frame_float = frame.astype(np.float32)
        green_float = green_background.astype(np.float32)

        result_frame = np.clip(alpha_3d * frame_float + (1.0 - alpha_3d) * green_float, 0, 255)

        return result_frame.astype(np.uint8)

    def apply_colored_mask(self, frame: np.ndarray,
                           masks_a: List[np.ndarray],
                           masks_b: List[np.ndarray]) -> np.ndarray:
        """
        Apply colored masks for visualization (green and blue).

        Args:
            frame: Input video frame
            masks_a: Masks for object A (green)
            masks_b: Masks for object B (blue)

        Returns:
            Frame with colored masks applied
        """
        colored_mask = np.zeros_like(frame)

        # Apply green color to masks_a
        for mask in masks_a:
            mask = mask.squeeze()
            if mask.shape != frame.shape[:2]:
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)
            colored_mask[mask > 0] = self.green_color

        # Apply blue color to masks_b
        for mask in masks_b:
            mask = mask.squeeze()
            if mask.shape != frame.shape[:2]:
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)
            colored_mask[mask > 0] = self.blue_color

        return colored_mask

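    # Blend sanity check for the two helpers above: with alpha = 0.25, a
    # frame pixel of (200, 120, 40) and the default green (0, 255, 0),
    #
    #     result = 0.25 * (200, 120, 40) + 0.75 * (0, 255, 0)
    #            = (50.0, 221.25, 10.0) -> (50, 221, 10) after uint8 cast,
    #
    # i.e. mostly green with a quarter of the subject showing through.
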
    def process_and_save_output_video(self, video_path: str, output_video_path: str,
                                      video_segments: Dict[int, Dict[int, np.ndarray]],
                                      use_nvenc: bool = False, bitrate: str = "50M",
                                      batch_size: int = 16) -> bool:
        """
        Process high-resolution frames, apply upscaled masks, and save the output video.

        Args:
            video_path: Path to input video
            output_video_path: Path to save output video
            video_segments: Dictionary of frame masks
            use_nvenc: Whether to use NVIDIA hardware encoding
            bitrate: Output video bitrate
            batch_size: Number of frames to process in a single batch

        Returns:
            True if successful
        """
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {video_path}")
                return False

            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")

            # Setup VideoWriter
            out_writer = None
            if self.output_mode == "alpha_channel":
                success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.error("Failed to setup alpha channel encoder")
                    cap.release()
                    return False
                use_nvenc = False
            elif use_nvenc:
                success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.warning("NVENC setup failed, falling back to OpenCV")
                    use_nvenc = False

            if not use_nvenc and self.output_mode != "alpha_channel":
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
                if not out_writer.isOpened():
                    logger.error("Failed to create output video writer")
                    cap.release()
                    return False

            # Process frames in batches
            frame_idx = 0
            processed_frames = 0

            while frame_idx < total_frames:
                batch_frames = []
                batch_masks = []

                # Read a batch of frames
                for _ in range(batch_size):
                    ret, frame = cap.read()
                    if not ret:
                        break
                    batch_frames.append(frame)

                if not batch_frames:
                    break

                # Get masks for the current batch and perform just-in-time upscaling
                for i in range(len(batch_frames)):
                    current_frame_idx = frame_idx + i
                    if current_frame_idx in video_segments:
                        frame_masks = video_segments[current_frame_idx]
                        upscaled_masks = []
                        for obj_id, mask in frame_masks.items():
                            mask = mask.squeeze()
                            if mask.shape != (frame_height, frame_width):
                                upscaled_mask = cv2.resize(mask.astype(np.uint8),
                                                           (frame_width, frame_height),
                                                           interpolation=cv2.INTER_NEAREST)
                                upscaled_masks.append(upscaled_mask)
                            else:
                                upscaled_masks.append(mask.astype(np.uint8))
                        batch_masks.append(upscaled_masks)
                    else:
                        batch_masks.append([])  # No masks for this frame

                # Process the batch
                result_batch = []
                for i, frame in enumerate(batch_frames):
                    masks = batch_masks[i]
                    if masks:
                        if self.output_mode == "alpha_channel":
"alpha_channel": result_frame = self.apply_mask_with_alpha(frame, masks) else: result_frame = self.apply_green_mask(frame, masks) else: # No mask for this frame if self.output_mode == "alpha_channel": bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA) bgra_frame[:, :, 3] = 0 result_frame = bgra_frame else: result_frame = frame result_batch.append(result_frame) # Write the processed batch for result_frame in result_batch: if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'): self.alpha_process.stdin.write(result_frame.tobytes()) elif use_nvenc and hasattr(self, 'nvenc_process'): self.nvenc_process.stdin.write(result_frame.tobytes()) else: out_writer.write(result_frame) processed_frames += len(batch_frames) frame_idx += len(batch_frames) if processed_frames % 100 < batch_size: logger.info(f"Processed {processed_frames}/{total_frames} frames") # Cleanup cap.release() if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'): self.alpha_process.stdin.close() self.alpha_process.wait() elif use_nvenc and hasattr(self, 'nvenc_process'): self.nvenc_process.stdin.close() self.nvenc_process.wait() else: if out_writer: out_writer.release() logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}") return True except Exception as e: logger.error(f"Error processing video: {e}", exc_info=True) return False def _setup_nvenc_encoder(self, output_path: str, width: int, height: int, fps: float, bitrate: str) -> bool: """Setup NVENC hardware encoder using FFmpeg.""" try: # Determine encoder based on platform if sys.platform == 'darwin': encoder = 'hevc_videotoolbox' else: encoder = 'hevc_nvenc' command = [ 'ffmpeg', '-y', # Overwrite output file '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', 'bgr24', '-s', f'{width}x{height}', '-r', str(fps), '-i', '-', # Input from stdin '-an', # No audio (will be added later) '-vcodec', encoder, '-pix_fmt', 'yuv420p', # Changed from nv12 for better compatibility '-preset', 'slow', '-b:v', bitrate, output_path ] self.nvenc_process = subprocess.Popen(command, stdin=subprocess.PIPE, stderr=subprocess.PIPE) logger.info(f"Initialized {encoder} hardware encoder") return True except Exception as e: logger.error(f"Failed to setup NVENC encoder: {e}") return False def _setup_alpha_encoder(self, output_path: str, width: int, height: int, fps: float, bitrate: str) -> bool: """Setup encoder for alpha channel video using FFmpeg with H.264/H.265.""" try: # For VR180 SBS, we'll use H.265 (HEVC) with alpha channel # Note: Standard H.264/H.265 don't support alpha directly, # so we'll encode the alpha as a separate grayscale channel or use a special pixel format # Determine encoder based on platform if sys.platform == 'darwin': encoder = 'hevc_videotoolbox' else: encoder = 'hevc_nvenc' command = [ 'ffmpeg', '-y', # Overwrite output file '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', 'bgra', # BGRA for alpha channel '-s', f'{width}x{height}', '-r', str(fps), '-i', '-', # Input from stdin '-an', # No audio (will be added later) '-c:v', encoder, '-pix_fmt', 'yuv420p', # Standard pixel format '-preset', 'slow', '-b:v', bitrate, '-tag:v', 'hvc1', # Required for some players output_path ] self.alpha_process = subprocess.Popen(command, stdin=subprocess.PIPE, stderr=subprocess.PIPE) self.alpha_output_path = output_path logger.info(f"Initialized {encoder} for alpha channel output (will be encoded as transparency in RGB)") return True except Exception as e: logger.error(f"Failed to setup alpha encoder: {e}") return False 
    def process_segment(self, segment_info: dict,
                        video_segments: Dict[int, Dict[int, np.ndarray]],
                        use_nvenc: bool = False, bitrate: str = "50M") -> bool:
        """
        Process a single segment and save the output video.

        Args:
            segment_info: Segment information dictionary
            video_segments: Dictionary of frame masks from SAM2
            use_nvenc: Whether to use hardware encoding
            bitrate: Output video bitrate

        Returns:
            True if successful
        """
        input_video = segment_info['video_file']

        if self.output_mode == "alpha_channel":
            output_video = os.path.join(segment_info['directory'],
                                        f"output_{segment_info['index']}.mov")
        else:
            output_video = os.path.join(segment_info['directory'],
                                        f"output_{segment_info['index']}.mp4")

        logger.info(f"Processing segment {segment_info['index']} with {self.output_mode}")

        success = self.process_and_save_output_video(
            input_video, output_video, video_segments, use_nvenc, bitrate
        )

        if success:
            logger.info(f"Successfully created {self.output_mode} video: {output_video}")

            # Mark segment as completed only after video is successfully written
            try:
                output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
                with open(output_done_file, 'w') as f:
                    f.write(f"Segment {segment_info['index']} processed and saved successfully.")
                logger.debug(f"Created completion marker for segment {segment_info['index']}")
            except Exception as e:
                logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
        else:
            logger.error(f"Failed to process segment {segment_info['index']}")

        return success

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Optional[List[int]] = None) -> np.ndarray:
        """
        Create a full greenscreen frame for fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for green screen color (uses default if None)

        Returns:
            Full greenscreen frame
        """
        if green_color is None:
            green_color = self.green_color

        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame

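    # Shape of the segment_info dict consumed by process_segment above and
    # process_greenscreen_only_segment below (keys as read by this module;
    # the values here are purely illustrative):
    #
    #     segment_info = {
    #         'video_file': '/path/to/segments/segment_3/input.mp4',
    #         'directory': '/path/to/segments/segment_3',
    #         'index': 3,
    #     }
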
    def process_greenscreen_only_segment(self, segment_info: dict,
                                         green_color: Optional[List[int]] = None,
                                         use_nvenc: bool = False,
                                         bitrate: str = "50M") -> bool:
        """
        Create a full greenscreen segment when no humans are detected.
        Used as fallback in separate eye processing mode.

        Args:
            segment_info: Segment information dictionary
            green_color: BGR values for green screen color (uses default if None)
            use_nvenc: Whether to use hardware encoding
            bitrate: Output video bitrate

        Returns:
            True if greenscreen segment was created successfully
        """
        segment_dir = segment_info['directory']
        video_path = segment_info['video_file']
        segment_idx = segment_info['index']

        logger.info(f"Creating full greenscreen segment {segment_idx} (no humans detected)")

        try:
            # Get video properties
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {video_path}")
                return False

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()

            # Create output video path
            if self.output_mode == "alpha_channel":
                output_video_path = os.path.join(segment_dir, f"output_{segment_idx}.mov")
            else:
                output_video_path = os.path.join(segment_dir, f"output_{segment_idx}.mp4")

            # Create greenscreen frame
            if green_color is None:
                green_color = self.green_color
            greenscreen_frame = self.create_full_greenscreen_frame(
                (height, width, 3), green_color
            )

            # Setup video writer based on mode and hardware encoding preference
            if use_nvenc:
                success = self._write_greenscreen_with_nvenc(
                    output_video_path, greenscreen_frame, frame_count, fps, bitrate
                )
            else:
                success = self._write_greenscreen_with_opencv(
                    output_video_path, greenscreen_frame, frame_count, fps
                )

            if not success:
                logger.error(f"Failed to write greenscreen video for segment {segment_idx}")
                return False

            # Create empty mask file (black mask since no humans detected)
            mask_output_path = os.path.join(segment_dir, "mask.png")
            black_mask = np.zeros((height, width, 3), dtype=np.uint8)
            cv2.imwrite(mask_output_path, black_mask)

            # Mark segment as completed
            output_done_file = os.path.join(segment_dir, "output_frames_done")
            with open(output_done_file, 'w') as f:
                f.write(f"Greenscreen segment {segment_idx} completed successfully\n")

            logger.info(f"Successfully created greenscreen segment {segment_idx}")
            return True

        except Exception as e:
            logger.error(f"Error creating greenscreen segment {segment_idx}: {e}")
            return False

    def _write_greenscreen_with_opencv(self, output_path: str,
                                       greenscreen_frame: np.ndarray,
                                       frame_count: int, fps: float) -> bool:
        """Write greenscreen video using OpenCV VideoWriter."""
        try:
            if self.output_mode == "alpha_channel":
                # For alpha channel mode, create fully transparent frames.
                # Note: mp4v cannot store an alpha plane, so only the BGR
                # channels of the transparent frame are actually written.
                bgra_frame = cv2.cvtColor(greenscreen_frame, cv2.COLOR_BGR2BGRA)
                bgra_frame[:, :, 3] = 0  # Fully transparent
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps,
                                      (greenscreen_frame.shape[1], greenscreen_frame.shape[0]),
                                      True)
                frame_to_write = bgra_frame[:, :, :3]  # OpenCV expects BGR for mp4v
            else:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps,
                                      (greenscreen_frame.shape[1], greenscreen_frame.shape[0]))
                frame_to_write = greenscreen_frame

            if not out.isOpened():
                logger.error(f"Failed to open video writer for {output_path}")
                return False

            # Write identical greenscreen frames
            for _ in range(frame_count):
                out.write(frame_to_write)

            out.release()
            logger.debug(f"Wrote {frame_count} greenscreen frames using OpenCV")
            return True

        except Exception as e:
            logger.error(f"Error writing greenscreen with OpenCV: {e}")
            return False

    def _write_greenscreen_with_nvenc(self, output_path: str,
                                      greenscreen_frame: np.ndarray,
                                      frame_count: int, fps: float,
                                      bitrate: str) -> bool:
        """Write greenscreen video using NVENC hardware encoding."""
        try:
            # Setup NVENC encoder
            if not self._setup_nvenc_encoder(output_path,
                                             greenscreen_frame.shape[1],
                                             greenscreen_frame.shape[0],
                                             fps, bitrate):
                logger.warning("NVENC setup failed for greenscreen, falling back to OpenCV")
                return self._write_greenscreen_with_opencv(output_path, greenscreen_frame,
                                                           frame_count, fps)

            # Write identical greenscreen frames
            for _ in range(frame_count):
                self.nvenc_process.stdin.write(greenscreen_frame.tobytes())

            # Finalize encoding
            self.nvenc_process.stdin.close()
            self.nvenc_process.wait()

            if self.nvenc_process.returncode != 0:
                logger.error("NVENC encoding failed for greenscreen")
                return False

            logger.debug(f"Wrote {frame_count} greenscreen frames using NVENC")
            return True

        except Exception as e:
            logger.error(f"Error writing greenscreen with NVENC: {e}")
            return False

    def has_valid_masks(self, video_segments: Optional[Dict[int, Dict[int, np.ndarray]]]) -> bool:
        """
        Check if video segments contain valid masks.

        Args:
            video_segments: Video segments dictionary from SAM2

        Returns:
            True if valid masks are found
        """
        if not video_segments:
            return False

        # Check if any frame has non-empty masks
        for frame_idx, frame_masks in video_segments.items():
            for obj_id, mask in frame_masks.items():
                if mask is not None and np.any(mask):
                    return True

        return False
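

# Minimal smoke test (illustrative; run this file directly). It builds a
# synthetic frame plus a filled-circle mask and writes the green-screened
# result to disk; the output filename is arbitrary.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    processor = MaskProcessor(mask_quality_config={
        'enable_edge_blur': True,
        'enable_morphological_cleaning': True,
    })

    # Synthetic 480x640 grey frame and a circular "subject" mask
    frame = np.full((480, 640, 3), 128, dtype=np.uint8)
    mask = np.zeros((480, 640), dtype=np.float32)
    cv2.circle(mask, (320, 240), 100, 1.0, thickness=-1)

    result = processor.apply_green_mask(frame, [mask])
    cv2.imwrite("mask_processor_smoke_test.png", result)
    print(f"Wrote mask_processor_smoke_test.png, shape={result.shape}")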