"""
|
|
Mask processor module for applying green screen effects.
|
|
Handles applying masks to video frames to create green screen output.
|
|
"""
|
|
|
|
import os
|
|
import cv2
|
|
import numpy as np
|
|
import cupy as cp
|
|
import subprocess
|
|
import sys
|
|
import logging
|
|
from typing import Dict, List, Any, Optional, Tuple
|
|
from collections import deque
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MaskProcessor:
    """Handles mask application and green screen processing with quality enhancements."""

    def __init__(self, green_color: Optional[List[int]] = None, blue_color: Optional[List[int]] = None,
                 mask_quality_config: Optional[Dict[str, Any]] = None,
                 output_mode: str = "green_screen"):
        """
        Initialize mask processor with quality enhancement options.

        Args:
            green_color: BGR color for the green screen background (defaults to [0, 255, 0])
            blue_color: BGR color for a second object, if needed (defaults to [255, 0, 0])
            mask_quality_config: Configuration dictionary for mask quality improvements
            output_mode: Output mode - "green_screen" or "alpha_channel"
        """
        # Resolve colors here to avoid mutable default arguments
        self.green_color = green_color if green_color is not None else [0, 255, 0]
        self.blue_color = blue_color if blue_color is not None else [255, 0, 0]
        self.output_mode = output_mode
        self.use_gpu = self._check_gpu_availability()

        # Mask quality configuration with defaults
        if mask_quality_config is None:
            mask_quality_config = {}

        self.enable_edge_blur = mask_quality_config.get('enable_edge_blur', False)
        self.edge_blur_radius = mask_quality_config.get('edge_blur_radius', 3)
        self.edge_blur_sigma = mask_quality_config.get('edge_blur_sigma', 1.5)

        self.enable_temporal_smoothing = mask_quality_config.get('enable_temporal_smoothing', False)
        self.temporal_blend_weight = mask_quality_config.get('temporal_blend_weight', 0.3)
        self.temporal_history_frames = mask_quality_config.get('temporal_history_frames', 3)

        self.enable_morphological_cleaning = mask_quality_config.get('enable_morphological_cleaning', False)
        self.morphology_kernel_size = mask_quality_config.get('morphology_kernel_size', 5)
        self.min_component_size = mask_quality_config.get('min_component_size', 500)

        self.alpha_blending_mode = mask_quality_config.get('alpha_blending_mode', 'gaussian')
        self.alpha_transition_width = mask_quality_config.get('alpha_transition_width', 10)

        self.enable_bilateral_filter = mask_quality_config.get('enable_bilateral_filter', False)
        self.bilateral_d = mask_quality_config.get('bilateral_d', 9)
        self.bilateral_sigma_color = mask_quality_config.get('bilateral_sigma_color', 75)
        self.bilateral_sigma_space = mask_quality_config.get('bilateral_sigma_space', 75)

        # Temporal history buffer for mask smoothing
        self.mask_history = deque(maxlen=self.temporal_history_frames)

        # Log configuration
        if any([self.enable_edge_blur, self.enable_temporal_smoothing, self.enable_morphological_cleaning]):
            logger.info("Mask quality enhancements enabled:")
            if self.enable_edge_blur:
                logger.info(f"  Edge blur: radius={self.edge_blur_radius}, sigma={self.edge_blur_sigma}")
            if self.enable_temporal_smoothing:
                logger.info(f"  Temporal smoothing: weight={self.temporal_blend_weight}, history={self.temporal_history_frames}")
            if self.enable_morphological_cleaning:
                logger.info(f"  Morphological cleaning: kernel={self.morphology_kernel_size}, min_size={self.min_component_size}")
            logger.info(f"  Alpha blending: mode={self.alpha_blending_mode}, width={self.alpha_transition_width}")
        else:
            logger.info("Mask quality enhancements disabled - using standard binary masking")

        logger.info(f"Output mode: {self.output_mode}")

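    # Example: constructing a processor with the quality pipeline enabled
    # (illustrative values; the keys mirror the defaults documented above):
    #
    #   processor = MaskProcessor(mask_quality_config={
    #       'enable_edge_blur': True,
    #       'edge_blur_radius': 3,
    #       'enable_temporal_smoothing': True,
    #       'temporal_blend_weight': 0.3,
    #       'enable_morphological_cleaning': True,
    #       'min_component_size': 500,
    #   })
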
    def _check_gpu_availability(self) -> bool:
        """Check if CuPy GPU acceleration is available."""
        if cp is None:
            logger.warning("CuPy not installed, using CPU")
            return False
        try:
            # Run a trivial computation to confirm a working GPU device
            test_array = cp.array([1, 2, 3])
            _ = test_array * 2
            logger.info("GPU acceleration available via CuPy")
            return True
        except Exception as e:
            logger.warning(f"GPU acceleration not available, using CPU: {e}")
            return False

    def enhance_mask_quality(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply all enabled mask quality enhancements.

        Args:
            mask: Input binary mask

        Returns:
            Enhanced mask with quality improvements applied
        """
        enhanced_mask = mask.copy()

        # 1. Morphological cleaning
        if self.enable_morphological_cleaning:
            enhanced_mask = self._clean_mask_morphologically(enhanced_mask)

        # 2. Temporal smoothing
        if self.enable_temporal_smoothing:
            enhanced_mask = self._apply_temporal_smoothing(enhanced_mask)

        # 3. Edge enhancement and blurring
        if self.enable_edge_blur:
            enhanced_mask = self._apply_edge_blur(enhanced_mask)

        # 4. Bilateral filtering (if enabled)
        if self.enable_bilateral_filter:
            enhanced_mask = self._apply_bilateral_filter(enhanced_mask)

        return enhanced_mask

    def _clean_mask_morphologically(self, mask: np.ndarray) -> np.ndarray:
        """
        Clean mask using morphological operations to remove noise and small artifacts.

        Args:
            mask: Input binary mask

        Returns:
            Cleaned mask
        """
        # Convert to uint8 for OpenCV operations
        mask_uint8 = (mask * 255).astype(np.uint8)

        # Create morphological kernel
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                           (self.morphology_kernel_size, self.morphology_kernel_size))

        # Opening operation (erosion followed by dilation) to remove small noise
        cleaned = cv2.morphologyEx(mask_uint8, cv2.MORPH_OPEN, kernel)

        # Closing operation (dilation followed by erosion) to fill small holes
        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)

        # Remove small connected components
        if self.min_component_size > 0:
            cleaned = self._remove_small_components(cleaned)

        return (cleaned / 255.0).astype(np.float32)

    def _remove_small_components(self, mask: np.ndarray) -> np.ndarray:
        """
        Remove connected components smaller than the minimum size.

        Args:
            mask: Input binary mask (uint8)

        Returns:
            Mask with small components removed
        """
        # Find connected components
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, connectivity=8)

        # Create output mask
        output_mask = np.zeros_like(mask)

        # Keep components larger than minimum size (skip background label 0)
        for i in range(1, num_labels):
            component_size = stats[i, cv2.CC_STAT_AREA]
            if component_size >= self.min_component_size:
                output_mask[labels == i] = 255

        return output_mask

    def _apply_temporal_smoothing(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply temporal smoothing using mask history.

        Args:
            mask: Current frame mask

        Returns:
            Temporally smoothed mask
        """
        if len(self.mask_history) == 0:
            # First frame, no history to blend with
            self.mask_history.append(mask.copy())
            return mask

        # Blend with previous frames using a weighted average
        smoothed_mask = mask.astype(np.float32)
        total_weight = 1.0

        for i, hist_mask in enumerate(reversed(self.mask_history)):
            # Exponential decay: more recent frames have higher weight
            frame_weight = self.temporal_blend_weight * (0.8 ** i)
            smoothed_mask += hist_mask.astype(np.float32) * frame_weight
            total_weight += frame_weight

        # Normalize by total weight
        smoothed_mask /= total_weight

        # Update history with the raw (unsmoothed) mask to avoid feedback drift
        self.mask_history.append(mask.copy())

        return smoothed_mask

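    # Worked example of the weighting above (illustrative numbers, assuming
    # temporal_blend_weight=0.3 and a full 3-frame history): the blend uses
    # weights 1.0 (current), 0.3, 0.3*0.8=0.24, and 0.3*0.8^2=0.192, then
    # divides by their sum 1.732, so the current frame keeps ~58% influence.
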
    def _apply_edge_blur(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply Gaussian blur to mask edges for smooth transitions.

        Args:
            mask: Input mask

        Returns:
            Mask with blurred edges
        """
        # Kernel size must be odd: 2 * radius + 1
        kernel_size = 2 * self.edge_blur_radius + 1
        blurred_mask = cv2.GaussianBlur(mask.astype(np.float32),
                                        (kernel_size, kernel_size),
                                        self.edge_blur_sigma)

        return blurred_mask

    def _apply_bilateral_filter(self, mask: np.ndarray) -> np.ndarray:
        """
        Apply bilateral filtering for edge-preserving smoothing.

        Args:
            mask: Input mask

        Returns:
            Filtered mask
        """
        # Convert to uint8 for the bilateral filter
        mask_uint8 = (mask * 255).astype(np.uint8)

        # Apply bilateral filter
        filtered = cv2.bilateralFilter(mask_uint8, self.bilateral_d,
                                       self.bilateral_sigma_color,
                                       self.bilateral_sigma_space)

        return (filtered / 255.0).astype(np.float32)

    def _create_alpha_mask(self, mask: np.ndarray) -> np.ndarray:
        """
        Create alpha mask with smooth transitions based on blending mode.

        Args:
            mask: Input binary/float mask

        Returns:
            Alpha mask with smooth transitions
        """
        if self.alpha_blending_mode == "linear":
            return mask
        elif self.alpha_blending_mode == "gaussian":
            # Use distance transforms for a smooth falloff across the mask edge
            binary_mask = (mask > 0.5).astype(np.uint8)

            # Distance to the mask edge, measured inside and outside the mask
            dist_inside = cv2.distanceTransform(binary_mask, cv2.DIST_L2, 5)
            dist_outside = cv2.distanceTransform(1 - binary_mask, cv2.DIST_L2, 5)

            # Signed distance: positive inside the mask, negative outside.
            # Ramp alpha from 0 to 1 over alpha_transition_width pixels on each
            # side of the boundary, pinned to 0.5 at the boundary itself so the
            # transition is continuous (ramping each side independently from 0
            # and from 1 would leave a jump right at the edge).
            signed_dist = dist_inside.astype(np.float32) - dist_outside.astype(np.float32)
            transition_width = float(self.alpha_transition_width)
            alpha = np.clip(0.5 + signed_dist / (2.0 * transition_width), 0.0, 1.0)

            return alpha.astype(np.float32)
        elif self.alpha_blending_mode == "sigmoid":
            # Sigmoid-based smooth transition centered at mask value 0.5
            return 1.0 / (1.0 + np.exp(-10 * (mask - 0.5)))
        else:
            return mask

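    # Numeric sketch of the gaussian mode (assumed example values): with
    # alpha_transition_width=10, a pixel 5 px inside the edge gets
    # alpha = 0.5 + 5/20 = 0.75, a pixel 5 px outside gets 0.25, and pixels
    # more than 10 px from the edge clamp to 1.0 (inside) or 0.0 (outside).
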
    def apply_green_mask(self, frame: np.ndarray, masks: List[np.ndarray]) -> np.ndarray:
        """
        Apply green screen mask to a frame with quality enhancements.

        Args:
            frame: Input video frame (BGR format)
            masks: List of object masks to apply

        Returns:
            Frame with green screen background and enhanced mask quality
        """
        # Combine all masks into a single mask
        combined_mask = self._combine_masks(masks)

        # Apply quality enhancements
        enhanced_mask = self.enhance_mask_quality(combined_mask)

        # Create alpha mask for smooth blending
        alpha_mask = self._create_alpha_mask(enhanced_mask)

        # Apply mask using alpha blending
        if self.use_gpu:
            return self._apply_green_mask_gpu_enhanced(frame, alpha_mask)
        else:
            return self._apply_green_mask_cpu_enhanced(frame, alpha_mask)

    def apply_mask_with_alpha(self, frame: np.ndarray, masks: List[np.ndarray]) -> np.ndarray:
        """
        Apply mask to create a BGRA frame with an alpha channel.

        Args:
            frame: Input video frame (BGR format)
            masks: List of object masks to apply

        Returns:
            BGRA frame with alpha channel
        """
        # Combine all masks into a single mask
        combined_mask = self._combine_masks(masks)

        # Apply quality enhancements
        enhanced_mask = self.enhance_mask_quality(combined_mask)

        # Create alpha mask for smooth blending
        alpha_mask = self._create_alpha_mask(enhanced_mask)

        # Resize alpha mask to match frame if needed
        if alpha_mask.shape != frame.shape[:2]:
            alpha_mask = cv2.resize(alpha_mask, (frame.shape[1], frame.shape[0]))

        # Convert BGR to BGRA
        bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)

        # Set alpha channel
        bgra_frame[:, :, 3] = (alpha_mask * 255).astype(np.uint8)

        return bgra_frame

    def _combine_masks(self, masks: List[np.ndarray]) -> np.ndarray:
        """
        Combine multiple object masks into a single mask.

        Args:
            masks: List of object masks

        Returns:
            Combined mask
        """
        if not masks:
            return np.zeros((0, 0), dtype=np.float32)

        # Start with the first mask
        combined_mask = masks[0].squeeze().astype(np.float32)

        # Combine with remaining masks via element-wise maximum
        # (equivalent to logical OR for binary masks)
        for mask in masks[1:]:
            mask_squeezed = mask.squeeze().astype(np.float32)
            if mask_squeezed.shape != combined_mask.shape:
                # Resize mask to match the combined mask
                mask_squeezed = cv2.resize(mask_squeezed,
                                           (combined_mask.shape[1], combined_mask.shape[0]),
                                           interpolation=cv2.INTER_NEAREST)
            combined_mask = np.maximum(combined_mask, mask_squeezed)

        return combined_mask

    def reset_temporal_history(self):
        """Reset the temporal history buffer. Call this when starting a new segment."""
        self.mask_history.clear()
        logger.debug("Temporal history buffer reset")

    def _apply_green_mask_gpu_enhanced(self, frame: np.ndarray, alpha_mask: np.ndarray) -> np.ndarray:
        """GPU-accelerated green mask application with alpha blending using CuPy (Phase 1 optimized)."""
        try:
            # Convert to CuPy arrays
            frame_gpu = cp.asarray(frame, dtype=cp.uint8)
            alpha_gpu = cp.asarray(alpha_mask, dtype=cp.float32)

            # Resize alpha mask to match frame if needed. This round-trips
            # through the CPU because the resize itself is done by OpenCV.
            if alpha_gpu.shape != frame_gpu.shape[:2]:
                alpha_gpu = cp.asarray(cv2.resize(cp.asnumpy(alpha_gpu),
                                                  (frame_gpu.shape[1], frame_gpu.shape[0])))

            # Create green background via broadcasting (no full allocation)
            green_color_gpu = cp.array(self.green_color, dtype=cp.uint8)
            green_background = cp.broadcast_to(green_color_gpu, frame_gpu.shape)

            # Expand alpha to (H, W, 1) so it broadcasts across the color channels
            alpha_3d = cp.expand_dims(alpha_gpu, axis=2)

            # Blend in float32 to avoid uint8 overflow
            frame_float = frame_gpu.astype(cp.float32)
            green_float = green_background.astype(cp.float32)

            # Vectorized alpha blending: alpha * frame + (1 - alpha) * green
            result_frame = cp.clip(alpha_3d * frame_float + (1.0 - alpha_3d) * green_float, 0, 255)

            return cp.asnumpy(result_frame.astype(cp.uint8))

        except Exception as e:
            logger.error(f"GPU enhanced processing failed, falling back to CPU: {e}")
            return self._apply_green_mask_cpu_enhanced(frame, alpha_mask)

    def _apply_green_mask_cpu_enhanced(self, frame: np.ndarray, alpha_mask: np.ndarray) -> np.ndarray:
        """CPU-based green mask application with alpha blending (Phase 1 optimized)."""
        # Resize alpha mask to match frame if needed
        if alpha_mask.shape != frame.shape[:2]:
            alpha_mask = cv2.resize(alpha_mask, (frame.shape[1], frame.shape[0]))

        # Create green background via broadcasting (no full allocation)
        green_color = np.array(self.green_color, dtype=np.uint8)
        green_background = np.broadcast_to(green_color, frame.shape)

        # Expand alpha to (H, W, 1) so it broadcasts across the color channels
        alpha_3d = np.expand_dims(alpha_mask.astype(np.float32), axis=2)

        # Blend in float32 to avoid uint8 overflow
        frame_float = frame.astype(np.float32)
        green_float = green_background.astype(np.float32)

        result_frame = np.clip(alpha_3d * frame_float + (1.0 - alpha_3d) * green_float, 0, 255)

        return result_frame.astype(np.uint8)

    def apply_colored_mask(self, frame: np.ndarray, masks_a: List[np.ndarray],
                           masks_b: List[np.ndarray]) -> np.ndarray:
        """
        Apply colored masks for visualization (green and blue).

        Args:
            frame: Input video frame
            masks_a: Masks for object A (green)
            masks_b: Masks for object B (blue)

        Returns:
            Frame with colored masks applied
        """
        colored_mask = np.zeros_like(frame)

        # Apply green color to masks_a
        for mask in masks_a:
            # Threshold and cast to uint8 so cv2.resize accepts boolean/float masks
            mask = (mask.squeeze() > 0).astype(np.uint8)
            if mask.shape != frame.shape[:2]:
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)
            colored_mask[mask > 0] = self.green_color

        # Apply blue color to masks_b
        for mask in masks_b:
            mask = (mask.squeeze() > 0).astype(np.uint8)
            if mask.shape != frame.shape[:2]:
                mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)
            colored_mask[mask > 0] = self.blue_color

        return colored_mask

    def process_and_save_output_video(self, video_path: str, output_video_path: str,
                                      video_segments: Dict[int, Dict[int, np.ndarray]],
                                      use_nvenc: bool = False, bitrate: str = "50M",
                                      batch_size: int = 16) -> bool:
        """
        Process high-resolution frames, apply upscaled masks, and save the output video.

        Args:
            video_path: Path to input video
            output_video_path: Path to save output video
            video_segments: Dictionary of frame masks
            use_nvenc: Whether to use NVIDIA hardware encoding
            bitrate: Output video bitrate
            batch_size: Number of frames to process in a single batch

        Returns:
            True if successful
        """
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {video_path}")
                return False

            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")

            # Set up the video writer
            out_writer = None
            if self.output_mode == "alpha_channel":
                success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.error("Failed to setup alpha channel encoder")
                    cap.release()
                    return False
                use_nvenc = False
            elif use_nvenc:
                success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.warning("NVENC setup failed, falling back to OpenCV")
                    use_nvenc = False

            if not use_nvenc and self.output_mode != "alpha_channel":
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
                if not out_writer.isOpened():
                    logger.error("Failed to create output video writer")
                    cap.release()
                    return False

            # Process frames in batches
            frame_idx = 0
            processed_frames = 0

            while frame_idx < total_frames:
                batch_frames = []
                batch_masks = []

                # Read a batch of frames
                for _ in range(batch_size):
                    ret, frame = cap.read()
                    if not ret:
                        break
                    batch_frames.append(frame)

                if not batch_frames:
                    break

                # Get masks for the current batch and perform just-in-time upscaling
                for i in range(len(batch_frames)):
                    current_frame_idx = frame_idx + i
                    if current_frame_idx in video_segments:
                        frame_masks = video_segments[current_frame_idx]
                        upscaled_masks = []
                        for obj_id, mask in frame_masks.items():
                            mask = mask.squeeze()
                            if mask.shape != (frame_height, frame_width):
                                upscaled_mask = cv2.resize(mask.astype(np.uint8),
                                                           (frame_width, frame_height),
                                                           interpolation=cv2.INTER_NEAREST)
                                upscaled_masks.append(upscaled_mask)
                            else:
                                upscaled_masks.append(mask.astype(np.uint8))
                        batch_masks.append(upscaled_masks)
                    else:
                        batch_masks.append([])  # No masks for this frame

                # Process the batch
                result_batch = []
                for i, frame in enumerate(batch_frames):
                    masks = batch_masks[i]
                    if masks:
                        if self.output_mode == "alpha_channel":
                            result_frame = self.apply_mask_with_alpha(frame, masks)
                        else:
                            result_frame = self.apply_green_mask(frame, masks)
                    else:
                        # No mask for this frame
                        if self.output_mode == "alpha_channel":
                            bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
                            bgra_frame[:, :, 3] = 0
                            result_frame = bgra_frame
                        else:
                            result_frame = frame
                    result_batch.append(result_frame)

                # Write the processed batch
                for result_frame in result_batch:
                    if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                        self.alpha_process.stdin.write(result_frame.tobytes())
                    elif use_nvenc and hasattr(self, 'nvenc_process'):
                        self.nvenc_process.stdin.write(result_frame.tobytes())
                    else:
                        out_writer.write(result_frame)

                processed_frames += len(batch_frames)
                frame_idx += len(batch_frames)

                # Log progress roughly every 100 frames
                if processed_frames % 100 < batch_size:
                    logger.info(f"Processed {processed_frames}/{total_frames} frames")

            # Cleanup
            cap.release()
            if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                self.alpha_process.stdin.close()
                self.alpha_process.wait()
            elif use_nvenc and hasattr(self, 'nvenc_process'):
                self.nvenc_process.stdin.close()
                self.nvenc_process.wait()
            else:
                if out_writer:
                    out_writer.release()

            logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}")
            return True

        except Exception as e:
            logger.error(f"Error processing video: {e}", exc_info=True)
            return False

    def _setup_nvenc_encoder(self, output_path: str, width: int, height: int,
                             fps: float, bitrate: str) -> bool:
        """Setup hardware encoder (NVENC, or VideoToolbox on macOS) using FFmpeg."""
        try:
            # Determine encoder based on platform
            if sys.platform == 'darwin':
                encoder = 'hevc_videotoolbox'
            else:
                encoder = 'hevc_nvenc'

            command = [
                'ffmpeg',
                '-y',  # Overwrite output file
                '-f', 'rawvideo',
                '-vcodec', 'rawvideo',
                '-pix_fmt', 'bgr24',
                '-s', f'{width}x{height}',
                '-r', str(fps),
                '-i', '-',  # Input from stdin
                '-an',  # No audio (will be added later)
                '-vcodec', encoder,
                '-pix_fmt', 'yuv420p',  # Changed from nv12 for better compatibility
                '-preset', 'slow',
                '-b:v', bitrate,
                output_path
            ]

            # Discard stderr: with stderr=PIPE and no reader, FFmpeg can block
            # once the pipe buffer fills on long encodes
            self.nvenc_process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                                  stderr=subprocess.DEVNULL)
            logger.info(f"Initialized {encoder} hardware encoder")
            return True

        except Exception as e:
            logger.error(f"Failed to setup NVENC encoder: {e}")
            return False

    def _setup_alpha_encoder(self, output_path: str, width: int, height: int,
                             fps: float, bitrate: str) -> bool:
        """Setup encoder for alpha channel video using FFmpeg with H.265."""
        try:
            # For VR180 SBS we use H.265 (HEVC). Note that standard H.264/H.265
            # streams do not carry an alpha channel: BGRA frames are accepted on
            # stdin, but the yuv420p output format flattens the alpha away.

            # Determine encoder based on platform
            if sys.platform == 'darwin':
                encoder = 'hevc_videotoolbox'
            else:
                encoder = 'hevc_nvenc'

            command = [
                'ffmpeg',
                '-y',  # Overwrite output file
                '-f', 'rawvideo',
                '-vcodec', 'rawvideo',
                '-pix_fmt', 'bgra',  # BGRA input for the alpha channel
                '-s', f'{width}x{height}',
                '-r', str(fps),
                '-i', '-',  # Input from stdin
                '-an',  # No audio (will be added later)
                '-c:v', encoder,
                '-pix_fmt', 'yuv420p',  # Standard pixel format (drops alpha)
                '-preset', 'slow',
                '-b:v', bitrate,
                '-tag:v', 'hvc1',  # Required for some players
                output_path
            ]

            # Discard stderr to avoid blocking on a full, unread pipe buffer
            self.alpha_process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                                  stderr=subprocess.DEVNULL)
            self.alpha_output_path = output_path
            logger.info(f"Initialized {encoder} for alpha channel output")
            return True

        except Exception as e:
            logger.error(f"Failed to setup alpha encoder: {e}")
            return False

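    # Note: if the alpha channel actually needs to survive in the output file,
    # a codec with alpha support would be required instead, e.g. ProRes 4444
    # ('-c:v prores_ks -pix_fmt yuva444p10le' into a .mov container). This is
    # a suggestion, not something the current pipeline does.
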
    def process_segment(self, segment_info: dict, video_segments: Dict[int, Dict[int, np.ndarray]],
                        use_nvenc: bool = False, bitrate: str = "50M") -> bool:
        """
        Process a single segment and save the output video.

        Args:
            segment_info: Segment information dictionary
            video_segments: Dictionary of frame masks from SAM2
            use_nvenc: Whether to use hardware encoding
            bitrate: Output video bitrate

        Returns:
            True if successful
        """
        input_video = segment_info['video_file']
        if self.output_mode == "alpha_channel":
            output_video = os.path.join(segment_info['directory'], f"output_{segment_info['index']}.mov")
        else:
            output_video = os.path.join(segment_info['directory'], f"output_{segment_info['index']}.mp4")

        logger.info(f"Processing segment {segment_info['index']} with {self.output_mode}")

        success = self.process_and_save_output_video(
            input_video,
            output_video,
            video_segments,
            use_nvenc,
            bitrate
        )

        if success:
            logger.info(f"Successfully created {self.output_mode} video: {output_video}")
            # Mark segment as completed only after the video is successfully written
            try:
                output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
                with open(output_done_file, 'w') as f:
                    f.write(f"Segment {segment_info['index']} processed and saved successfully.")
                logger.debug(f"Created completion marker for segment {segment_info['index']}")
            except Exception as e:
                logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
        else:
            logger.error(f"Failed to process segment {segment_info['index']}")

        return success

    def create_full_greenscreen_frame(self, frame_shape: Tuple[int, int, int],
                                      green_color: Optional[List[int]] = None) -> np.ndarray:
        """
        Create a full greenscreen frame for fallback when no humans are detected.

        Args:
            frame_shape: Shape of the frame (height, width, channels)
            green_color: BGR values for the green screen color (uses default if None)

        Returns:
            Full greenscreen frame
        """
        if green_color is None:
            green_color = self.green_color

        greenscreen_frame = np.full(frame_shape, green_color, dtype=np.uint8)
        logger.debug(f"Created full greenscreen frame with shape {frame_shape}")
        return greenscreen_frame

    def process_greenscreen_only_segment(self, segment_info: dict,
                                         green_color: Optional[List[int]] = None,
                                         use_nvenc: bool = False, bitrate: str = "50M") -> bool:
        """
        Create a full greenscreen segment when no humans are detected.
        Used as a fallback in separate eye processing mode.

        Args:
            segment_info: Segment information dictionary
            green_color: BGR values for the green screen color (uses default if None)
            use_nvenc: Whether to use hardware encoding
            bitrate: Output video bitrate

        Returns:
            True if the greenscreen segment was created successfully
        """
        segment_dir = segment_info['directory']
        video_path = segment_info['video_file']
        segment_idx = segment_info['index']

        logger.info(f"Creating full greenscreen segment {segment_idx} (no humans detected)")

        try:
            # Get video properties
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                logger.error(f"Could not open video: {video_path}")
                return False

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()

            # Create output video path
            if self.output_mode == "alpha_channel":
                output_video_path = os.path.join(segment_dir, f"output_{segment_idx}.mov")
            else:
                output_video_path = os.path.join(segment_dir, f"output_{segment_idx}.mp4")

            # Create greenscreen frame
            if green_color is None:
                green_color = self.green_color

            greenscreen_frame = self.create_full_greenscreen_frame(
                (height, width, 3), green_color
            )

            # Setup video writer based on mode and hardware encoding preference
            if use_nvenc:
                success = self._write_greenscreen_with_nvenc(
                    output_video_path, greenscreen_frame, frame_count, fps, bitrate
                )
            else:
                success = self._write_greenscreen_with_opencv(
                    output_video_path, greenscreen_frame, frame_count, fps
                )

            if not success:
                logger.error(f"Failed to write greenscreen video for segment {segment_idx}")
                return False

            # Create an empty mask file (black mask, since no humans were detected)
            mask_output_path = os.path.join(segment_dir, "mask.png")
            black_mask = np.zeros((height, width, 3), dtype=np.uint8)
            cv2.imwrite(mask_output_path, black_mask)

            # Mark segment as completed
            output_done_file = os.path.join(segment_dir, "output_frames_done")
            with open(output_done_file, 'w') as f:
                f.write(f"Greenscreen segment {segment_idx} completed successfully\n")

            logger.info(f"Successfully created greenscreen segment {segment_idx}")
            return True

        except Exception as e:
            logger.error(f"Error creating greenscreen segment {segment_idx}: {e}")
            return False

    def _write_greenscreen_with_opencv(self, output_path: str, greenscreen_frame: np.ndarray,
                                       frame_count: int, fps: float) -> bool:
        """Write greenscreen video using the OpenCV VideoWriter."""
        try:
            if self.output_mode == "alpha_channel":
                # For alpha channel mode, create fully transparent frames
                bgra_frame = cv2.cvtColor(greenscreen_frame, cv2.COLOR_BGR2BGRA)
                bgra_frame[:, :, 3] = 0  # Fully transparent
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps,
                                      (greenscreen_frame.shape[1], greenscreen_frame.shape[0]), True)
                frame_to_write = bgra_frame[:, :, :3]  # OpenCV expects BGR for mp4v
            else:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps,
                                      (greenscreen_frame.shape[1], greenscreen_frame.shape[0]))
                frame_to_write = greenscreen_frame

            if not out.isOpened():
                logger.error(f"Failed to open video writer for {output_path}")
                return False

            # Write identical greenscreen frames
            for _ in range(frame_count):
                out.write(frame_to_write)

            out.release()
            logger.debug(f"Wrote {frame_count} greenscreen frames using OpenCV")
            return True

        except Exception as e:
            logger.error(f"Error writing greenscreen with OpenCV: {e}")
            return False

    def _write_greenscreen_with_nvenc(self, output_path: str, greenscreen_frame: np.ndarray,
                                      frame_count: int, fps: float, bitrate: str) -> bool:
        """Write greenscreen video using NVENC hardware encoding."""
        try:
            # Setup NVENC encoder
            if not self._setup_nvenc_encoder(output_path,
                                             greenscreen_frame.shape[1],
                                             greenscreen_frame.shape[0],
                                             fps, bitrate):
                logger.warning("NVENC setup failed for greenscreen, falling back to OpenCV")
                return self._write_greenscreen_with_opencv(output_path, greenscreen_frame, frame_count, fps)

            # Write identical greenscreen frames
            for _ in range(frame_count):
                self.nvenc_process.stdin.write(greenscreen_frame.tobytes())

            # Finalize encoding
            self.nvenc_process.stdin.close()
            self.nvenc_process.wait()

            if self.nvenc_process.returncode != 0:
                logger.error("NVENC encoding failed for greenscreen")
                return False

            logger.debug(f"Wrote {frame_count} greenscreen frames using NVENC")
            return True

        except Exception as e:
            logger.error(f"Error writing greenscreen with NVENC: {e}")
            return False

    def has_valid_masks(self, video_segments: Optional[Dict[int, Dict[int, np.ndarray]]]) -> bool:
        """
        Check if video segments contain valid masks.

        Args:
            video_segments: Video segments dictionary from SAM2

        Returns:
            True if valid masks are found
        """
        if not video_segments:
            return False

        # Check if any frame has non-empty masks
        for frame_idx, frame_masks in video_segments.items():
            for obj_id, mask in frame_masks.items():
                if mask is not None and np.any(mask):
                    return True

        return False
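

# Example usage (illustrative sketch only; the file paths and the shape of the
# SAM2 mask dictionary below are assumptions, not part of this module):
#
#   processor = MaskProcessor(
#       mask_quality_config={'enable_edge_blur': True, 'enable_temporal_smoothing': True},
#       output_mode="green_screen",
#   )
#   # video_segments maps frame index -> {object id -> mask array}
#   video_segments = {0: {1: np.ones((1080, 1920), dtype=bool)}}
#   if processor.has_valid_masks(video_segments):
#       processor.process_and_save_output_video(
#           "segment_0/input.mp4", "segment_0/output_0.mp4",
#           video_segments, use_nvenc=False, bitrate="50M",
#       )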