working a bit faster: MaskProcessor now processes frames in batches and upscales masks just-in-time

2025-07-31 09:09:22 -07:00
parent 70044e1b10
commit 0057017ac4
5 changed files with 585 additions and 137 deletions
--- a/core/mask_processor.py
+++ b/core/mask_processor.py
@@ -459,41 +459,12 @@ class MaskProcessor:
        
        return colored_mask
    
-    def _precompute_upscaled_masks(self, video_segments: Dict[int, Dict[int, np.ndarray]], 
-                                  target_width: int, target_height: int) -> Dict[int, Dict[int, np.ndarray]]:
-        """
-        Pre-compute all upscaled masks to avoid per-frame upscaling.
-        
-        Args:
-            video_segments: Dictionary of frame masks from SAM2
-            target_width: Target frame width
-            target_height: Target frame height
-            
-        Returns:
-            Dictionary with pre-upscaled masks
-        """
-        logger.info(f"Pre-computing upscaled masks for {len(video_segments)} frames")
-        upscaled_segments = {}
-        
-        for frame_idx, frame_masks in video_segments.items():
-            upscaled_frame_masks = {}
-            for obj_id, mask in frame_masks.items():
-                mask = mask.squeeze()
-                if mask.shape != (target_height, target_width):
-                    upscaled_mask = cv2.resize(mask.astype(np.uint8), 
-                                             (target_width, target_height), 
-                                             interpolation=cv2.INTER_NEAREST)
-                    upscaled_frame_masks[obj_id] = upscaled_mask
-                else:
-                    upscaled_frame_masks[obj_id] = mask.astype(np.uint8)
-            upscaled_segments[frame_idx] = upscaled_frame_masks
-        
-        logger.info(f"Pre-computed upscaled masks for {len(upscaled_segments)} frames")
-        return upscaled_segments
+

    def process_and_save_output_video(self, video_path: str, output_video_path: str, 
                                     video_segments: Dict[int, Dict[int, np.ndarray]], 
-                                     use_nvenc: bool = False, bitrate: str = "50M") -> bool:
+                                     use_nvenc: bool = False, bitrate: str = "50M",
+                                     batch_size: int = 16) -> bool:
        """
        Process high-resolution frames, apply upscaled masks, and save the output video.
        
@@ -503,6 +474,7 @@ class MaskProcessor:
            video_segments: Dictionary of frame masks
            use_nvenc: Whether to use NVIDIA hardware encoding
            bitrate: Output video bitrate
+            batch_size: Number of frames to process in a single batch
            
        Returns:
            True if successful
@@ -520,18 +492,15 @@ class MaskProcessor:
            
            logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")
            
-            # Pre-compute all upscaled masks (Phase 1 optimization)
-            upscaled_segments = self._precompute_upscaled_masks(video_segments, frame_width, frame_height)
-            
            # Setup VideoWriter
+            out_writer = None
            if self.output_mode == "alpha_channel":
-                # For alpha channel, we need a codec that supports transparency
                success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
                    logger.error("Failed to setup alpha channel encoder")
                    cap.release()
                    return False
-                use_nvenc = False  # Override NVENC for alpha channel
+                use_nvenc = False
            elif use_nvenc:
                success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
                if not success:
@@ -539,106 +508,101 @@ class MaskProcessor:
                    use_nvenc = False
            
            if not use_nvenc and self.output_mode != "alpha_channel":
-                # Use OpenCV VideoWriter
-                fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use mp4v for better compatibility
-                out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
-                if not out.isOpened():
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
+                if not out_writer.isOpened():
                    logger.error("Failed to create output video writer")
                    cap.release()
                    return False
-            
-            # Process frames with batch reading (Phase 1 optimization)
+
+            # Process frames in batches
            frame_idx = 0
            processed_frames = 0
-            batch_size = 10  # Process frames in batches for better I/O performance
-            frame_buffer = []
            
-            # Pre-fill frame buffer
-            for _ in range(min(batch_size, len(upscaled_segments))):
-                ret, frame = cap.read()
-                if ret:
-                    frame_buffer.append(frame)
-                else:
+            while frame_idx < total_frames:
+                batch_frames = []
+                batch_masks = []
+                
+                # Read a batch of frames
+                for _ in range(batch_size):
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    batch_frames.append(frame)
+                
+                if not batch_frames:
                    break
-            
-            buffer_idx = 0
-            while frame_idx < len(upscaled_segments) and buffer_idx < len(frame_buffer):
-                frame = frame_buffer[buffer_idx]
                
-                if frame_idx in upscaled_segments:
-                    # Get pre-computed upscaled masks for this frame (Phase 1 optimization)
-                    upscaled_masks = [upscaled_segments[frame_idx][obj_id] 
-                                    for obj_id in upscaled_segments[frame_idx]]
-                    
-                    # Apply mask based on output mode (no upscaling needed - already done)
-                    if self.output_mode == "alpha_channel":
-                        result_frame = self.apply_mask_with_alpha(frame, upscaled_masks)
+                # Get masks for the current batch and perform just-in-time upscaling
+                for i in range(len(batch_frames)):
+                    current_frame_idx = frame_idx + i
+                    if current_frame_idx in video_segments:
+                        frame_masks = video_segments[current_frame_idx]
+                        upscaled_masks = []
+                        for obj_id, mask in frame_masks.items():
+                            mask = mask.squeeze()
+                            if mask.shape != (frame_height, frame_width):
+                                upscaled_mask = cv2.resize(mask.astype(np.uint8), 
+                                                         (frame_width, frame_height), 
+                                                         interpolation=cv2.INTER_NEAREST)
+                                upscaled_masks.append(upscaled_mask)
+                            else:
+                                upscaled_masks.append(mask.astype(np.uint8))
+                        batch_masks.append(upscaled_masks)
                    else:
-                        result_frame = self.apply_green_mask(frame, upscaled_masks)
-                else:
-                    # No mask for this frame
-                    if self.output_mode == "alpha_channel":
-                        # Create fully transparent frame for alpha channel mode
-                        bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
-                        bgra_frame[:, :, 3] = 0  # Fully transparent
-                        result_frame = bgra_frame
-                        logger.warning(f"No mask for frame {frame_idx}, using transparent frame")
-                    else:
-                        # Use original frame for green screen mode
-                        logger.warning(f"No mask for frame {frame_idx}, using original")
-                        result_frame = frame
+                        batch_masks.append([]) # No masks for this frame
                
-                # Write frame
-                if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
-                    self.alpha_process.stdin.write(result_frame.tobytes())
-                elif use_nvenc and hasattr(self, 'nvenc_process'):
-                    self.nvenc_process.stdin.write(result_frame.tobytes())
-                else:
-                    out.write(result_frame)
-                
-                processed_frames += 1
-                frame_idx += 1
-                buffer_idx += 1
-                
-                # Refill buffer when needed
-                if buffer_idx >= len(frame_buffer) and frame_idx < len(upscaled_segments):
-                    frame_buffer.clear()
-                    buffer_idx = 0
-                    # Read next batch
-                    for _ in range(min(batch_size, len(upscaled_segments) - frame_idx)):
-                        ret, frame = cap.read()
-                        if ret:
-                            frame_buffer.append(frame)
+                # Process the batch
+                result_batch = []
+                for i, frame in enumerate(batch_frames):
+                    masks = batch_masks[i]
+                    if masks:
+                        if self.output_mode == "alpha_channel":
+                            result_frame = self.apply_mask_with_alpha(frame, masks)
                        else:
-                            break
+                            result_frame = self.apply_green_mask(frame, masks)
+                    else:
+                        # No mask for this frame
+                        if self.output_mode == "alpha_channel":
+                            bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
+                            bgra_frame[:, :, 3] = 0
+                            result_frame = bgra_frame
+                        else:
+                            result_frame = frame
+                    result_batch.append(result_frame)
                
-                # Progress logging
-                if processed_frames % 100 == 0:
+                # Write the processed batch
+                for result_frame in result_batch:
+                    if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
+                        self.alpha_process.stdin.write(result_frame.tobytes())
+                    elif use_nvenc and hasattr(self, 'nvenc_process'):
+                        self.nvenc_process.stdin.write(result_frame.tobytes())
+                    else:
+                        out_writer.write(result_frame)
+                
+                processed_frames += len(batch_frames)
+                frame_idx += len(batch_frames)
+                
+                if processed_frames % 100 < batch_size:
                    logger.info(f"Processed {processed_frames}/{total_frames} frames")
-            
+
            # Cleanup
            cap.release()
-            
            if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
                self.alpha_process.stdin.close()
                self.alpha_process.wait()
-                if self.alpha_process.returncode != 0:
-                    logger.error("Alpha channel encoding failed")
-                    return False
            elif use_nvenc and hasattr(self, 'nvenc_process'):
                self.nvenc_process.stdin.close()
                self.nvenc_process.wait()
-                if self.nvenc_process.returncode != 0:
-                    logger.error("NVENC encoding failed")
-                    return False
            else:
-                out.release()
+                if out_writer:
+                    out_writer.release()
            
            logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}")
            return True
            
        except Exception as e:
-            logger.error(f"Error processing video: {e}")
+            logger.error(f"Error processing video: {e}", exc_info=True)
            return False
    
    def _setup_nvenc_encoder(self, output_path: str, width: int, height: int, 
@@ -751,6 +715,14 @@ class MaskProcessor:
        
        if success:
            logger.info(f"Successfully created {self.output_mode} video: {output_video}")
+            # Mark segment as completed only after video is successfully written
+            try:
+                output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
+                with open(output_done_file, 'w') as f:
+                    f.write(f"Segment {segment_info['index']} processed and saved successfully.")
+                logger.debug(f"Created completion marker for segment {segment_info['index']}")
+            except Exception as e:
+                logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
        else:
            logger.error(f"Failed to process segment {segment_info['index']}")