From 463f881eaf6fe4d48fffd6a315f2dac4026e52bb Mon Sep 17 00:00:00 2001
From: Scott Register
Date: Sat, 26 Jul 2025 11:56:51 -0700
Subject: [PATCH] category A round 2

---
 vr180_matting/video_processor.py | 93 ++++++++++++++++++++++++++++++++
 vr180_matting/vr180_processor.py | 64 ++++++++++++++++++++--
 2 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/vr180_matting/video_processor.py b/vr180_matting/video_processor.py
index 6d99c0d..30f11e9 100644
--- a/vr180_matting/video_processor.py
+++ b/vr180_matting/video_processor.py
@@ -9,6 +9,9 @@ from tqdm import tqdm
 import warnings
 import time
 import subprocess
+import gc
+import psutil
+import os
 
 from .config import VR180Config
 from .detector import YOLODetector
@@ -49,6 +52,96 @@ class VideoProcessor:
 
         self._initialize_models()
 
+    def _get_process_memory_info(self) -> Dict[str, float]:
+        """Get detailed memory usage for current process and children"""
+        current_process = psutil.Process(os.getpid())
+
+        # Get memory info for current process
+        memory_info = current_process.memory_info()
+        current_rss = memory_info.rss / 1024**3  # Convert to GB
+        current_vms = memory_info.vms / 1024**3  # Virtual memory
+
+        # Get memory info for all children
+        children_rss = 0
+        children_vms = 0
+        child_count = 0
+
+        try:
+            for child in current_process.children(recursive=True):
+                try:
+                    child_memory = child.memory_info()
+                    children_rss += child_memory.rss / 1024**3
+                    children_vms += child_memory.vms / 1024**3
+                    child_count += 1
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    pass
+        except psutil.NoSuchProcess:
+            pass
+
+        # System memory info
+        system_memory = psutil.virtual_memory()
+        system_total = system_memory.total / 1024**3
+        system_available = system_memory.available / 1024**3
+        system_used = system_memory.used / 1024**3
+        system_percent = system_memory.percent
+
+        return {
+            'process_rss_gb': current_rss,
+            'process_vms_gb': current_vms,
+            'children_rss_gb': children_rss,
+            'children_vms_gb': children_vms,
+            'total_process_gb': current_rss + children_rss,
+            'child_count': child_count,
+            'system_total_gb': system_total,
+            'system_used_gb': system_used,
+            'system_available_gb': system_available,
+            'system_percent': system_percent
+        }
+
+    def _print_memory_step(self, step_name: str):
+        """Print memory usage for a specific processing step"""
+        memory_info = self._get_process_memory_info()
+
+        print(f"\n📊 MEMORY: {step_name}")
+        print(f"   Process RSS: {memory_info['process_rss_gb']:.2f} GB")
+        if memory_info['children_rss_gb'] > 0:
+            print(f"   Children RSS: {memory_info['children_rss_gb']:.2f} GB ({memory_info['child_count']} processes)")
+        print(f"   Total Process: {memory_info['total_process_gb']:.2f} GB")
+        print(f"   System: {memory_info['system_used_gb']:.1f}/{memory_info['system_total_gb']:.1f} GB ({memory_info['system_percent']:.1f}%)")
+        print(f"   Available: {memory_info['system_available_gb']:.1f} GB")
+
+    def _aggressive_memory_cleanup(self, step_name: str = ""):
+        """Perform aggressive memory cleanup and report usage before/after"""
+        if step_name:
+            print(f"\n🧹 CLEANUP: Before {step_name}")
+
+        before_info = self._get_process_memory_info()
+        before_rss = before_info['total_process_gb']
+
+        # Multiple rounds of garbage collection
+        for _ in range(3):
+            gc.collect()
+
+        # Clear torch cache if available
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+        except ImportError:
+            pass
+
+        # Brief pause to allow cleanup
+        time.sleep(0.1)
+
+        after_info = self._get_process_memory_info()
+        after_rss = after_info['total_process_gb']
+        freed_memory = before_rss - after_rss
+
+        if step_name:
+            print(f"   Before: {before_rss:.2f} GB → After: {after_rss:.2f} GB")
+            print(f"   Freed: {freed_memory:.2f} GB")
+
     def _initialize_models(self):
         """Initialize YOLO detector and SAM2 model"""
         print("Initializing models...")
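For reference, the accounting in _get_process_memory_info() above is the standard psutil pattern of summing RSS across the whole process tree; plain process RSS would miss the worker children spawned by subprocess/multiprocessing. The sketch below is a minimal, self-contained illustration of that pattern, not code from the patch; the name report_rss is illustrative:

    import os
    import psutil

    def report_rss(step_name: str) -> float:
        """Sum RSS of this process and all live children, as the patch does."""
        proc = psutil.Process(os.getpid())
        total = proc.memory_info().rss
        for child in proc.children(recursive=True):
            try:
                total += child.memory_info().rss
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass  # child exited (or became inaccessible) between listing and query
        gb = total / 1024**3
        print(f"{step_name}: {gb:.2f} GB RSS (process + children)")
        return gb

    if __name__ == "__main__":
        report_rss("baseline")
        buf = bytearray(512 * 1024 * 1024)  # hold ~0.5 GB so the delta is visible
        report_rss("after allocating 0.5 GB")
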
diff --git a/vr180_matting/vr180_processor.py b/vr180_matting/vr180_processor.py
index 5d8e230..9c375ae 100644
--- a/vr180_matting/vr180_processor.py
+++ b/vr180_matting/vr180_processor.py
@@ -232,6 +232,9 @@ class VR180Processor(VideoProcessor):
         # Use the original video's framerate - access through parent class
         original_fps = self.fps if hasattr(self, 'fps') else 30.0
         print(f"Using framerate: {original_fps} fps")
+
+        # Memory monitoring before ffmpeg
+        self._print_memory_step(f"Before ffmpeg encoding ({eye_name} eye)")
         # Try GPU encoding first, fallback to CPU
         gpu_cmd = [
             'ffmpeg', '-y',  # -y to overwrite output file
@@ -283,6 +286,9 @@
 
             print(f"Created temp video successfully")
 
+            # Memory monitoring after ffmpeg
+            self._print_memory_step(f"After ffmpeg encoding ({eye_name} eye)")
+
             # Verify the file was created and has content
             if not temp_video_path.exists():
                 raise RuntimeError(f"Temporary video file was not created: {temp_video_path}")
@@ -293,16 +299,28 @@
 
             print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)")
 
+            # Memory monitoring and cleanup before SAM2 initialization
+            num_frames = len(eye_frames)  # Store count before freeing
+            first_frame = eye_frames[0].copy()  # Copy first frame for detection before freeing
+            self._print_memory_step(f"Before SAM2 init ({eye_name} eye, {num_frames} frames)")
+
+            # CRITICAL: Explicitly free eye_frames from memory before SAM2 loads the same video
+            # This prevents the OOM issue where both Python frames and SAM2 frames exist simultaneously
+            del eye_frames  # Free the frames array
+            self._aggressive_memory_cleanup(f"SAM2 init for {eye_name} eye")
+
             # Initialize SAM2 with video path
+            self._print_memory_step(f"Starting SAM2 init ({eye_name} eye)")
             self.sam2_model.init_video_state(video_path=str(temp_video_path))
+            self._print_memory_step(f"SAM2 initialized ({eye_name} eye)")
 
             # Detect persons in first frame
-            first_frame = eye_frames[0]
             detections = self.detector.detect_persons(first_frame)
 
             if not detections:
                 warnings.warn(f"No persons detected in {eye_name} eye, chunk {chunk_idx}")
-                return self._create_empty_masks(eye_frames)
+                # Return empty masks for the expected number of frames
+                return self._create_empty_masks_from_count(num_frames, first_frame.shape)
 
             print(f"Detected {len(detections)} persons in {eye_name} eye first frame")
 
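The ordering in the hunk above is the load-bearing part: the frame count and a detection frame are captured before del eye_frames, so nothing needed later keeps the big list alive while SAM2 builds its own copy of the same video. A minimal sketch of that capture-then-free lifecycle, with illustrative names and small arrays standing in for decoded frames:

    import gc
    import numpy as np

    # Stand-in for decoded eye frames (~3.3 MB each here; GBs in the real pipeline).
    eye_frames = [np.zeros((1080, 1080, 3), dtype=np.uint8) for _ in range(8)]

    # 1) Capture everything that must outlive the frame list.
    num_frames = len(eye_frames)
    first_frame = eye_frames[0].copy()  # .copy() so it doesn't pin the original array

    # 2) Drop the only reference and collect, *before* the next big allocation
    #    (SAM2's video state in the patch) so the two never coexist at peak.
    del eye_frames
    gc.collect()

    # 3) Later consumers use the captured values, not the freed list.
    print(num_frames, first_frame.shape)
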
@@ -312,15 +330,33 @@
             # Add prompts
             object_ids = self.sam2_model.add_person_prompts(0, box_prompts, labels)
 
-            # Propagate masks
+            # Propagate masks (most expensive operation)
+            self._print_memory_step(f"Before SAM2 propagation ({eye_name} eye, {num_frames} frames)")
             video_segments = self.sam2_model.propagate_masks(
                 start_frame=0,
-                max_frames=len(eye_frames)
+                max_frames=num_frames
             )
+            self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
+
+            # Apply masks - we need to reload frames from the temp video since the originals were freed
+            self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)")
+
+            # Read frames back from the temp video for mask application
+            cap = cv2.VideoCapture(str(temp_video_path))
+            reloaded_frames = []
+
+            for frame_idx in range(num_frames):
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                reloaded_frames.append(frame)
+            cap.release()
+
+            self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
 
             # Apply masks
             matted_frames = []
-            for frame_idx, frame in enumerate(eye_frames):
+            for frame_idx, frame in enumerate(reloaded_frames):
                 if frame_idx in video_segments:
                     frame_masks = video_segments[frame_idx]
                     combined_mask = self.sam2_model.get_combined_mask(frame_masks)
@@ -335,6 +371,10 @@
 
                 matted_frames.append(matted_frame)
 
+            # Free the reloaded frames
+            del reloaded_frames
+            self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
+
             return matted_frames
 
         finally:
@@ -410,6 +450,20 @@
 
         return validated_frames
 
+    def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
+        """Create empty masks when no persons were detected (without the frame array)"""
+        empty_frames = []
+        for _ in range(num_frames):
+            if self.config.output.format == "alpha":
+                # Transparent output
+                output = np.zeros((frame_shape[0], frame_shape[1], 4), dtype=np.uint8)
+            else:
+                # Green screen background
+                output = np.full((frame_shape[0], frame_shape[1], 3),
+                                 self.config.output.background_color, dtype=np.uint8)
+            empty_frames.append(output)
+        return empty_frames
+
     def _get_mask_area(self, frame: np.ndarray) -> float:
         """Get mask area from processed frame"""
         if frame.shape[2] == 4:  # Alpha channel
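
Since _create_empty_masks_from_count() only needs a frame count and a shape, the no-detection path stays valid after eye_frames is freed. A self-contained sketch of the same output convention, assuming BGR frames and a BGR background_color tuple as in OpenCV; empty_output and the "greenscreen" format name are illustrative, not from the patch:

    import numpy as np

    def empty_output(height: int, width: int, fmt: str,
                     background_color=(0, 255, 0)):
        """Fully transparent BGRA frame, or a solid green-screen BGR frame."""
        if fmt == "alpha":
            return np.zeros((height, width, 4), dtype=np.uint8)
        # np.full broadcasts the 3-tuple across the last axis
        return np.full((height, width, 3), background_color, dtype=np.uint8)

    frames = [empty_output(1080, 1080, "greenscreen") for _ in range(5)]
    assert frames[0].shape == (1080, 1080, 3)
    assert empty_output(4, 4, "alpha")[..., 3].max() == 0  # alpha channel all zero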