first commit

2025-07-26 07:23:50 -07:00
commit cc77989365
15 changed files with 2429 additions and 0 deletions

@@ -0,0 +1,241 @@
import torch
import psutil
import gc
import warnings
from typing import Optional, Dict, Any
from contextlib import contextmanager
import time
class VRAMManager:
"""VRAM and memory optimization manager"""
def __init__(self, max_vram_gb: float = 10.0, device: str = "cuda"):
self.max_vram_gb = max_vram_gb
self.device = device
self.max_vram_bytes = max_vram_gb * 1024**3
# Memory tracking
self.memory_stats = {
'peak_allocated': 0,
'peak_reserved': 0,
'allocations': 0,
'deallocations': 0
}
self._check_device()
def _check_device(self):
"""Check if CUDA is available and get device info"""
if self.device == "cuda":
if not torch.cuda.is_available():
warnings.warn("CUDA not available, falling back to CPU")
self.device = "cpu"
return
device_props = torch.cuda.get_device_properties(0)
total_memory = device_props.total_memory
print(f"GPU: {device_props.name}")
print(f"Total VRAM: {total_memory / 1024**3:.1f} GB")
print(f"Max VRAM limit: {self.max_vram_gb:.1f} GB")
if self.max_vram_bytes > total_memory * 0.9:
warnings.warn(f"Max VRAM limit ({self.max_vram_gb:.1f} GB) is close to total VRAM")
def get_memory_usage(self) -> Dict[str, float]:
"""Get current memory usage statistics"""
stats = {}
if self.device == "cuda" and torch.cuda.is_available():
stats['vram_allocated'] = torch.cuda.memory_allocated() / 1024**3
stats['vram_reserved'] = torch.cuda.memory_reserved() / 1024**3
stats['vram_free'] = (torch.cuda.get_device_properties(0).total_memory -
torch.cuda.memory_reserved()) / 1024**3
else:
stats['vram_allocated'] = 0
stats['vram_reserved'] = 0
stats['vram_free'] = 0
# System RAM
ram_info = psutil.virtual_memory()
stats['ram_used'] = ram_info.used / 1024**3
stats['ram_available'] = ram_info.available / 1024**3
stats['ram_percent'] = ram_info.percent
return stats
def check_memory_available(self, required_gb: float) -> bool:
"""Check if enough memory is available for operation"""
stats = self.get_memory_usage()
if self.device == "cuda":
return stats['vram_free'] >= required_gb
else:
return stats['ram_available'] >= required_gb
def cleanup_memory(self, aggressive: bool = False):
"""Clean up memory"""
if self.device == "cuda" and torch.cuda.is_available():
torch.cuda.empty_cache()
if aggressive:
torch.cuda.ipc_collect()
torch.cuda.synchronize()
# Python garbage collection
gc.collect()
if aggressive:
# Force garbage collection multiple times
for _ in range(3):
gc.collect()
def estimate_processing_memory(self,
frame_height: int,
frame_width: int,
num_frames: int,
fp16: bool = True) -> float:
"""
Estimate memory requirements for processing
Args:
frame_height: Frame height in pixels
frame_width: Frame width in pixels
num_frames: Number of frames to process
fp16: Whether using FP16 precision
Returns:
Estimated memory usage in GB
"""
bytes_per_pixel = 2 if fp16 else 4 # FP16 vs FP32
# Estimate memory components
frame_memory = frame_height * frame_width * 3 * bytes_per_pixel * num_frames
model_memory = 2.0 * 1024**3 # ~2GB for SAM2 model
yolo_memory = 0.5 * 1024**3 # ~0.5GB for YOLO
working_memory = frame_memory * 2 # Working space for masks, etc.
total_memory = frame_memory + model_memory + yolo_memory + working_memory
return total_memory / 1024**3
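# Worked example of the estimate above (illustrative arithmetic: FP16, 1920x1080, 16 frames):
#     frames:  1080 * 1920 * 3 * 2 * 16 bytes  ~= 0.19 GB
#     working: 2 * frames                      ~= 0.37 GB
#     models:  2.0 GB (SAM2) + 0.5 GB (YOLO)    = 2.50 GB
#     estimate_processing_memory(1080, 1920, 16, fp16=True)  -> ~3.06 GB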
def get_optimal_chunk_size(self,
frame_height: int,
frame_width: int,
target_memory_gb: Optional[float] = None,
fp16: bool = True) -> int:
"""
Calculate optimal chunk size for processing
Args:
frame_height: Frame height in pixels
frame_width: Frame width in pixels
target_memory_gb: Target memory usage (defaults to 80% of max VRAM)
fp16: Whether using FP16 precision
Returns:
Optimal number of frames per chunk
"""
if target_memory_gb is None:
target_memory_gb = self.max_vram_gb * 0.8
# Binary search for optimal chunk size
min_frames = 1
max_frames = 1000
optimal_frames = min_frames
while min_frames <= max_frames:
mid_frames = (min_frames + max_frames) // 2
estimated_memory = self.estimate_processing_memory(
frame_height, frame_width, mid_frames, fp16
)
if estimated_memory <= target_memory_gb:
optimal_frames = mid_frames
min_frames = mid_frames + 1
else:
max_frames = mid_frames - 1
return max(optimal_frames, 1)
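# Rough intuition for the search above (illustrative numbers): with a 10 GB manager the
# default target is 8 GB; at 1080p FP16 each frame costs ~0.035 GB (frame + 2x working),
# and the fixed ~2.5 GB model overhead leaves room for roughly (8 - 2.5) / 0.035 ~= 158
# frames, which is where the binary search converges:
#     manager.get_optimal_chunk_size(1080, 1920, fp16=True)  # -> ~158 on a 10 GB budget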
@contextmanager
def memory_monitor(self, operation_name: str = "operation"):
"""Context manager for monitoring memory usage during operations"""
start_stats = self.get_memory_usage()
start_time = time.time()
print(f"Starting {operation_name}")
print(f"Initial VRAM: {start_stats['vram_allocated']:.2f} GB allocated, "
f"{start_stats['vram_free']:.2f} GB free")
try:
yield self
finally:
end_stats = self.get_memory_usage()
end_time = time.time()
vram_diff = end_stats['vram_allocated'] - start_stats['vram_allocated']
duration = end_time - start_time
print(f"Completed {operation_name} in {duration:.1f}s")
print(f"Final VRAM: {end_stats['vram_allocated']:.2f} GB allocated, "
f"{end_stats['vram_free']:.2f} GB free")
print(f"VRAM change: {vram_diff:+.2f} GB")
# Update peak stats
self.memory_stats['peak_allocated'] = max(
self.memory_stats['peak_allocated'],
end_stats['vram_allocated']
)
self.memory_stats['peak_reserved'] = max(
self.memory_stats['peak_reserved'],
end_stats['vram_reserved']
)
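# Typical use of the context manager above (illustrative; "propagate masks" is a placeholder name):
#     with manager.memory_monitor("propagate masks"):
#         ...  # run the model
# It prints VRAM before and after the block, reports the delta, and records peaks in memory_stats.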
def print_memory_report(self):
"""Print detailed memory usage report"""
stats = self.get_memory_usage()
print("\n" + "="*50)
print("MEMORY USAGE REPORT")
print("="*50)
if self.device == "cuda":
print(f"VRAM Allocated: {stats['vram_allocated']:.2f} GB")
print(f"VRAM Reserved: {stats['vram_reserved']:.2f} GB")
print(f"VRAM Free: {stats['vram_free']:.2f} GB")
print(f"Peak Allocated: {self.memory_stats['peak_allocated']:.2f} GB")
print(f"Peak Reserved: {self.memory_stats['peak_reserved']:.2f} GB")
print(f"Max VRAM Limit: {self.max_vram_gb:.2f} GB")
utilization = (stats['vram_allocated'] / self.max_vram_gb) * 100
print(f"VRAM Utilization: {utilization:.1f}%")
print(f"\nSystem RAM Used: {stats['ram_used']:.2f} GB")
print(f"System RAM Available: {stats['ram_available']:.2f} GB")
print(f"System RAM Usage: {stats['ram_percent']:.1f}%")
print("="*50 + "\n")
def emergency_cleanup(self):
"""Emergency memory cleanup when running low"""
print("WARNING: Running low on memory, performing emergency cleanup...")
self.cleanup_memory(aggressive=True)
# Additional cleanup steps
if self.device == "cuda" and torch.cuda.is_available():
torch.cuda.reset_peak_memory_stats()
stats = self.get_memory_usage()
print(f"After cleanup - VRAM: {stats['vram_allocated']:.2f} GB, "
f"Free: {stats['vram_free']:.2f} GB")
def should_emergency_cleanup(self) -> bool:
"""Check if emergency cleanup is needed"""
stats = self.get_memory_usage()
if self.device == "cuda":
return stats['vram_free'] < 1.0 # Less than 1GB free
else:
return stats['ram_available'] < 2.0 # Less than 2GB RAM available
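
# Illustrative end-to-end sketch (not part of the original commit; values are placeholders).
if __name__ == "__main__":
    manager = VRAMManager(max_vram_gb=10.0)
    manager.print_memory_report()

    # Plan chunking for a hypothetical 1080p FP16 clip.
    chunk = manager.get_optimal_chunk_size(frame_height=1080, frame_width=1920, fp16=True)
    print(f"Suggested frames per chunk: {chunk}")

    # Monitor a dummy allocation and clean up afterwards.
    with manager.memory_monitor("dummy allocation"):
        if manager.device == "cuda":
            x = torch.empty(256, 1024, 1024, dtype=torch.float16, device="cuda")
            del x
    manager.cleanup_memory(aggressive=True)

    if manager.should_emergency_cleanup():
        manager.emergency_cleanup()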