This commit is contained in:
2025-07-26 15:18:32 -07:00
parent fb51e82fd4
commit 36f58acb8b

debug_memory_leak.py (new file, 151 lines)

@@ -0,0 +1,151 @@
#!/usr/bin/env python3
"""
Debug memory leak between chunks - track exactly where memory accumulates
"""
import psutil
import gc
from pathlib import Path
import sys
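# NOTE: the heavy project imports (vr180_matting.*) are deliberately deferred into
# simulate_chunk_processing() below, so the "0. Baseline" reading reflects only the
# interpreter plus psutil, not the processing stack.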
def detailed_memory_check(label):
    """Print and return detailed memory info for the current process."""
    process = psutil.Process()
    memory_info = process.memory_info()
    rss_gb = memory_info.rss / (1024**3)
    vms_gb = memory_info.vms / (1024**3)

    # System memory
    sys_memory = psutil.virtual_memory()
    available_gb = sys_memory.available / (1024**3)

    print(f"🔍 {label}:")
    print(f"   RSS: {rss_gb:.2f} GB (physical memory)")
    print(f"   VMS: {vms_gb:.2f} GB (virtual memory)")
    print(f"   Available: {available_gb:.2f} GB")
    return rss_gb
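# RSS (resident set size) is the physical RAM the process actually occupies and is the
# number that matters for out-of-memory risk; VMS counts reserved virtual address space
# and is typically much larger. All "Growth" figures below are RSS deltas.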
def simulate_chunk_processing(config_path):
    """Simulate the chunk processing to see where memory accumulates."""
print("🚀 SIMULATING CHUNK PROCESSING TO FIND MEMORY LEAK")
print("=" * 60)
base_memory = detailed_memory_check("0. Baseline")
# Step 1: Import everything (with lazy loading)
print("\n📦 Step 1: Imports")
from vr180_matting.config import VR180Config
from vr180_matting.vr180_processor import VR180Processor
import_memory = detailed_memory_check("1. After imports")
import_growth = import_memory - base_memory
print(f" Growth: +{import_growth:.2f} GB")
    # Step 2: Load config
    print("\n⚙️ Step 2: Config loading")
    config = VR180Config.from_yaml(config_path)

    config_memory = detailed_memory_check("2. After config load")
    config_growth = config_memory - import_memory
    print(f"   Growth: +{config_growth:.2f} GB")
    # Step 3: Initialize processor (models still lazy)
    print("\n🏗️ Step 3: Processor initialization")
    processor = VR180Processor(config)

    processor_memory = detailed_memory_check("3. After processor init")
    processor_growth = processor_memory - config_memory
    print(f"   Growth: +{processor_growth:.2f} GB")

    # Step 4: Load video info (lightweight)
    print("\n🎬 Step 4: Video info loading")
    try:
        video_info = processor.load_video_info(config.input.video_path)
        print(f"   Video: {video_info.get('width', 'unknown')}x{video_info.get('height', 'unknown')}, "
              f"{video_info.get('total_frames', 'unknown')} frames")
    except Exception as e:
        print(f"   Warning: Could not load video info: {e}")

    video_info_memory = detailed_memory_check("4. After video info")
    video_info_growth = video_info_memory - processor_memory
    print(f"   Growth: +{video_info_growth:.2f} GB")
    # Step 5: Simulate chunk 0 processing (this is where models actually load)
    print("\n🔄 Step 5: Simulating chunk 0 processing...")

    # This is where the real memory usage starts
    print("   Loading first 10 frames to trigger model loading...")
    try:
        # Read a small number of frames to trigger model loading
        frames = processor.read_video_frames(
            config.input.video_path,
            start_frame=0,
            num_frames=10,  # Just 10 frames to trigger model loading
            scale_factor=config.processing.scale_factor
        )

        frames_memory = detailed_memory_check("5a. After reading 10 frames")
        frames_growth = frames_memory - video_info_memory
        print(f"   10 frames growth: +{frames_growth:.2f} GB")

        # Free frames
        del frames
        gc.collect()

        after_free_memory = detailed_memory_check("5b. After freeing 10 frames")
        free_improvement = frames_memory - after_free_memory
        print(f"   Memory freed: -{free_improvement:.2f} GB")
    except Exception as e:
        print(f"   Could not simulate frame loading: {e}")
        after_free_memory = video_info_memory
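    # Interpreting step 5: frames_growth includes both the 10 decoded frames and any
    # models loaded lazily on first use; whatever remains after `del frames` plus
    # gc.collect() approximates the persistent model footprint (assuming the processor
    # does not keep its own reference to the frames).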
print(f"\n📊 MEMORY ANALYSIS:")
print(f" Baseline → Final: {base_memory:.2f}GB → {after_free_memory:.2f}GB")
print(f" Total growth: +{after_free_memory - base_memory:.2f}GB")
if after_free_memory - base_memory > 10:
print(f" 🔴 HIGH: Memory growth > 10GB before any real processing")
print(f" 💡 This suggests model loading is using too much memory")
elif after_free_memory - base_memory > 5:
print(f" 🟡 MODERATE: Memory growth 5-10GB")
print(f" 💡 Normal for model loading, but monitor chunk processing")
else:
print(f" 🟢 GOOD: Memory growth < 5GB")
print(f" 💡 Initialization memory usage is reasonable")
print(f"\n🎯 KEY INSIGHTS:")
if import_growth > 1:
print(f" - Import growth: {import_growth:.2f}GB (fixed with lazy loading)")
if processor_growth > 10:
print(f" - Processor init: {processor_growth:.2f}GB (investigate model pre-loading)")
print(f"\n💡 RECOMMENDATIONS:")
if after_free_memory - base_memory > 15:
print(f" 1. Reduce chunk_size to 200-300 frames")
print(f" 2. Use smaller models (yolov8n instead of yolov8m)")
print(f" 3. Enable FP16 mode for SAM2")
elif after_free_memory - base_memory > 8:
print(f" 1. Monitor chunk processing carefully")
print(f" 2. Use streaming merge (should be automatic)")
print(f" 3. Current settings may be acceptable")
else:
print(f" 1. Settings look good for initialization")
print(f" 2. Focus on chunk processing memory leaks")
def main():
    if len(sys.argv) != 2:
        print("Usage: python debug_memory_leak.py <config.yaml>")
        print("This simulates initialization to find memory leaks")
        sys.exit(1)

    config_path = sys.argv[1]
    if not Path(config_path).exists():
        print(f"Config file not found: {config_path}")
        sys.exit(1)

    simulate_chunk_processing(config_path)
if __name__ == "__main__":
    main()