Files
test2/memory_profiler_script.py
2025-07-26 14:52:44 -07:00

249 lines
9.0 KiB
Python

#!/usr/bin/env python3
"""
Memory profiling script for VR180 matting pipeline
Tracks memory usage during processing to identify leaks
"""
import sys
import time
import psutil
import tracemalloc
import subprocess
import gc
from pathlib import Path
from typing import Dict, List, Tuple
import threading
import json
class MemoryProfiler:
    """Background memory profiler for the VR180 matting pipeline.

    Samples this process's RSS, system-wide memory, GPU VRAM (via
    ``nvidia-smi``, when available) and tracemalloc usage on a daemon
    thread, and writes the full timeline plus the top allocation sites
    to a JSON report. Partial snapshots are saved periodically so a
    crash loses at most a few samples.
    """

    def __init__(self, output_file: str = "memory_profile.json"):
        self.output_file = output_file       # final JSON report path
        self.data: List[Dict] = []           # timeline of sampled data points
        self.process = psutil.Process()      # this process, for RSS readings
        self.running = False                 # flag polled by the monitor thread
        self.thread = None                   # background sampling thread
        self.checkpoint_counter = 0          # suffix for partial-save files

    def start_monitoring(self, interval: float = 1.0):
        """Start tracemalloc and launch the background sampling thread.

        Args:
            interval: Seconds between memory samples.
        """
        tracemalloc.start()
        self.running = True
        self.thread = threading.Thread(target=self._monitor_loop, args=(interval,))
        self.thread.daemon = True  # don't block interpreter exit
        self.thread.start()
        print(f"🔍 Memory monitoring started (interval: {interval}s)")

    def stop_monitoring(self):
        """Stop sampling, write the final JSON report, and stop tracemalloc."""
        self.running = False
        if self.thread:
            self.thread.join()
        try:
            # Snapshot the biggest allocation sites before tracing stops
            snapshot = tracemalloc.take_snapshot()
            top_stats = snapshot.statistics('lineno')
            # Save detailed results
            results = {
                'timeline': self.data,
                'top_memory_allocations': [
                    {
                        'file': stat.traceback.format()[0],
                        'size_mb': stat.size / 1024 / 1024,
                        'count': stat.count
                    }
                    for stat in top_stats[:20]  # Top 20 allocations
                ],
                'summary': {
                    # default=0 handles an empty timeline
                    'peak_rss_gb': max((d['rss_gb'] for d in self.data), default=0),
                    'peak_vram_gb': max((d['vram_gb'] for d in self.data), default=0),
                    'total_samples': len(self.data)
                }
            }
            with open(self.output_file, 'w') as f:
                json.dump(results, f, indent=2)
        finally:
            # Always stop tracing, even if snapshotting or saving failed,
            # so a later profiling run can start cleanly.
            tracemalloc.stop()
        print(f"📊 Memory profile saved to {self.output_file}")

    def _sample(self) -> Dict:
        """Collect one data point: RSS, system memory, VRAM, traced bytes."""
        # Process RSS
        memory_info = self.process.memory_info()
        rss_gb = memory_info.rss / (1024**3)
        # System-wide memory
        sys_memory = psutil.virtual_memory()
        sys_used_gb = (sys_memory.total - sys_memory.available) / (1024**3)
        sys_available_gb = sys_memory.available / (1024**3)
        # GPU memory (best effort: stays zero when nvidia-smi is unavailable)
        vram_gb = 0
        vram_free_gb = 0
        try:
            result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.free',
                                     '--format=csv,noheader,nounits'],
                                    capture_output=True, text=True, timeout=5)
            if result.returncode == 0:
                lines = result.stdout.strip().split('\n')
                if lines and lines[0]:
                    # First GPU only; nvidia-smi reports MiB with nounits
                    used, free = lines[0].split(', ')
                    vram_gb = float(used) / 1024
                    vram_free_gb = float(free) / 1024
        except Exception:
            pass  # no GPU / nvidia-smi missing or timed out: keep zeros
        # Tracemalloc current usage (zero if tracing is not active)
        try:
            current, _peak = tracemalloc.get_traced_memory()
            traced_mb = current / (1024**2)
        except Exception:
            traced_mb = 0
        return {
            'timestamp': time.time(),
            'rss_gb': rss_gb,
            'vram_gb': vram_gb,
            'vram_free_gb': vram_free_gb,
            'sys_used_gb': sys_used_gb,
            'sys_available_gb': sys_available_gb,
            'traced_mb': traced_mb
        }

    def _monitor_loop(self, interval: float):
        """Continuous monitoring loop run on the background thread."""
        while self.running:
            try:
                data_point = self._sample()
                self.data.append(data_point)
                # Print periodic updates and save partial data
                if len(self.data) % 10 == 0:  # Every 10 samples
                    print(f"🔍 Memory: RSS={data_point['rss_gb']:.2f}GB, VRAM={data_point['vram_gb']:.2f}GB, Sys={data_point['sys_used_gb']:.1f}GB")
                # Save partial data every 30 samples in case of crash
                if len(self.data) % 30 == 0:
                    self._save_partial_data()
            except Exception as e:
                print(f"Monitoring error: {e}")
            time.sleep(interval)

    def _save_partial_data(self):
        """Save partial data to prevent loss on crash."""
        try:
            partial_file = f"memory_profile_partial_{self.checkpoint_counter}.json"
            with open(partial_file, 'w') as f:
                json.dump({
                    'timeline': self.data,
                    'status': 'partial_save',
                    'samples': len(self.data)
                }, f, indent=2)
            self.checkpoint_counter += 1
        except Exception as e:
            print(f"Failed to save partial data: {e}")

    def log_checkpoint(self, checkpoint_name: str):
        """Tag the most recent sample with *checkpoint_name* and persist it.

        Fix: checkpoints issued before the first background sample arrived
        (e.g. "STARTUP" logged right after start_monitoring with a 2s
        interval) were silently dropped; we now take an immediate sample
        in that case so every checkpoint is recorded.
        """
        if not self.data:
            try:
                self.data.append(self._sample())
            except Exception as e:
                print(f"Checkpoint sampling error: {e}")
                return
        self.data[-1]['checkpoint'] = checkpoint_name
        latest = self.data[-1]
        print(f"📍 CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB")
        # Save checkpoint data immediately
        self._save_partial_data()
def run_with_profiling(config_path: str):
    """Run the VR180 matting pipeline under memory profiling.

    Wraps every pipeline stage (imports, config load, processor init,
    processing) in a MemoryProfiler session, logging a named checkpoint
    after each stage. The profile and a console summary are emitted in
    the ``finally`` block so they appear even when processing fails.
    """
    profiler = MemoryProfiler("memory_profile_detailed.json")
    try:
        profiler.start_monitoring(interval=2.0)  # Sample every 2 seconds
        profiler.log_checkpoint("STARTUP")

        # Imports are deferred so the profiler observes import-time memory.
        print("Importing VR180 processor...")
        from vr180_matting.vr180_processor import VR180Processor
        from vr180_matting.config import VR180Config
        profiler.log_checkpoint("IMPORTS_COMPLETE")

        print(f"Loading config from {config_path}")
        cfg = VR180Config.from_yaml(config_path)
        profiler.log_checkpoint("CONFIG_LOADED")

        print("Initializing VR180 processor...")
        pipeline = VR180Processor(cfg)
        profiler.log_checkpoint("PROCESSOR_INITIALIZED")

        # Reclaim anything droppable before the heavy work begins.
        gc.collect()
        profiler.log_checkpoint("INITIAL_GC_COMPLETE")

        print("Starting VR180 processing...")
        pipeline.process_video()
        profiler.log_checkpoint("PROCESSING_COMPLETE")
    except Exception as e:
        print(f"❌ Error during processing: {e}")
        profiler.log_checkpoint(f"ERROR: {str(e)}")
        raise
    finally:
        profiler.stop_monitoring()
        _print_profiling_summary(profiler)


def _print_profiling_summary(profiler):
    """Print peak memory figures and all recorded checkpoints to stdout."""
    print("\n" + "=" * 60)
    print("MEMORY PROFILING SUMMARY")
    print("=" * 60)
    if profiler.data:
        print(f"Peak RSS Memory: {max(d['rss_gb'] for d in profiler.data):.2f} GB")
        print(f"Peak VRAM Usage: {max(d['vram_gb'] for d in profiler.data):.2f} GB")
        print(f"Total Samples: {len(profiler.data)}")
        marked = [d for d in profiler.data if 'checkpoint' in d]
        if marked:
            print(f"\nCheckpoints ({len(marked)}):")
            for cp in marked:
                print(f" {cp['checkpoint']}: RSS={cp['rss_gb']:.2f}GB, VRAM={cp['vram_gb']:.2f}GB")
    print(f"\nDetailed profile saved to: {profiler.output_file}")
def main():
    """CLI entry point: validate arguments, then launch profiled processing."""
    args = sys.argv[1:]
    if len(args) != 1:
        # Wrong arity: print usage and bail out with a failure code.
        print("Usage: python memory_profiler_script.py <config.yaml>")
        print("\nThis script runs VR180 matting with detailed memory profiling")
        print("It will:")
        print("- Monitor RSS, VRAM, and system memory every 2 seconds")
        print("- Track memory allocations with tracemalloc")
        print("- Log checkpoints at key processing stages")
        print("- Save detailed JSON report for analysis")
        sys.exit(1)

    config_path = args[0]
    if not Path(config_path).exists():
        print(f"❌ Config file not found: {config_path}")
        sys.exit(1)

    print("🚀 Starting VR180 Memory Profiling")
    print(f"Config: {config_path}")
    print("=" * 60)
    run_with_profiling(config_path)


if __name__ == "__main__":
    main()