more memory fixes hopefully
227
memory_profiler_script.py
Normal file
@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""
Memory profiling script for VR180 matting pipeline
Tracks memory usage during processing to identify leaks
"""

import sys
import time
import psutil
import tracemalloc
import subprocess
import gc
from pathlib import Path
from typing import Dict, List, Tuple
import threading
import json


class MemoryProfiler:
    def __init__(self, output_file: str = "memory_profile.json"):
        self.output_file = output_file
        self.data = []
        self.process = psutil.Process()
        self.running = False
        self.thread = None

    def start_monitoring(self, interval: float = 1.0):
        """Start continuous memory monitoring"""
        tracemalloc.start()
        self.running = True
        self.thread = threading.Thread(target=self._monitor_loop, args=(interval,))
        self.thread.daemon = True
        self.thread.start()
        print(f"🔍 Memory monitoring started (interval: {interval}s)")

    def stop_monitoring(self):
        """Stop monitoring and save results"""
        self.running = False
        if self.thread:
            self.thread.join()

        # Get tracemalloc snapshot
        snapshot = tracemalloc.take_snapshot()
        top_stats = snapshot.statistics('lineno')

        # Save detailed results
        results = {
            'timeline': self.data,
            'top_memory_allocations': [
                {
                    'file': stat.traceback.format()[0],
                    'size_mb': stat.size / 1024 / 1024,
                    'count': stat.count
                }
                for stat in top_stats[:20]  # Top 20 allocations
            ],
            'summary': {
                'peak_rss_gb': max([d['rss_gb'] for d in self.data]) if self.data else 0,
                'peak_vram_gb': max([d['vram_gb'] for d in self.data]) if self.data else 0,
                'total_samples': len(self.data)
            }
        }

        with open(self.output_file, 'w') as f:
            json.dump(results, f, indent=2)

        tracemalloc.stop()
        print(f"📊 Memory profile saved to {self.output_file}")

    def _monitor_loop(self, interval: float):
        """Continuous monitoring loop"""
        while self.running:
            try:
                # System memory
                memory_info = self.process.memory_info()
                rss_gb = memory_info.rss / (1024**3)

                # System-wide memory
                sys_memory = psutil.virtual_memory()
                sys_used_gb = (sys_memory.total - sys_memory.available) / (1024**3)
                sys_available_gb = sys_memory.available / (1024**3)

                # GPU memory (if available)
                vram_gb = 0
                vram_free_gb = 0
                try:
                    result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.free',
                                             '--format=csv,noheader,nounits'],
                                            capture_output=True, text=True, timeout=5)
                    if result.returncode == 0:
                        lines = result.stdout.strip().split('\n')
                        if lines and lines[0]:
                            used, free = lines[0].split(', ')
                            vram_gb = float(used) / 1024
                            vram_free_gb = float(free) / 1024
                except Exception:
                    pass

                # Tracemalloc current usage
                try:
                    current, peak = tracemalloc.get_traced_memory()
                    traced_mb = current / (1024**2)
                except Exception:
                    traced_mb = 0

                data_point = {
                    'timestamp': time.time(),
                    'rss_gb': rss_gb,
                    'vram_gb': vram_gb,
                    'vram_free_gb': vram_free_gb,
                    'sys_used_gb': sys_used_gb,
                    'sys_available_gb': sys_available_gb,
                    'traced_mb': traced_mb
                }

                self.data.append(data_point)

                # Print periodic updates
                if len(self.data) % 10 == 0:  # Every 10 samples
                    print(f"🔍 Memory: RSS={rss_gb:.2f}GB, VRAM={vram_gb:.2f}GB, Sys={sys_used_gb:.1f}GB")

            except Exception as e:
                print(f"Monitoring error: {e}")

            time.sleep(interval)

    def log_checkpoint(self, checkpoint_name: str):
        """Log a specific checkpoint"""
        if self.data:
            self.data[-1]['checkpoint'] = checkpoint_name
            latest = self.data[-1]
            print(f"📍 CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB")


def run_with_profiling(config_path: str):
    """Run the VR180 matting with memory profiling"""
    profiler = MemoryProfiler("memory_profile_detailed.json")

    try:
        # Start monitoring
        profiler.start_monitoring(interval=2.0)  # Sample every 2 seconds

        # Log initial state
        profiler.log_checkpoint("STARTUP")

        # Import after starting profiler to catch import memory usage
        print("Importing VR180 processor...")
        from vr180_matting.vr180_processor import VR180Processor
        from vr180_matting.config import VR180Config

        profiler.log_checkpoint("IMPORTS_COMPLETE")

        # Load config
        print(f"Loading config from {config_path}")
        config = VR180Config.from_yaml(config_path)

        profiler.log_checkpoint("CONFIG_LOADED")

        # Initialize processor
        print("Initializing VR180 processor...")
        processor = VR180Processor(config)

        profiler.log_checkpoint("PROCESSOR_INITIALIZED")

        # Force garbage collection
        gc.collect()
        profiler.log_checkpoint("INITIAL_GC_COMPLETE")

        # Run processing
        print("Starting VR180 processing...")
        processor.process_video()

        profiler.log_checkpoint("PROCESSING_COMPLETE")

    except Exception as e:
        print(f"❌ Error during processing: {e}")
        profiler.log_checkpoint(f"ERROR: {str(e)}")
        raise
    finally:
        # Stop monitoring and save results
        profiler.stop_monitoring()

        # Print summary
        print("\n" + "="*60)
        print("MEMORY PROFILING SUMMARY")
        print("="*60)

        if profiler.data:
            peak_rss = max([d['rss_gb'] for d in profiler.data])
            peak_vram = max([d['vram_gb'] for d in profiler.data])

            print(f"Peak RSS Memory: {peak_rss:.2f} GB")
            print(f"Peak VRAM Usage: {peak_vram:.2f} GB")
            print(f"Total Samples: {len(profiler.data)}")

            # Show checkpoints
            checkpoints = [d for d in profiler.data if 'checkpoint' in d]
            if checkpoints:
                print(f"\nCheckpoints ({len(checkpoints)}):")
                for cp in checkpoints:
                    print(f"  {cp['checkpoint']}: RSS={cp['rss_gb']:.2f}GB, VRAM={cp['vram_gb']:.2f}GB")

        print(f"\nDetailed profile saved to: {profiler.output_file}")


def main():
    if len(sys.argv) != 2:
        print("Usage: python memory_profiler_script.py <config.yaml>")
        print("\nThis script runs VR180 matting with detailed memory profiling")
        print("It will:")
        print("- Monitor RSS, VRAM, and system memory every 2 seconds")
        print("- Track memory allocations with tracemalloc")
        print("- Log checkpoints at key processing stages")
        print("- Save detailed JSON report for analysis")
        sys.exit(1)

    config_path = sys.argv[1]

    if not Path(config_path).exists():
        print(f"❌ Config file not found: {config_path}")
        sys.exit(1)

    print("🚀 Starting VR180 Memory Profiling")
    print(f"Config: {config_path}")
    print("="*60)

    run_with_profiling(config_path)


if __name__ == "__main__":
    main()
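For reference, main() expects a single YAML config path, so the script would be launched roughly like this (the config filename below is only an example):

    python memory_profiler_script.py configs/vr180_matting.yaml

MemoryProfiler can also be wrapped around other workloads on its own; a minimal sketch, assuming the module is importable and using a hypothetical workload function run_workload():

    from memory_profiler_script import MemoryProfiler

    profiler = MemoryProfiler("standalone_profile.json")
    profiler.start_monitoring(interval=1.0)   # background sampling thread
    try:
        run_workload()                        # hypothetical stand-in for the real work
        profiler.log_checkpoint("WORKLOAD_COMPLETE")
    finally:
        profiler.stop_monitoring()            # writes standalone_profile.json and stops tracemalloc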