quick check

This commit is contained in:
2025-07-26 14:52:44 -07:00
parent 734445cf48
commit ba8706b7ae
2 changed files with 148 additions and 1 deletions

View File

@@ -22,6 +22,7 @@ class MemoryProfiler:
self.process = psutil.Process()
self.running = False
self.thread = None
self.checkpoint_counter = 0
def start_monitoring(self, interval: float = 1.0):
"""Start continuous memory monitoring"""
@@ -114,21 +115,42 @@ class MemoryProfiler:
self.data.append(data_point)
# Print periodic updates
# Print periodic updates and save partial data
if len(self.data) % 10 == 0: # Every 10 samples
print(f"🔍 Memory: RSS={rss_gb:.2f}GB, VRAM={vram_gb:.2f}GB, Sys={sys_used_gb:.1f}GB")
# Save partial data every 30 samples in case of crash
if len(self.data) % 30 == 0:
self._save_partial_data()
except Exception as e:
print(f"Monitoring error: {e}")
time.sleep(interval)
def _save_partial_data(self):
    """Write the samples collected so far to a numbered JSON file.

    Each successful call creates ``memory_profile_partial_<N>.json`` in the
    current working directory, where ``N`` is ``self.checkpoint_counter``,
    and then increments that counter. The file holds the timeline, a
    ``'partial_save'`` status marker and the number of samples.

    The data is first written to a ``.tmp`` sibling and only renamed to the
    final name once the write has completed, so an interruption in the
    middle of a save cannot leave a truncated JSON file under the final
    name.

    Any failure is reported on stdout and swallowed: saving is best-effort
    and must not interrupt the caller.
    """
    import os  # local import: the file-level import block is not visible here

    try:
        partial_file = f"memory_profile_partial_{self.checkpoint_counter}.json"
        tmp_file = f"{partial_file}.tmp"

        # Snapshot the list so 'samples' always matches the serialized
        # timeline, even if more samples are appended during the write.
        timeline = list(self.data)

        with open(tmp_file, 'w', encoding='utf-8') as f:
            json.dump({
                'timeline': timeline,
                'status': 'partial_save',
                'samples': len(timeline),
            }, f, indent=2)

        # Publish the file under its final name only after a complete write.
        os.replace(tmp_file, partial_file)
        self.checkpoint_counter += 1
    except Exception as e:
        print(f"Failed to save partial data: {e}")
def log_checkpoint(self, checkpoint_name: str):
    """Tag the most recent sample with a checkpoint name and save the data.

    Args:
        checkpoint_name: Label stored under the ``'checkpoint'`` key of the
            newest entry in ``self.data`` and echoed on stdout together
            with that entry's ``'rss_gb'`` and ``'vram_gb'`` values.

    When no sample has been recorded yet there is nothing to tag; a short
    notice is printed and nothing is saved.
    """
    if not self.data:
        print(f"📍 CHECKPOINT [{checkpoint_name}]: no samples recorded yet, skipped")
        return

    # Look the newest sample up once so the entry that gets tagged is the
    # same one reported below, even if another sample is appended in
    # between (presumably by the monitoring loop - confirm against caller).
    latest = self.data[-1]
    latest['checkpoint'] = checkpoint_name
    print(f"📍 CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB")

    # Save checkpoint data immediately
    self._save_partial_data()
def run_with_profiling(config_path: str):
"""Run the VR180 matting with memory profiling"""

125
quick_memory_check.py Normal file
View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
Quick memory and system check before running full pipeline
"""
import psutil
import subprocess
import sys
from pathlib import Path
_GIB = 1024 ** 3  # bytes per GiB; psutil and stat() report sizes in bytes


def _report_gpus():
    """Print per-GPU VRAM usage obtained from ``nvidia-smi``; never raises."""
    try:
        result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.used,memory.free',
                                 '--format=csv,noheader,nounits'],
                                capture_output=True, text=True, timeout=10)
    except Exception as e:
        # Covers a missing binary as well as the 10 second timeout.
        print(f"\n⚠️ Could not get GPU info: {e}")
        return

    if result.returncode != 0:
        print(f"\n⚠️ Could not get GPU info: nvidia-smi exited with code {result.returncode}")
        return

    print("\n🎮 GPU:")
    for i, line in enumerate(result.stdout.strip().split('\n')):
        parts = line.split(', ')
        if not line.strip() or len(parts) < 4:
            continue
        name, total, used, free = parts[:4]
        print(f" GPU {i}: {name}")
        try:
            # The query uses 'nounits', so the figures are bare numbers;
            # dividing by 1024 converts them to the GB shown below.
            total_gb = float(total) / 1024
            used_gb = float(used) / 1024
            free_gb = float(free) / 1024
        except ValueError:
            # One unparsable row must not hide the remaining GPUs.
            print(" VRAM: unavailable")
            continue
        used_pct = used_gb / total_gb * 100 if total_gb else 0.0
        print(f" VRAM: {used_gb:.1f}/{total_gb:.1f} GB ({used_pct:.1f}% used)")
        print(f" Free: {free_gb:.1f} GB")


def _report_config(config_path):
    """Print key settings of the YAML config; return False only if the file is missing."""
    if not Path(config_path).exists():
        print(f"\n❌ Config file not found: {config_path}")
        return False

    print(f"\n✅ Config file found: {config_path}")

    # Try to load and show key settings
    try:
        import yaml  # imported lazily so a missing PyYAML only disables this section
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        if not isinstance(config, dict):
            # An empty file loads as None; without a top-level mapping
            # there are no sections to show.
            print(" ⚠️ Could not parse config: expected a mapping at the top level")
            return True

        print("📋 Key Settings:")

        if 'processing' in config:
            proc = config['processing']
            print(f" Chunk size: {proc.get('chunk_size', 'default')}")
            print(f" Scale factor: {proc.get('scale_factor', 'default')}")

        if 'hardware' in config:
            hw = config['hardware']
            print(f" Max VRAM: {hw.get('max_vram_gb', 'default')} GB")

        if 'input' in config:
            inp = config['input']
            video_path = inp.get('video_path', '')
            if video_path and Path(video_path).exists():
                size_gb = Path(video_path).stat().st_size / _GIB
                print(f" Input video: {video_path} ({size_gb:.1f} GB)")
            else:
                print(f" ⚠️ Input video not found: {video_path}")
    except Exception as e:
        print(f" ⚠️ Could not parse config: {e}")
    return True


def check_system(config_path=None):
    """Check system resources before starting.

    Prints a report of RAM, GPU memory (via ``nvidia-smi``), disk usage of
    ``/`` and, when a config path is known, the key settings of that YAML
    config, followed by a memory-safety verdict.

    Args:
        config_path: Path of the YAML config to inspect. When omitted, the
            first command-line argument (``sys.argv[1]``) is used if there
            is one; otherwise the config section is skipped.

    Returns:
        False when the config path does not exist or less than 10 GB of RAM
        is available, True otherwise. The GPU, disk and config-content
        sections are informational and never affect the result.
    """
    print("🔍 SYSTEM RESOURCE CHECK")
    print("=" * 50)

    # Memory info
    memory = psutil.virtual_memory()
    available_gb = memory.available / _GIB
    print("📊 RAM:")
    print(f" Total: {memory.total / _GIB:.1f} GB")
    print(f" Available: {available_gb:.1f} GB")
    print(f" Used: {(memory.total - memory.available) / _GIB:.1f} GB ({memory.percent:.1f}%)")

    # GPU info
    _report_gpus()

    # Disk space
    disk = psutil.disk_usage('/')
    print("\n💾 Disk (/):")
    print(f" Total: {disk.total / _GIB:.1f} GB")
    print(f" Used: {disk.used / _GIB:.1f} GB ({disk.used/disk.total*100:.1f}%)")
    print(f" Free: {disk.free / _GIB:.1f} GB")

    # Check config file
    if config_path is None and len(sys.argv) > 1:
        config_path = sys.argv[1]
    if config_path is not None and not _report_config(config_path):
        return False

    # Memory safety warnings
    print("\n⚠️ MEMORY SAFETY CHECKS:")
    safe = True
    if available_gb < 10:
        print(f" 🔴 LOW MEMORY: Only {available_gb:.1f}GB available")
        print(" Consider: reducing chunk_size or scale_factor")
        safe = False
    elif available_gb < 20:
        print(f" 🟡 MODERATE MEMORY: {available_gb:.1f}GB available")
        print(" Recommend: chunk_size ≤ 300, scale_factor ≤ 0.5")
    else:
        print(f" 🟢 GOOD MEMORY: {available_gb:.1f}GB available")

    # Close the report on every verdict, including the low-memory one.
    print("\n" + "=" * 50)
    return safe
def main():
    """Command-line entry point for the pre-flight resource check.

    Expects exactly one argument, the config file path. Prints usage and
    exits with status 1 when the argument count is wrong, and exits with
    status 1 when ``check_system()`` reports a failure. On success it
    prints the commands for running the pipeline with and without memory
    profiling.
    """
    # Guard: exactly one user-supplied argument is required.
    if len(sys.argv) != 2:
        for usage_line in (
            "Usage: python quick_memory_check.py <config.yaml>",
            "\nThis checks system resources before running VR180 matting",
        ):
            print(usage_line)
        sys.exit(1)

    # Guard: stop with a failure status when the check does not pass.
    if not check_system():
        print("❌ System check failed - address issues before running")
        sys.exit(1)

    config_arg = sys.argv[1]
    print("✅ System check passed - safe to run VR180 matting")
    print("\nTo run with memory profiling:")
    print(f" python memory_profiler_script.py {config_arg}")
    print("\nTo run normally:")
    print(f" vr180-matting {config_arg}")
# Run the check only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()