From ba8706b7ae3c40dd22c8bf311da0622b968ee7c4 Mon Sep 17 00:00:00 2001 From: Scott Register Date: Sat, 26 Jul 2025 14:52:44 -0700 Subject: [PATCH] quick check --- memory_profiler_script.py | 24 +++++++- quick_memory_check.py | 125 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 quick_memory_check.py diff --git a/memory_profiler_script.py b/memory_profiler_script.py index 80f0655..95ead78 100644 --- a/memory_profiler_script.py +++ b/memory_profiler_script.py @@ -22,6 +22,7 @@ class MemoryProfiler: self.process = psutil.Process() self.running = False self.thread = None + self.checkpoint_counter = 0 def start_monitoring(self, interval: float = 1.0): """Start continuous memory monitoring""" @@ -114,21 +115,42 @@ class MemoryProfiler: self.data.append(data_point) - # Print periodic updates + # Print periodic updates and save partial data if len(self.data) % 10 == 0: # Every 10 samples print(f"šŸ” Memory: RSS={rss_gb:.2f}GB, VRAM={vram_gb:.2f}GB, Sys={sys_used_gb:.1f}GB") + # Save partial data every 30 samples in case of crash + if len(self.data) % 30 == 0: + self._save_partial_data() + except Exception as e: print(f"Monitoring error: {e}") time.sleep(interval) + def _save_partial_data(self): + """Save partial data to prevent loss on crash""" + try: + partial_file = f"memory_profile_partial_{self.checkpoint_counter}.json" + with open(partial_file, 'w') as f: + json.dump({ + 'timeline': self.data, + 'status': 'partial_save', + 'samples': len(self.data) + }, f, indent=2) + self.checkpoint_counter += 1 + except Exception as e: + print(f"Failed to save partial data: {e}") + def log_checkpoint(self, checkpoint_name: str): """Log a specific checkpoint""" if self.data: self.data[-1]['checkpoint'] = checkpoint_name latest = self.data[-1] print(f"šŸ“ CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB") + + # Save checkpoint data immediately + self._save_partial_data() def 
run_with_profiling(config_path: str): """Run the VR180 matting with memory profiling""" diff --git a/quick_memory_check.py b/quick_memory_check.py new file mode 100644 index 0000000..69d16ac --- /dev/null +++ b/quick_memory_check.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +Quick memory and system check before running full pipeline +""" + +import psutil +import subprocess +import sys +from pathlib import Path + +def check_system(): + """Check system resources before starting""" + print("šŸ” SYSTEM RESOURCE CHECK") + print("=" * 50) + + # Memory info + memory = psutil.virtual_memory() + print(f"šŸ“Š RAM:") + print(f" Total: {memory.total / (1024**3):.1f} GB") + print(f" Available: {memory.available / (1024**3):.1f} GB") + print(f" Used: {(memory.total - memory.available) / (1024**3):.1f} GB ({memory.percent:.1f}%)") + + # GPU info + try: + result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.used,memory.free', + '--format=csv,noheader,nounits'], + capture_output=True, text=True, timeout=10) + if result.returncode == 0: + lines = result.stdout.strip().split('\n') + print(f"\nšŸŽ® GPU:") + for i, line in enumerate(lines): + if line.strip(): + parts = line.split(', ') + if len(parts) >= 4: + name, total, used, free = parts[:4] + total_gb = float(total) / 1024 + used_gb = float(used) / 1024 + free_gb = float(free) / 1024 + print(f" GPU {i}: {name}") + print(f" VRAM: {used_gb:.1f}/{total_gb:.1f} GB ({used_gb/total_gb*100:.1f}% used)") + print(f" Free: {free_gb:.1f} GB") + except Exception as e: + print(f"\nāš ļø Could not get GPU info: {e}") + + # Disk space + disk = psutil.disk_usage('/') + print(f"\nšŸ’¾ Disk (/):") + print(f" Total: {disk.total / (1024**3):.1f} GB") + print(f" Used: {disk.used / (1024**3):.1f} GB ({disk.used/disk.total*100:.1f}%)") + print(f" Free: {disk.free / (1024**3):.1f} GB") + + # Check config file + if len(sys.argv) > 1: + config_path = sys.argv[1] + if Path(config_path).exists(): + print(f"\nāœ… Config file 
found: {config_path}") + + # Try to load and show key settings + try: + import yaml + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + print(f"šŸ“‹ Key Settings:") + if 'processing' in config: + proc = config['processing'] + print(f" Chunk size: {proc.get('chunk_size', 'default')}") + print(f" Scale factor: {proc.get('scale_factor', 'default')}") + + if 'hardware' in config: + hw = config['hardware'] + print(f" Max VRAM: {hw.get('max_vram_gb', 'default')} GB") + + if 'input' in config: + inp = config['input'] + video_path = inp.get('video_path', '') + if video_path and Path(video_path).exists(): + size_gb = Path(video_path).stat().st_size / (1024**3) + print(f" Input video: {video_path} ({size_gb:.1f} GB)") + else: + print(f" āš ļø Input video not found: {video_path}") + + except Exception as e: + print(f" āš ļø Could not parse config: {e}") + else: + print(f"\nāŒ Config file not found: {config_path}") + return False + + # Memory safety warnings + print(f"\nāš ļø MEMORY SAFETY CHECKS:") + available_gb = memory.available / (1024**3) + + if available_gb < 10: + print(f" šŸ”“ LOW MEMORY: Only {available_gb:.1f}GB available") + print(" Consider: reducing chunk_size or scale_factor") + return False + elif available_gb < 20: + print(f" 🟔 MODERATE MEMORY: {available_gb:.1f}GB available") + print(" Recommend: chunk_size ≤ 300, scale_factor ≤ 0.5") + else: + print(f" 🟢 GOOD MEMORY: {available_gb:.1f}GB available") + + print(f"\n" + "=" * 50) + return True + +def main(): + if len(sys.argv) != 2: + print("Usage: python quick_memory_check.py <config.yaml>") + print("\nThis checks system resources before running VR180 matting") + sys.exit(1) + + safe_to_run = check_system() + + if safe_to_run: + print("āœ… System check passed - safe to run VR180 matting") + print("\nTo run with memory profiling:") + print(f" python memory_profiler_script.py {sys.argv[1]}") + print("\nTo run normally:") + print(f" vr180-matting {sys.argv[1]}") + else: + print("āŒ System check failed - 
address issues before running") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file