From ba8706b7ae3c40dd22c8bf311da0622b968ee7c4 Mon Sep 17 00:00:00 2001 From: Scott Register Date: Sat, 26 Jul 2025 14:52:44 -0700 Subject: [PATCH] quick check --- memory_profiler_script.py | 24 +++++++- quick_memory_check.py | 125 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 quick_memory_check.py diff --git a/memory_profiler_script.py b/memory_profiler_script.py index 80f0655..95ead78 100644 --- a/memory_profiler_script.py +++ b/memory_profiler_script.py @@ -22,6 +22,7 @@ class MemoryProfiler: self.process = psutil.Process() self.running = False self.thread = None + self.checkpoint_counter = 0 def start_monitoring(self, interval: float = 1.0): """Start continuous memory monitoring""" @@ -114,21 +115,42 @@ class MemoryProfiler: self.data.append(data_point) - # Print periodic updates + # Print periodic updates and save partial data if len(self.data) % 10 == 0: # Every 10 samples print(f"šŸ” Memory: RSS={rss_gb:.2f}GB, VRAM={vram_gb:.2f}GB, Sys={sys_used_gb:.1f}GB") + # Save partial data every 30 samples in case of crash + if len(self.data) % 30 == 0: + self._save_partial_data() + except Exception as e: print(f"Monitoring error: {e}") time.sleep(interval) + def _save_partial_data(self): + """Save partial data to prevent loss on crash""" + try: + partial_file = f"memory_profile_partial_{self.checkpoint_counter}.json" + with open(partial_file, 'w') as f: + json.dump({ + 'timeline': self.data, + 'status': 'partial_save', + 'samples': len(self.data) + }, f, indent=2) + self.checkpoint_counter += 1 + except Exception as e: + print(f"Failed to save partial data: {e}") + def log_checkpoint(self, checkpoint_name: str): """Log a specific checkpoint""" if self.data: self.data[-1]['checkpoint'] = checkpoint_name latest = self.data[-1] print(f"šŸ“ CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB") + + # Save checkpoint data immediately + self._save_partial_data() def 
run_with_profiling(config_path: str): """Run the VR180 matting with memory profiling""" diff --git a/quick_memory_check.py b/quick_memory_check.py new file mode 100644 index 0000000..69d16ac --- /dev/null +++ b/quick_memory_check.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +Quick memory and system check before running full pipeline +""" + +import psutil +import subprocess +import sys +from pathlib import Path + +def check_system(): + """Check system resources before starting""" + print("šŸ” SYSTEM RESOURCE CHECK") + print("=" * 50) + + # Memory info + memory = psutil.virtual_memory() + print(f"šŸ“Š RAM:") + print(f" Total: {memory.total / (1024**3):.1f} GB") + print(f" Available: {memory.available / (1024**3):.1f} GB") + print(f" Used: {(memory.total - memory.available) / (1024**3):.1f} GB ({memory.percent:.1f}%)") + + # GPU info + try: + result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.used,memory.free', + '--format=csv,noheader,nounits'], + capture_output=True, text=True, timeout=10) + if result.returncode == 0: + lines = result.stdout.strip().split('\n') + print(f"\nšŸŽ® GPU:") + for i, line in enumerate(lines): + if line.strip(): + parts = line.split(', ') + if len(parts) >= 4: + name, total, used, free = parts[:4] + total_gb = float(total) / 1024 + used_gb = float(used) / 1024 + free_gb = float(free) / 1024 + print(f" GPU {i}: {name}") + print(f" VRAM: {used_gb:.1f}/{total_gb:.1f} GB ({used_gb/total_gb*100:.1f}% used)") + print(f" Free: {free_gb:.1f} GB") + except Exception as e: + print(f"\nāš ļø Could not get GPU info: {e}") + + # Disk space + disk = psutil.disk_usage('/') + print(f"\nšŸ’¾ Disk (/):") + print(f" Total: {disk.total / (1024**3):.1f} GB") + print(f" Used: {disk.used / (1024**3):.1f} GB ({disk.used/disk.total*100:.1f}%)") + print(f" Free: {disk.free / (1024**3):.1f} GB") + + # Check config file + if len(sys.argv) > 1: + config_path = sys.argv[1] + if Path(config_path).exists(): + print(f"\nāœ… Config file 
found: {config_path}") + + # Try to load and show key settings + try: + import yaml + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + print(f"šŸ“‹ Key Settings:") + if 'processing' in config: + proc = config['processing'] + print(f" Chunk size: {proc.get('chunk_size', 'default')}") + print(f" Scale factor: {proc.get('scale_factor', 'default')}") + + if 'hardware' in config: + hw = config['hardware'] + print(f" Max VRAM: {hw.get('max_vram_gb', 'default')} GB") + + if 'input' in config: + inp = config['input'] + video_path = inp.get('video_path', '') + if video_path and Path(video_path).exists(): + size_gb = Path(video_path).stat().st_size / (1024**3) + print(f" Input video: {video_path} ({size_gb:.1f} GB)") + else: + print(f" āš ļø Input video not found: {video_path}") + + except Exception as e: + print(f" āš ļø Could not parse config: {e}") + else: + print(f"\nāŒ Config file not found: {config_path}") + return False + + # Memory safety warnings + print(f"\nāš ļø MEMORY SAFETY CHECKS:") + available_gb = memory.available / (1024**3) + + if available_gb < 10: + print(f" šŸ”“ LOW MEMORY: Only {available_gb:.1f}GB available") + print(" Consider: reducing chunk_size or scale_factor") + return False + elif available_gb < 20: + print(f" 🟔 MODERATE MEMORY: {available_gb:.1f}GB available") + print(" Recommend: chunk_size ≤ 300, scale_factor ≤ 0.5") + else: + print(f" 🟢 GOOD MEMORY: {available_gb:.1f}GB available") + + print(f"\n" + "=" * 50) + return True + +def main(): + if len(sys.argv) != 2: + print("Usage: python quick_memory_check.py <config.yaml>") + print("\nThis checks system resources before running VR180 matting") + sys.exit(1) + + safe_to_run = check_system() + + if safe_to_run: + print("āœ… System check passed - safe to run VR180 matting") + print("\nTo run with memory profiling:") + print(f" python memory_profiler_script.py {sys.argv[1]}") + print("\nTo run normally:") + print(f" vr180-matting {sys.argv[1]}") + else: + print("āŒ System check failed - 
address issues before running") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file