please god work

This commit is contained in:
2025-07-26 17:44:23 -07:00
parent 262cb00b69
commit 3a547b7c21
2 changed files with 26 additions and 9 deletions

View File

@@ -140,12 +140,25 @@ class CheckpointManager:
def get_completed_chunk_files(self) -> List[Path]:
    """Return the completed chunk files, in ascending chunk-index order.

    Scans chunk indices ``0 .. total_chunks - 1``.  For each index:
      * the chunk file exists            -> collect it;
      * no file, but the checkpoint says
        the chunk completed              -> the file went missing (e.g.
        deleted during a streaming merge); record it, warn, and keep
        scanning so later chunks are still collected;
      * no file and not completed       -> first unprocessed chunk; stop.

    Returns:
        List[Path]: the existing chunk files in chunk order.  NOTE: the
        list may have gaps (skipped indices) when completed chunks have
        lost their files; those indices are reported so they can be
        reprocessed.
    """
    chunk_files = []
    missing_chunks = []
    for i in range(self.status['total_chunks']):
        chunk_file = self.get_chunk_file(i)
        if chunk_file:
            chunk_files.append(chunk_file)
        elif self.is_chunk_completed(i):
            # Marked complete but file is gone — remember and keep going.
            missing_chunks.append(i)
            print(f"⚠️ Chunk {i} marked complete but file missing!")
        else:
            break  # Stop at first unprocessed chunk
    if missing_chunks:
        print(f"❌ Missing checkpoint files for chunks: {missing_chunks}")
        print(f"   This may happen if files were deleted during streaming merge")
        print(f"   These chunks may need to be reprocessed")
    return chunk_files
def mark_processing_complete(self): def mark_processing_complete(self):

View File

@@ -453,20 +453,17 @@ class VideoProcessor:
                 chunk_data.close()
                 del chunk_data, frames_array
-                # Delete chunk file to free disk space
-                try:
-                    chunk_file.unlink()
-                    print(f"   🗑️ Deleted {chunk_file.name}")
-                except Exception as e:
-                    print(f"   ⚠️ Could not delete {chunk_file.name}: {e}")
+                # Don't delete checkpoint files - they're needed for potential resume
+                # The checkpoint system manages cleanup separately
+                print(f"   📋 Keeping checkpoint file: {chunk_file.name}")
                 # Aggressive cleanup and memory monitoring after each chunk
                 self._aggressive_memory_cleanup(f"After streaming merge chunk {i}")
                 # Memory safety check
                 memory_info = self._get_process_memory_info()
-                if memory_info['rss_gb'] > 35:  # Warning if approaching 46GB limit
-                    print(f"⚠️ High memory usage: {memory_info['rss_gb']:.1f}GB - forcing cleanup")
+                if memory_info['total_process_gb'] > 35:  # Warning if approaching 46GB limit
+                    print(f"⚠️ High memory usage: {memory_info['total_process_gb']:.1f}GB - forcing cleanup")
                     gc.collect()
                     import torch
                     if torch.cuda.is_available():
@@ -882,6 +879,13 @@ class VideoProcessor:
         # Use streaming merge to avoid memory accumulation (fixes OOM)
         print("\n🎬 Using streaming merge (no memory accumulation)...")
+        # For resume scenarios, make sure we have all chunk files
+        if resume_info['can_resume']:
+            checkpoint_chunk_files = checkpoint_mgr.get_completed_chunk_files()
+            if len(checkpoint_chunk_files) != len(chunk_files):
+                print(f"⚠️ Using {len(checkpoint_chunk_files)} checkpoint files instead of {len(chunk_files)} temp files")
+                chunk_files = checkpoint_chunk_files
         # Determine audio source for final video
         audio_source = None
         if self.config.output.preserve_audio and Path(self.config.input.video_path).exists():