Add checkpoint/resume support to the video processing pipeline

This commit is contained in:
2025-07-26 17:11:07 -07:00
parent caa4ddb5e0
commit 262cb00b69
2 changed files with 283 additions and 18 deletions

View File

@@ -781,26 +781,55 @@ class VideoProcessor:
print(f"⚠️ Could not verify frame count: {e}")
def process_video(self) -> None:
"""Main video processing pipeline"""
"""Main video processing pipeline with checkpoint/resume support"""
self.processing_stats['start_time'] = time.time()
print("Starting VR180 video processing...")
# Load video info
self.load_video_info(self.config.input.video_path)
# Initialize checkpoint manager
from .checkpoint_manager import CheckpointManager
checkpoint_mgr = CheckpointManager(
self.config.input.video_path,
self.config.output.path
)
# Check for existing checkpoints
resume_info = checkpoint_mgr.get_resume_info()
if resume_info['can_resume']:
print(f"\n🔄 RESUME DETECTED:")
print(f" Found {resume_info['completed_chunks']} completed chunks")
print(f" Continue from where we left off? (saves time!)")
checkpoint_mgr.print_status()
# Calculate chunking parameters
chunk_size, overlap_frames = self.calculate_optimal_chunking()
# Calculate total chunks
total_chunks = 0
for _ in range(0, self.total_frames, chunk_size - overlap_frames):
total_chunks += 1
checkpoint_mgr.set_total_chunks(total_chunks)
# Process video in chunks
chunk_files = [] # Store file paths instead of frame data
temp_chunk_dir = Path(tempfile.mkdtemp(prefix="vr180_chunks_"))
try:
chunk_idx = 0
for start_frame in range(0, self.total_frames, chunk_size - overlap_frames):
end_frame = min(start_frame + chunk_size, self.total_frames)
frames_to_read = end_frame - start_frame
chunk_idx = len(chunk_files)
# Check if this chunk was already processed
existing_chunk = checkpoint_mgr.get_chunk_file(chunk_idx)
if existing_chunk:
print(f"\n✅ Chunk {chunk_idx} already processed: {existing_chunk.name}")
chunk_files.append(existing_chunk)
chunk_idx += 1
continue
print(f"\nProcessing chunk {chunk_idx}: frames {start_frame}-{end_frame}")
# Read chunk frames
@@ -818,7 +847,12 @@ class VideoProcessor:
chunk_path = temp_chunk_dir / f"chunk_{chunk_idx:04d}.npz"
print(f"Saving chunk {chunk_idx} to disk...")
np.savez_compressed(str(chunk_path), frames=matted_frames)
# Save to checkpoint
checkpoint_mgr.save_chunk(chunk_idx, None, source_chunk_path=chunk_path)
chunk_files.append(chunk_path)
chunk_idx += 1
# Free the frames from memory immediately
del matted_frames
@@ -837,21 +871,32 @@ class VideoProcessor:
if self.memory_manager.should_emergency_cleanup():
self.memory_manager.emergency_cleanup()
# Use streaming merge to avoid memory accumulation (fixes OOM)
print("\n🎬 Using streaming merge (no memory accumulation)...")
# Mark chunk processing as complete
checkpoint_mgr.mark_processing_complete()
# Determine audio source for final video
audio_source = None
if self.config.output.preserve_audio and Path(self.config.input.video_path).exists():
audio_source = self.config.input.video_path
# Stream merge chunks directly to output (no memory accumulation)
self.merge_chunks_streaming(
chunk_files=chunk_files,
output_path=self.config.output.path,
overlap_frames=overlap_frames,
audio_source=audio_source
)
# Check if merge was already done
if resume_info.get('merge_complete', False):
print("\n✅ Merge already completed in previous run!")
print(f" Output: {self.config.output.path}")
else:
# Use streaming merge to avoid memory accumulation (fixes OOM)
print("\n🎬 Using streaming merge (no memory accumulation)...")
# Determine audio source for final video
audio_source = None
if self.config.output.preserve_audio and Path(self.config.input.video_path).exists():
audio_source = self.config.input.video_path
# Stream merge chunks directly to output (no memory accumulation)
self.merge_chunks_streaming(
chunk_files=chunk_files,
output_path=self.config.output.path,
overlap_frames=overlap_frames,
audio_source=audio_source
)
# Mark merge as complete
checkpoint_mgr.mark_merge_complete()
print("✅ Streaming merge complete - no memory accumulation!")
@@ -869,11 +914,24 @@ class VideoProcessor:
print("Video processing completed!")
# Option to clean up checkpoints
print("\n🗄️ CHECKPOINT CLEANUP OPTIONS:")
print(" Checkpoints saved successfully and can be cleaned up")
print(" - Keep checkpoints for debugging: checkpoint_mgr.cleanup_checkpoints(keep_chunks=True)")
print(" - Remove all checkpoints: checkpoint_mgr.cleanup_checkpoints()")
print(f" - Checkpoint location: {checkpoint_mgr.checkpoint_dir}")
# For now, keep checkpoints by default (user can manually clean)
print("\n💡 Checkpoints kept for safety. Delete manually when no longer needed.")
finally:
# Clean up temporary chunk files
# Clean up temporary chunk files (but not checkpoints)
if temp_chunk_dir.exists():
print("Cleaning up temporary chunk files...")
shutil.rmtree(temp_chunk_dir)
try:
shutil.rmtree(temp_chunk_dir)
except Exception as e:
print(f"⚠️ Could not clean temp directory: {e}")
def _print_processing_statistics(self):
"""Print detailed processing statistics"""