diff --git a/vr180_matting/checkpoint_manager.py b/vr180_matting/checkpoint_manager.py
index 3272d89..b16fdd0 100644
--- a/vr180_matting/checkpoint_manager.py
+++ b/vr180_matting/checkpoint_manager.py
@@ -140,12 +140,25 @@ class CheckpointManager:
     def get_completed_chunk_files(self) -> List[Path]:
         """Get list of all completed chunk files in order"""
         chunk_files = []
+        missing_chunks = []
+
         for i in range(self.status['total_chunks']):
             chunk_file = self.get_chunk_file(i)
             if chunk_file:
                 chunk_files.append(chunk_file)
             else:
-                break  # Stop at first missing chunk
+                # Check if chunk is marked as completed but file is missing
+                if self.is_chunk_completed(i):
+                    missing_chunks.append(i)
+                    print(f"⚠️ Chunk {i} marked complete but file missing!")
+                else:
+                    break  # Stop at first unprocessed chunk
+
+        if missing_chunks:
+            print(f"❌ Missing checkpoint files for chunks: {missing_chunks}")
+            print(f"   This may happen if files were deleted during streaming merge")
+            print(f"   These chunks may need to be reprocessed")
+
         return chunk_files

     def mark_processing_complete(self):
diff --git a/vr180_matting/video_processor.py b/vr180_matting/video_processor.py
index 3ee8957..94759cd 100644
--- a/vr180_matting/video_processor.py
+++ b/vr180_matting/video_processor.py
@@ -453,20 +453,17 @@ class VideoProcessor:
                 chunk_data.close()
                 del chunk_data, frames_array

-                # Delete chunk file to free disk space
-                try:
-                    chunk_file.unlink()
-                    print(f"   🗑️ Deleted {chunk_file.name}")
-                except Exception as e:
-                    print(f"   ⚠️ Could not delete {chunk_file.name}: {e}")
+                # Don't delete checkpoint files - they're needed for potential resume
+                # The checkpoint system manages cleanup separately
+                print(f"   📋 Keeping checkpoint file: {chunk_file.name}")

                 # Aggressive cleanup and memory monitoring after each chunk
                 self._aggressive_memory_cleanup(f"After streaming merge chunk {i}")

                 # Memory safety check
                 memory_info = self._get_process_memory_info()
-                if memory_info['rss_gb'] > 35:  # Warning if approaching 46GB limit
-                    print(f"⚠️ High memory usage: {memory_info['rss_gb']:.1f}GB - forcing cleanup")
+                if memory_info['total_process_gb'] > 35:  # Warning if approaching 46GB limit
+                    print(f"⚠️ High memory usage: {memory_info['total_process_gb']:.1f}GB - forcing cleanup")
                     gc.collect()
                     import torch
                     if torch.cuda.is_available():
@@ -882,6 +879,13 @@ class VideoProcessor:
         # Use streaming merge to avoid memory accumulation (fixes OOM)
         print("\n🎬 Using streaming merge (no memory accumulation)...")

+        # For resume scenarios, make sure we have all chunk files
+        if resume_info['can_resume']:
+            checkpoint_chunk_files = checkpoint_mgr.get_completed_chunk_files()
+            if len(checkpoint_chunk_files) != len(chunk_files):
+                print(f"⚠️ Using {len(checkpoint_chunk_files)} checkpoint files instead of {len(chunk_files)} temp files")
+                chunk_files = checkpoint_chunk_files
+
         # Determine audio source for final video
         audio_source = None
         if self.config.output.preserve_audio and Path(self.config.input.video_path).exists():