more memory fixes hopeufly

This commit is contained in:
2025-07-26 14:33:36 -07:00
parent 80f947c91b
commit 734445cf48
3 changed files with 350 additions and 66 deletions

227
memory_profiler_script.py Normal file
View File

@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""
Memory profiling script for VR180 matting pipeline
Tracks memory usage during processing to identify leaks
"""
import sys
import time
import psutil
import tracemalloc
import subprocess
import gc
from pathlib import Path
from typing import Dict, List, Tuple
import threading
import json
class MemoryProfiler:
def __init__(self, output_file: str = "memory_profile.json"):
self.output_file = output_file
self.data = []
self.process = psutil.Process()
self.running = False
self.thread = None
def start_monitoring(self, interval: float = 1.0):
"""Start continuous memory monitoring"""
tracemalloc.start()
self.running = True
self.thread = threading.Thread(target=self._monitor_loop, args=(interval,))
self.thread.daemon = True
self.thread.start()
print(f"🔍 Memory monitoring started (interval: {interval}s)")
def stop_monitoring(self):
"""Stop monitoring and save results"""
self.running = False
if self.thread:
self.thread.join()
# Get tracemalloc snapshot
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')
# Save detailed results
results = {
'timeline': self.data,
'top_memory_allocations': [
{
'file': stat.traceback.format()[0],
'size_mb': stat.size / 1024 / 1024,
'count': stat.count
}
for stat in top_stats[:20] # Top 20 allocations
],
'summary': {
'peak_rss_gb': max([d['rss_gb'] for d in self.data]) if self.data else 0,
'peak_vram_gb': max([d['vram_gb'] for d in self.data]) if self.data else 0,
'total_samples': len(self.data)
}
}
with open(self.output_file, 'w') as f:
json.dump(results, f, indent=2)
tracemalloc.stop()
print(f"📊 Memory profile saved to {self.output_file}")
def _monitor_loop(self, interval: float):
"""Continuous monitoring loop"""
while self.running:
try:
# System memory
memory_info = self.process.memory_info()
rss_gb = memory_info.rss / (1024**3)
# System-wide memory
sys_memory = psutil.virtual_memory()
sys_used_gb = (sys_memory.total - sys_memory.available) / (1024**3)
sys_available_gb = sys_memory.available / (1024**3)
# GPU memory (if available)
vram_gb = 0
vram_free_gb = 0
try:
result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.free',
'--format=csv,noheader,nounits'],
capture_output=True, text=True, timeout=5)
if result.returncode == 0:
lines = result.stdout.strip().split('\n')
if lines and lines[0]:
used, free = lines[0].split(', ')
vram_gb = float(used) / 1024
vram_free_gb = float(free) / 1024
except Exception:
pass
# Tracemalloc current usage
try:
current, peak = tracemalloc.get_traced_memory()
traced_mb = current / (1024**2)
except Exception:
traced_mb = 0
data_point = {
'timestamp': time.time(),
'rss_gb': rss_gb,
'vram_gb': vram_gb,
'vram_free_gb': vram_free_gb,
'sys_used_gb': sys_used_gb,
'sys_available_gb': sys_available_gb,
'traced_mb': traced_mb
}
self.data.append(data_point)
# Print periodic updates
if len(self.data) % 10 == 0: # Every 10 samples
print(f"🔍 Memory: RSS={rss_gb:.2f}GB, VRAM={vram_gb:.2f}GB, Sys={sys_used_gb:.1f}GB")
except Exception as e:
print(f"Monitoring error: {e}")
time.sleep(interval)
def log_checkpoint(self, checkpoint_name: str):
"""Log a specific checkpoint"""
if self.data:
self.data[-1]['checkpoint'] = checkpoint_name
latest = self.data[-1]
print(f"📍 CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB")
def run_with_profiling(config_path: str):
"""Run the VR180 matting with memory profiling"""
profiler = MemoryProfiler("memory_profile_detailed.json")
try:
# Start monitoring
profiler.start_monitoring(interval=2.0) # Sample every 2 seconds
# Log initial state
profiler.log_checkpoint("STARTUP")
# Import after starting profiler to catch import memory usage
print("Importing VR180 processor...")
from vr180_matting.vr180_processor import VR180Processor
from vr180_matting.config import VR180Config
profiler.log_checkpoint("IMPORTS_COMPLETE")
# Load config
print(f"Loading config from {config_path}")
config = VR180Config.from_yaml(config_path)
profiler.log_checkpoint("CONFIG_LOADED")
# Initialize processor
print("Initializing VR180 processor...")
processor = VR180Processor(config)
profiler.log_checkpoint("PROCESSOR_INITIALIZED")
# Force garbage collection
gc.collect()
profiler.log_checkpoint("INITIAL_GC_COMPLETE")
# Run processing
print("Starting VR180 processing...")
processor.process_video()
profiler.log_checkpoint("PROCESSING_COMPLETE")
except Exception as e:
print(f"❌ Error during processing: {e}")
profiler.log_checkpoint(f"ERROR: {str(e)}")
raise
finally:
# Stop monitoring and save results
profiler.stop_monitoring()
# Print summary
print("\n" + "="*60)
print("MEMORY PROFILING SUMMARY")
print("="*60)
if profiler.data:
peak_rss = max([d['rss_gb'] for d in profiler.data])
peak_vram = max([d['vram_gb'] for d in profiler.data])
print(f"Peak RSS Memory: {peak_rss:.2f} GB")
print(f"Peak VRAM Usage: {peak_vram:.2f} GB")
print(f"Total Samples: {len(profiler.data)}")
# Show checkpoints
checkpoints = [d for d in profiler.data if 'checkpoint' in d]
if checkpoints:
print(f"\nCheckpoints ({len(checkpoints)}):")
for cp in checkpoints:
print(f" {cp['checkpoint']}: RSS={cp['rss_gb']:.2f}GB, VRAM={cp['vram_gb']:.2f}GB")
print(f"\nDetailed profile saved to: {profiler.output_file}")
def main():
if len(sys.argv) != 2:
print("Usage: python memory_profiler_script.py <config.yaml>")
print("\nThis script runs VR180 matting with detailed memory profiling")
print("It will:")
print("- Monitor RSS, VRAM, and system memory every 2 seconds")
print("- Track memory allocations with tracemalloc")
print("- Log checkpoints at key processing stages")
print("- Save detailed JSON report for analysis")
sys.exit(1)
config_path = sys.argv[1]
if not Path(config_path).exists():
print(f"❌ Config file not found: {config_path}")
sys.exit(1)
print("🚀 Starting VR180 Memory Profiling")
print(f"Config: {config_path}")
print("="*60)
run_with_profiling(config_path)
if __name__ == "__main__":
main()

View File

@@ -387,19 +387,83 @@ class VideoProcessor:
# Green screen background # Green screen background
return np.full_like(frame, self.config.output.background_color, dtype=np.uint8) return np.full_like(frame, self.config.output.background_color, dtype=np.uint8)
def merge_chunks_streaming(self, chunk_files: List[Path], output_path: str,
overlap_frames: int = 0, audio_source: str = None) -> None:
"""
Merge processed chunks using streaming approach (no memory accumulation)
Args:
chunk_files: List of chunk result files (.npz)
output_path: Final output video path
overlap_frames: Number of overlapping frames
audio_source: Audio source file for final video
"""
from .streaming_video_writer import StreamingVideoWriter
if not chunk_files:
raise ValueError("No chunk files to merge")
print(f"🎬 Streaming merge: {len(chunk_files)} chunks → {output_path}")
# Initialize streaming writer
writer = StreamingVideoWriter(
output_path=output_path,
fps=self.video_info['fps'],
audio_source=audio_source
)
try:
# Process each chunk without accumulation
for i, chunk_file in enumerate(chunk_files):
print(f"📼 Processing chunk {i+1}/{len(chunk_files)}: {chunk_file.name}")
# Load chunk (this is the only copy in memory)
chunk_data = np.load(str(chunk_file))
frames = chunk_data['frames'].tolist() # Convert to list of arrays
chunk_data.close()
# Write chunk with streaming writer
writer.write_chunk(
frames=frames,
chunk_index=i,
overlap_frames=overlap_frames if i > 0 else 0,
blend_with_previous=(i > 0 and overlap_frames > 0)
)
# Immediately free memory
del frames, chunk_data
# Delete chunk file to free disk space
try:
chunk_file.unlink()
print(f" 🗑️ Deleted {chunk_file.name}")
except Exception as e:
print(f" ⚠️ Could not delete {chunk_file.name}: {e}")
# Aggressive cleanup every chunk
self._aggressive_memory_cleanup(f"After processing chunk {i}")
# Finalize the video
writer.finalize()
except Exception as e:
print(f"❌ Streaming merge failed: {e}")
writer.cleanup()
raise
print(f"✅ Streaming merge complete: {output_path}")
def merge_overlapping_chunks(self, def merge_overlapping_chunks(self,
chunk_results: List[List[np.ndarray]], chunk_results: List[List[np.ndarray]],
overlap_frames: int) -> List[np.ndarray]: overlap_frames: int) -> List[np.ndarray]:
""" """
Merge overlapping chunks with blending in overlap regions Legacy merge method - DEPRECATED due to memory accumulation
Use merge_chunks_streaming() instead for memory efficiency
Args:
chunk_results: List of chunk results
overlap_frames: Number of overlapping frames
Returns:
Merged frame sequence
""" """
import warnings
warnings.warn("merge_overlapping_chunks() is deprecated due to memory accumulation. Use merge_chunks_streaming()",
DeprecationWarning, stacklevel=2)
if len(chunk_results) == 1: if len(chunk_results) == 1:
return chunk_results[0] return chunk_results[0]
@@ -640,36 +704,23 @@ class VideoProcessor:
if self.memory_manager.should_emergency_cleanup(): if self.memory_manager.should_emergency_cleanup():
self.memory_manager.emergency_cleanup() self.memory_manager.emergency_cleanup()
# Load and merge chunks from disk # Use streaming merge to avoid memory accumulation (fixes OOM)
print("\nLoading and merging chunks...") print("\n🎬 Using streaming merge (no memory accumulation)...")
chunk_results = []
for i, chunk_file in enumerate(chunk_files):
print(f"Loading {chunk_file.name}...")
chunk_data = np.load(str(chunk_file))
chunk_results.append(chunk_data['frames'])
chunk_data.close() # Close the file
# Delete chunk file immediately after loading to free disk space
try:
chunk_file.unlink()
print(f" Deleted chunk file {chunk_file.name}")
except Exception as e:
print(f" Warning: Could not delete chunk file: {e}")
# Aggressive cleanup every few chunks to prevent accumulation
if i % 3 == 0 and i > 0:
self._aggressive_memory_cleanup(f"after loading chunk {i}")
# Merge chunks # Determine audio source for final video
final_frames = self.merge_overlapping_chunks(chunk_results, overlap_frames) audio_source = None
if self.config.output.preserve_audio and Path(self.config.input.video_path).exists():
audio_source = self.config.input.video_path
# Free chunk results after merging - this is critical! # Stream merge chunks directly to output (no memory accumulation)
del chunk_results self.merge_chunks_streaming(
self._aggressive_memory_cleanup("after merging chunks") chunk_files=chunk_files,
output_path=self.config.output.path,
overlap_frames=overlap_frames,
audio_source=audio_source
)
# Save results print("✅ Streaming merge complete - no memory accumulation!")
print(f"Saving {len(final_frames)} processed frames...")
self.save_video(final_frames, self.config.output.path)
# Calculate final statistics # Calculate final statistics
self.processing_stats['end_time'] = time.time() self.processing_stats['end_time'] = time.time()

View File

@@ -398,44 +398,50 @@ class VR180Processor(VideoProcessor):
self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)") self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
# Apply masks - need to reload frames from temp video since we freed the original frames # Apply masks with streaming approach (no frame accumulation)
self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)") self._print_memory_step(f"Before streaming mask application ({eye_name} eye)")
# Read frames back from the temp video for mask application # Process frames one at a time without accumulation
cap = cv2.VideoCapture(str(temp_video_path)) cap = cv2.VideoCapture(str(temp_video_path))
reloaded_frames = []
for frame_idx in range(num_frames):
ret, frame = cap.read()
if not ret:
break
reloaded_frames.append(frame)
cap.release()
self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
# Apply masks
matted_frames = [] matted_frames = []
for frame_idx, frame in enumerate(reloaded_frames):
if frame_idx in video_segments: try:
frame_masks = video_segments[frame_idx] for frame_idx in range(num_frames):
combined_mask = self.sam2_model.get_combined_mask(frame_masks) ret, frame = cap.read()
if not ret:
break
matted_frame = self.sam2_model.apply_mask_to_frame( # Apply mask to this single frame
frame, combined_mask, if frame_idx in video_segments:
output_format=self.config.output.format, frame_masks = video_segments[frame_idx]
background_color=self.config.output.background_color combined_mask = self.sam2_model.get_combined_mask(frame_masks)
)
else: matted_frame = self.sam2_model.apply_mask_to_frame(
matted_frame = self._create_empty_mask_frame(frame) frame, combined_mask,
output_format=self.config.output.format,
matted_frames.append(matted_frame) background_color=self.config.output.background_color
)
else:
matted_frame = self._create_empty_mask_frame(frame)
matted_frames.append(matted_frame)
# Free the original frame immediately (no accumulation)
del frame
# Periodic cleanup during processing
if frame_idx % 100 == 0 and frame_idx > 0:
import gc
gc.collect()
finally:
cap.release()
# Free reloaded frames and video segments completely # Free video segments completely
del reloaded_frames
del video_segments # This holds processed masks from SAM2 del video_segments # This holds processed masks from SAM2
self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)") self._aggressive_memory_cleanup(f"After streaming mask application ({eye_name} eye)")
self._print_memory_step(f"Completed streaming mask application ({eye_name} eye)")
return matted_frames return matted_frames
finally: finally: