more memory fixes hopefully
This commit is contained in:
227
memory_profiler_script.py
Normal file
227
memory_profiler_script.py
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Memory profiling script for VR180 matting pipeline
|
||||||
|
Tracks memory usage during processing to identify leaks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import psutil
|
||||||
|
import tracemalloc
|
||||||
|
import subprocess
|
||||||
|
import gc
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Tuple
|
||||||
|
import threading
|
||||||
|
import json
|
||||||
|
|
||||||
|
class MemoryProfiler:
    """Samples process, system, and GPU memory on a background thread.

    Timeline samples accumulate in ``self.data``; when monitoring stops,
    the timeline plus the top tracemalloc allocations are written to
    ``self.output_file`` as JSON.
    """

    def __init__(self, output_file: str = "memory_profile.json"):
        self.output_file = output_file   # destination of the JSON report
        self.data = []                   # timeline: one dict per sample
        self.process = psutil.Process()  # handle to this process for RSS
        self.running = False             # monitor-loop run flag
        self.thread = None               # background sampler thread

    def start_monitoring(self, interval: float = 1.0):
        """Start continuous memory monitoring on a daemon thread.

        Args:
            interval: Seconds between samples.
        """
        if self.running:
            # Guard against double-start: a second call would leak a thread
            # and restart tracemalloc in the middle of a trace.
            return
        tracemalloc.start()
        self.running = True
        self.thread = threading.Thread(target=self._monitor_loop, args=(interval,))
        self.thread.daemon = True  # never block interpreter exit
        self.thread.start()
        print(f"🔍 Memory monitoring started (interval: {interval}s)")

    def stop_monitoring(self):
        """Stop monitoring and save results to ``self.output_file``."""
        self.running = False
        if self.thread:
            self.thread.join()

        # Snapshot allocations only if tracemalloc is actually tracing;
        # calling stop_monitoring() without a prior start_monitoring()
        # would otherwise raise RuntimeError from take_snapshot().
        top_stats = []
        if tracemalloc.is_tracing():
            snapshot = tracemalloc.take_snapshot()
            top_stats = snapshot.statistics('lineno')
            tracemalloc.stop()

        # Save detailed results
        results = {
            'timeline': self.data,
            'top_memory_allocations': [
                {
                    'file': stat.traceback.format()[0],
                    'size_mb': stat.size / 1024 / 1024,
                    'count': stat.count
                }
                for stat in top_stats[:20]  # Top 20 allocations
            ],
            'summary': {
                'peak_rss_gb': max([d['rss_gb'] for d in self.data]) if self.data else 0,
                'peak_vram_gb': max([d['vram_gb'] for d in self.data]) if self.data else 0,
                'total_samples': len(self.data)
            }
        }

        with open(self.output_file, 'w') as f:
            json.dump(results, f, indent=2)

        print(f"📊 Memory profile saved to {self.output_file}")

    def _monitor_loop(self, interval: float):
        """Continuous monitoring loop; runs on the background thread.

        Args:
            interval: Seconds to sleep between samples.
        """
        while self.running:
            try:
                # Process-resident memory
                memory_info = self.process.memory_info()
                rss_gb = memory_info.rss / (1024**3)

                # System-wide memory
                sys_memory = psutil.virtual_memory()
                sys_used_gb = (sys_memory.total - sys_memory.available) / (1024**3)
                sys_available_gb = sys_memory.available / (1024**3)

                # GPU memory via nvidia-smi (best effort; stays 0 when
                # there is no GPU or nvidia-smi is missing/slow)
                vram_gb = 0
                vram_free_gb = 0
                try:
                    result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.free',
                                             '--format=csv,noheader,nounits'],
                                            capture_output=True, text=True, timeout=5)
                    if result.returncode == 0:
                        lines = result.stdout.strip().split('\n')
                        if lines and lines[0]:
                            used, free = lines[0].split(', ')
                            vram_gb = float(used) / 1024
                            vram_free_gb = float(free) / 1024
                except Exception:
                    pass  # deliberate best-effort: GPU stats are optional

                # Tracemalloc current usage (MB); 0 if tracing not active
                try:
                    current, peak = tracemalloc.get_traced_memory()
                    traced_mb = current / (1024**2)
                except Exception:
                    traced_mb = 0

                data_point = {
                    'timestamp': time.time(),
                    'rss_gb': rss_gb,
                    'vram_gb': vram_gb,
                    'vram_free_gb': vram_free_gb,
                    'sys_used_gb': sys_used_gb,
                    'sys_available_gb': sys_available_gb,
                    'traced_mb': traced_mb
                }

                self.data.append(data_point)

                # Print periodic updates
                if len(self.data) % 10 == 0:  # Every 10 samples
                    print(f"🔍 Memory: RSS={rss_gb:.2f}GB, VRAM={vram_gb:.2f}GB, Sys={sys_used_gb:.1f}GB")

            except Exception as e:
                print(f"Monitoring error: {e}")

            time.sleep(interval)

    def log_checkpoint(self, checkpoint_name: str):
        """Tag the most recent sample with a named checkpoint and print it.

        No-op when no samples have been collected yet.
        """
        if self.data:
            self.data[-1]['checkpoint'] = checkpoint_name
            latest = self.data[-1]
            print(f"📍 CHECKPOINT [{checkpoint_name}]: RSS={latest['rss_gb']:.2f}GB, VRAM={latest['vram_gb']:.2f}GB")
|
||||||
|
|
||||||
|
def run_with_profiling(config_path: str):
    """Run the VR180 matting pipeline with continuous memory profiling.

    Args:
        config_path: Path to the pipeline's YAML configuration file.
    """
    profiler = MemoryProfiler("memory_profile_detailed.json")

    try:
        profiler.start_monitoring(interval=2.0)  # Sample every 2 seconds
        profiler.log_checkpoint("STARTUP")

        # Deferred import so the profiler captures import-time allocations.
        print("Importing VR180 processor...")
        from vr180_matting.vr180_processor import VR180Processor
        from vr180_matting.config import VR180Config
        profiler.log_checkpoint("IMPORTS_COMPLETE")

        print(f"Loading config from {config_path}")
        config = VR180Config.from_yaml(config_path)
        profiler.log_checkpoint("CONFIG_LOADED")

        print("Initializing VR180 processor...")
        processor = VR180Processor(config)
        profiler.log_checkpoint("PROCESSOR_INITIALIZED")

        gc.collect()  # settle allocations before the heavy work starts
        profiler.log_checkpoint("INITIAL_GC_COMPLETE")

        print("Starting VR180 processing...")
        processor.process_video()
        profiler.log_checkpoint("PROCESSING_COMPLETE")

    except Exception as e:
        print(f"❌ Error during processing: {e}")
        profiler.log_checkpoint(f"ERROR: {str(e)}")
        raise
    finally:
        # Always stop monitoring and report, even if processing failed.
        # NOTE(review): summary lives in `finally` so it prints on error
        # paths too — confirm against the original file's indentation.
        profiler.stop_monitoring()

        print("\n" + "="*60)
        print("MEMORY PROFILING SUMMARY")
        print("="*60)

        if profiler.data:
            peak_rss = max(d['rss_gb'] for d in profiler.data)
            peak_vram = max(d['vram_gb'] for d in profiler.data)

            print(f"Peak RSS Memory: {peak_rss:.2f} GB")
            print(f"Peak VRAM Usage: {peak_vram:.2f} GB")
            print(f"Total Samples: {len(profiler.data)}")

            checkpoints = [d for d in profiler.data if 'checkpoint' in d]
            if checkpoints:
                print(f"\nCheckpoints ({len(checkpoints)}):")
                for cp in checkpoints:
                    print(f"  {cp['checkpoint']}: RSS={cp['rss_gb']:.2f}GB, VRAM={cp['vram_gb']:.2f}GB")

        print(f"\nDetailed profile saved to: {profiler.output_file}")
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: validate the config argument and launch profiling."""
    if len(sys.argv) != 2:
        # Usage text, one line per print (trailing newline from print itself).
        for line in (
            "Usage: python memory_profiler_script.py <config.yaml>",
            "\nThis script runs VR180 matting with detailed memory profiling",
            "It will:",
            "- Monitor RSS, VRAM, and system memory every 2 seconds",
            "- Track memory allocations with tracemalloc",
            "- Log checkpoints at key processing stages",
            "- Save detailed JSON report for analysis",
        ):
            print(line)
        sys.exit(1)

    config_path = sys.argv[1]

    if not Path(config_path).exists():
        print(f"❌ Config file not found: {config_path}")
        sys.exit(1)

    print("🚀 Starting VR180 Memory Profiling")
    print(f"Config: {config_path}")
    print("="*60)

    run_with_profiling(config_path)


if __name__ == "__main__":
    main()
|
||||||
@@ -387,19 +387,83 @@ class VideoProcessor:
|
|||||||
# Green screen background
|
# Green screen background
|
||||||
return np.full_like(frame, self.config.output.background_color, dtype=np.uint8)
|
return np.full_like(frame, self.config.output.background_color, dtype=np.uint8)
|
||||||
|
|
||||||
|
def merge_chunks_streaming(self, chunk_files: List[Path], output_path: str,
                           overlap_frames: int = 0, audio_source: str = None) -> None:
    """
    Merge processed chunks using streaming approach (no memory accumulation)

    Args:
        chunk_files: List of chunk result files (.npz)
        output_path: Final output video path
        overlap_frames: Number of overlapping frames
        audio_source: Audio source file for final video

    Raises:
        ValueError: If chunk_files is empty.
    """
    from .streaming_video_writer import StreamingVideoWriter

    if not chunk_files:
        raise ValueError("No chunk files to merge")

    print(f"🎬 Streaming merge: {len(chunk_files)} chunks → {output_path}")

    # Initialize streaming writer
    writer = StreamingVideoWriter(
        output_path=output_path,
        fps=self.video_info['fps'],
        audio_source=audio_source
    )

    try:
        # Process each chunk without accumulation
        for i, chunk_file in enumerate(chunk_files):
            print(f"📼 Processing chunk {i+1}/{len(chunk_files)}: {chunk_file.name}")

            # Load chunk (this is the only copy in memory)
            chunk_data = np.load(str(chunk_file))
            # BUG FIX: was .tolist(), which recursively converts every
            # frame ndarray into nested Python lists — a huge memory
            # blow-up in the function meant to avoid accumulation, and
            # the wrong element type.  list() yields a list of ndarrays.
            frames = list(chunk_data['frames'])
            chunk_data.close()

            # Write chunk with streaming writer
            writer.write_chunk(
                frames=frames,
                chunk_index=i,
                overlap_frames=overlap_frames if i > 0 else 0,
                blend_with_previous=(i > 0 and overlap_frames > 0)
            )

            # Immediately free memory
            del frames, chunk_data

            # Delete chunk file to free disk space (best effort)
            try:
                chunk_file.unlink()
                print(f"   🗑️ Deleted {chunk_file.name}")
            except Exception as e:
                print(f"   ⚠️ Could not delete {chunk_file.name}: {e}")

            # Aggressive cleanup every chunk
            self._aggressive_memory_cleanup(f"After processing chunk {i}")

        # Finalize the video
        writer.finalize()

    except Exception as e:
        print(f"❌ Streaming merge failed: {e}")
        writer.cleanup()
        raise

    print(f"✅ Streaming merge complete: {output_path}")
|
||||||
|
|
||||||
def merge_overlapping_chunks(self,
|
def merge_overlapping_chunks(self,
|
||||||
chunk_results: List[List[np.ndarray]],
|
chunk_results: List[List[np.ndarray]],
|
||||||
overlap_frames: int) -> List[np.ndarray]:
|
overlap_frames: int) -> List[np.ndarray]:
|
||||||
"""
|
"""
|
||||||
Merge overlapping chunks with blending in overlap regions
|
Legacy merge method - DEPRECATED due to memory accumulation
|
||||||
|
Use merge_chunks_streaming() instead for memory efficiency
|
||||||
Args:
|
|
||||||
chunk_results: List of chunk results
|
|
||||||
overlap_frames: Number of overlapping frames
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Merged frame sequence
|
|
||||||
"""
|
"""
|
||||||
|
import warnings
|
||||||
|
warnings.warn("merge_overlapping_chunks() is deprecated due to memory accumulation. Use merge_chunks_streaming()",
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
|
|
||||||
if len(chunk_results) == 1:
|
if len(chunk_results) == 1:
|
||||||
return chunk_results[0]
|
return chunk_results[0]
|
||||||
|
|
||||||
@@ -640,36 +704,23 @@ class VideoProcessor:
|
|||||||
if self.memory_manager.should_emergency_cleanup():
|
if self.memory_manager.should_emergency_cleanup():
|
||||||
self.memory_manager.emergency_cleanup()
|
self.memory_manager.emergency_cleanup()
|
||||||
|
|
||||||
# Load and merge chunks from disk
|
# Use streaming merge to avoid memory accumulation (fixes OOM)
|
||||||
print("\nLoading and merging chunks...")
|
print("\n🎬 Using streaming merge (no memory accumulation)...")
|
||||||
chunk_results = []
|
|
||||||
for i, chunk_file in enumerate(chunk_files):
|
|
||||||
print(f"Loading {chunk_file.name}...")
|
|
||||||
chunk_data = np.load(str(chunk_file))
|
|
||||||
chunk_results.append(chunk_data['frames'])
|
|
||||||
chunk_data.close() # Close the file
|
|
||||||
|
|
||||||
# Delete chunk file immediately after loading to free disk space
|
# Determine audio source for final video
|
||||||
try:
|
audio_source = None
|
||||||
chunk_file.unlink()
|
if self.config.output.preserve_audio and Path(self.config.input.video_path).exists():
|
||||||
print(f" Deleted chunk file {chunk_file.name}")
|
audio_source = self.config.input.video_path
|
||||||
except Exception as e:
|
|
||||||
print(f" Warning: Could not delete chunk file: {e}")
|
|
||||||
|
|
||||||
# Aggressive cleanup every few chunks to prevent accumulation
|
# Stream merge chunks directly to output (no memory accumulation)
|
||||||
if i % 3 == 0 and i > 0:
|
self.merge_chunks_streaming(
|
||||||
self._aggressive_memory_cleanup(f"after loading chunk {i}")
|
chunk_files=chunk_files,
|
||||||
|
output_path=self.config.output.path,
|
||||||
|
overlap_frames=overlap_frames,
|
||||||
|
audio_source=audio_source
|
||||||
|
)
|
||||||
|
|
||||||
# Merge chunks
|
print("✅ Streaming merge complete - no memory accumulation!")
|
||||||
final_frames = self.merge_overlapping_chunks(chunk_results, overlap_frames)
|
|
||||||
|
|
||||||
# Free chunk results after merging - this is critical!
|
|
||||||
del chunk_results
|
|
||||||
self._aggressive_memory_cleanup("after merging chunks")
|
|
||||||
|
|
||||||
# Save results
|
|
||||||
print(f"Saving {len(final_frames)} processed frames...")
|
|
||||||
self.save_video(final_frames, self.config.output.path)
|
|
||||||
|
|
||||||
# Calculate final statistics
|
# Calculate final statistics
|
||||||
self.processing_stats['end_time'] = time.time()
|
self.processing_stats['end_time'] = time.time()
|
||||||
|
|||||||
@@ -398,44 +398,50 @@ class VR180Processor(VideoProcessor):
|
|||||||
|
|
||||||
self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
|
self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
|
||||||
|
|
||||||
# Apply masks - need to reload frames from temp video since we freed the original frames
|
# Apply masks with streaming approach (no frame accumulation)
|
||||||
self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)")
|
self._print_memory_step(f"Before streaming mask application ({eye_name} eye)")
|
||||||
|
|
||||||
# Read frames back from the temp video for mask application
|
# Process frames one at a time without accumulation
|
||||||
cap = cv2.VideoCapture(str(temp_video_path))
|
cap = cv2.VideoCapture(str(temp_video_path))
|
||||||
reloaded_frames = []
|
|
||||||
|
|
||||||
for frame_idx in range(num_frames):
|
|
||||||
ret, frame = cap.read()
|
|
||||||
if not ret:
|
|
||||||
break
|
|
||||||
reloaded_frames.append(frame)
|
|
||||||
cap.release()
|
|
||||||
|
|
||||||
self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
|
|
||||||
|
|
||||||
# Apply masks
|
|
||||||
matted_frames = []
|
matted_frames = []
|
||||||
for frame_idx, frame in enumerate(reloaded_frames):
|
|
||||||
if frame_idx in video_segments:
|
|
||||||
frame_masks = video_segments[frame_idx]
|
|
||||||
combined_mask = self.sam2_model.get_combined_mask(frame_masks)
|
|
||||||
|
|
||||||
matted_frame = self.sam2_model.apply_mask_to_frame(
|
try:
|
||||||
frame, combined_mask,
|
for frame_idx in range(num_frames):
|
||||||
output_format=self.config.output.format,
|
ret, frame = cap.read()
|
||||||
background_color=self.config.output.background_color
|
if not ret:
|
||||||
)
|
break
|
||||||
else:
|
|
||||||
matted_frame = self._create_empty_mask_frame(frame)
|
|
||||||
|
|
||||||
matted_frames.append(matted_frame)
|
# Apply mask to this single frame
|
||||||
|
if frame_idx in video_segments:
|
||||||
|
frame_masks = video_segments[frame_idx]
|
||||||
|
combined_mask = self.sam2_model.get_combined_mask(frame_masks)
|
||||||
|
|
||||||
# Free reloaded frames and video segments completely
|
matted_frame = self.sam2_model.apply_mask_to_frame(
|
||||||
del reloaded_frames
|
frame, combined_mask,
|
||||||
|
output_format=self.config.output.format,
|
||||||
|
background_color=self.config.output.background_color
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
matted_frame = self._create_empty_mask_frame(frame)
|
||||||
|
|
||||||
|
matted_frames.append(matted_frame)
|
||||||
|
|
||||||
|
# Free the original frame immediately (no accumulation)
|
||||||
|
del frame
|
||||||
|
|
||||||
|
# Periodic cleanup during processing
|
||||||
|
if frame_idx % 100 == 0 and frame_idx > 0:
|
||||||
|
import gc
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
finally:
|
||||||
|
cap.release()
|
||||||
|
|
||||||
|
# Free video segments completely
|
||||||
del video_segments # This holds processed masks from SAM2
|
del video_segments # This holds processed masks from SAM2
|
||||||
self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
|
self._aggressive_memory_cleanup(f"After streaming mask application ({eye_name} eye)")
|
||||||
|
|
||||||
|
self._print_memory_step(f"Completed streaming mask application ({eye_name} eye)")
|
||||||
return matted_frames
|
return matted_frames
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
Reference in New Issue
Block a user