category A round 2

Commit 463f881eaf (parent b642b562f0)
Date: 2025-07-26 11:56:51 -07:00
2 changed files with 152 additions and 5 deletions

@@ -9,6 +9,9 @@ from tqdm import tqdm
 import warnings
 import time
 import subprocess
+import gc
+import psutil
+import os
 from .config import VR180Config
 from .detector import YOLODetector
@@ -49,6 +52,96 @@ class VideoProcessor:
         self._initialize_models()
+
+    def _get_process_memory_info(self) -> Dict[str, float]:
+        """Get detailed memory usage for current process and children"""
+        current_process = psutil.Process(os.getpid())
+
+        # Get memory info for current process
+        memory_info = current_process.memory_info()
+        current_rss = memory_info.rss / 1024**3  # Convert to GB
+        current_vms = memory_info.vms / 1024**3  # Virtual memory
+
+        # Get memory info for all children
+        children_rss = 0
+        children_vms = 0
+        child_count = 0
+        try:
+            for child in current_process.children(recursive=True):
+                try:
+                    child_memory = child.memory_info()
+                    children_rss += child_memory.rss / 1024**3
+                    children_vms += child_memory.vms / 1024**3
+                    child_count += 1
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    pass
+        except psutil.NoSuchProcess:
+            pass
+
+        # System memory info
+        system_memory = psutil.virtual_memory()
+        system_total = system_memory.total / 1024**3
+        system_available = system_memory.available / 1024**3
+        system_used = system_memory.used / 1024**3
+        system_percent = system_memory.percent
+
+        return {
+            'process_rss_gb': current_rss,
+            'process_vms_gb': current_vms,
+            'children_rss_gb': children_rss,
+            'children_vms_gb': children_vms,
+            'total_process_gb': current_rss + children_rss,
+            'child_count': child_count,
+            'system_total_gb': system_total,
+            'system_used_gb': system_used,
+            'system_available_gb': system_available,
+            'system_percent': system_percent
+        }
+
+    def _print_memory_step(self, step_name: str):
+        """Print memory usage for a specific processing step"""
+        memory_info = self._get_process_memory_info()
+        print(f"\n📊 MEMORY: {step_name}")
+        print(f"   Process RSS: {memory_info['process_rss_gb']:.2f} GB")
+        if memory_info['children_rss_gb'] > 0:
+            print(f"   Children RSS: {memory_info['children_rss_gb']:.2f} GB ({memory_info['child_count']} processes)")
+        print(f"   Total Process: {memory_info['total_process_gb']:.2f} GB")
+        print(f"   System: {memory_info['system_used_gb']:.1f}/{memory_info['system_total_gb']:.1f} GB ({memory_info['system_percent']:.1f}%)")
+        print(f"   Available: {memory_info['system_available_gb']:.1f} GB")
+
+    def _aggressive_memory_cleanup(self, step_name: str = ""):
+        """Perform aggressive memory cleanup and report before/after"""
+        if step_name:
+            print(f"\n🧹 CLEANUP: Before {step_name}")
+        before_info = self._get_process_memory_info()
+        before_rss = before_info['total_process_gb']
+
+        # Multiple rounds of garbage collection
+        for i in range(3):
+            gc.collect()
+
+        # Clear torch cache if available
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+        except ImportError:
+            pass
+
+        # Brief pause to allow cleanup
+        time.sleep(0.1)
+
+        after_info = self._get_process_memory_info()
+        after_rss = after_info['total_process_gb']
+        freed_memory = before_rss - after_rss
+
+        if step_name:
+            print(f"   Before: {before_rss:.2f} GB → After: {after_rss:.2f} GB")
+            print(f"   Freed: {freed_memory:.2f} GB")
     def _initialize_models(self):
         """Initialize YOLO detector and SAM2 model"""
         print("Initializing models...")

@@ -232,6 +232,9 @@ class VR180Processor(VideoProcessor):
         # Use the original video's framerate - access through parent class
         original_fps = self.fps if hasattr(self, 'fps') else 30.0
         print(f"Using framerate: {original_fps} fps")
+
+        # Memory monitoring before ffmpeg
+        self._print_memory_step(f"Before ffmpeg encoding ({eye_name} eye)")
+
         # Try GPU encoding first, fallback to CPU
         gpu_cmd = [
             'ffmpeg', '-y',  # -y to overwrite output file
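The hunk truncates before the fallback itself, but the GPU-first/CPU-fallback pattern named in the comment usually reduces to a try/except around subprocess.run. A sketch, assuming a cpu_cmd that mirrors gpu_cmd with libx264 in place of the NVENC encoder (the repo's actual flags are not shown here):

    try:
        subprocess.run(gpu_cmd, check=True, capture_output=True)  # NVENC attempt
    except (subprocess.CalledProcessError, FileNotFoundError):
        subprocess.run(cpu_cmd, check=True, capture_output=True)  # libx264 fallback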
@@ -283,6 +286,9 @@ class VR180Processor(VideoProcessor):
print(f"Created temp video successfully") print(f"Created temp video successfully")
# Memory monitoring after ffmpeg
self._print_memory_step(f"After ffmpeg encoding ({eye_name} eye)")
# Verify the file was created and has content # Verify the file was created and has content
if not temp_video_path.exists(): if not temp_video_path.exists():
raise RuntimeError(f"Temporary video file was not created: {temp_video_path}") raise RuntimeError(f"Temporary video file was not created: {temp_video_path}")
@@ -293,16 +299,28 @@ class VR180Processor(VideoProcessor):
print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)") print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)")
# Memory monitoring and cleanup before SAM2 initialization
num_frames = len(eye_frames) # Store count before freeing
first_frame = eye_frames[0].copy() # Copy first frame for detection before freeing
self._print_memory_step(f"Before SAM2 init ({eye_name} eye, {num_frames} frames)")
# CRITICAL: Explicitly free eye_frames from memory before SAM2 loads the same video
# This prevents the OOM issue where both Python frames and SAM2 frames exist simultaneously
del eye_frames # Free the frames array
self._aggressive_memory_cleanup(f"SAM2 init for {eye_name} eye")
         # Initialize SAM2 with video path
+        self._print_memory_step(f"Starting SAM2 init ({eye_name} eye)")
         self.sam2_model.init_video_state(video_path=str(temp_video_path))
+        self._print_memory_step(f"SAM2 initialized ({eye_name} eye)")

         # Detect persons in first frame
-        first_frame = eye_frames[0]
         detections = self.detector.detect_persons(first_frame)

         if not detections:
             warnings.warn(f"No persons detected in {eye_name} eye, chunk {chunk_idx}")
-            return self._create_empty_masks(eye_frames)
+            # Return empty masks for the number of frames
+            return self._create_empty_masks_from_count(num_frames, first_frame.shape)

         print(f"Detected {len(detections)} persons in {eye_name} eye first frame")
@@ -312,15 +330,33 @@ class VR180Processor(VideoProcessor):
         # Add prompts
         object_ids = self.sam2_model.add_person_prompts(0, box_prompts, labels)

-        # Propagate masks
+        # Propagate masks (most expensive operation)
+        self._print_memory_step(f"Before SAM2 propagation ({eye_name} eye, {num_frames} frames)")
         video_segments = self.sam2_model.propagate_masks(
             start_frame=0,
-            max_frames=len(eye_frames)
+            max_frames=num_frames
         )
+        self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
+        # Apply masks - need to reload frames from temp video since we freed the original frames
+        self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)")
+
+        # Read frames back from the temp video for mask application
+        cap = cv2.VideoCapture(str(temp_video_path))
+        reloaded_frames = []
+        for frame_idx in range(num_frames):
+            ret, frame = cap.read()
+            if not ret:
+                break
+            reloaded_frames.append(frame)
+        cap.release()
+        self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
+
         # Apply masks
         matted_frames = []
-        for frame_idx, frame in enumerate(eye_frames):
+        for frame_idx, frame in enumerate(reloaded_frames):
             if frame_idx in video_segments:
                 frame_masks = video_segments[frame_idx]
                 combined_mask = self.sam2_model.get_combined_mask(frame_masks)
@@ -335,6 +371,10 @@ class VR180Processor(VideoProcessor):
             matted_frames.append(matted_frame)

+        # Free reloaded frames
+        del reloaded_frames
+        self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
+
         return matted_frames

     finally:
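One trade-off worth noting: reloaded_frames and matted_frames coexist during the loop, so mask application briefly holds two full copies of the chunk. If that peak resurfaces, a streaming variant that reads, mats, and appends one frame at a time would hold only the matted copy. A sketch, with _apply_mask standing in for the per-frame matting call that this hunk elides (hypothetical name):

    cap = cv2.VideoCapture(str(temp_video_path))
    matted_frames = []
    for frame_idx in range(num_frames):
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx in video_segments:
            mask = self.sam2_model.get_combined_mask(video_segments[frame_idx])
            frame = self._apply_mask(frame, mask)  # hypothetical helper name
        matted_frames.append(frame)
    cap.release()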
@@ -410,6 +450,20 @@ class VR180Processor(VideoProcessor):
         return validated_frames

+    def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
+        """Create empty masks when no persons detected (without frame array)"""
+        empty_frames = []
+        for _ in range(num_frames):
+            if self.config.output.format == "alpha":
+                # Transparent output
+                output = np.zeros((frame_shape[0], frame_shape[1], 4), dtype=np.uint8)
+            else:
+                # Green screen background
+                output = np.full((frame_shape[0], frame_shape[1], 3),
+                                 self.config.output.background_color, dtype=np.uint8)
+            empty_frames.append(output)
+        return empty_frames
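A quick sanity check of the new helper's contract (shapes are illustrative):

    masks = self._create_empty_masks_from_count(3, (1080, 1920, 3))
    len(masks)      # -> 3
    masks[0].shape  # -> (1080, 1920, 4) with format "alpha", else (1080, 1920, 3)
    masks[0].dtype  # -> uint8: all zeros (transparent) or the background_color fill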
     def _get_mask_area(self, frame: np.ndarray) -> float:
         """Get mask area from processed frame"""
         if frame.shape[2] == 4:  # Alpha channel