From 463f881eaf6fe4d48fffd6a315f2dac4026e52bb Mon Sep 17 00:00:00 2001
From: Scott Register
Date: Sat, 26 Jul 2025 11:56:51 -0700
Subject: [PATCH] category A round 2

---
 vr180_matting/video_processor.py | 93 ++++++++++++++++++++++++++++++++
 vr180_matting/vr180_processor.py | 64 ++++++++++++++++++++--
 2 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/vr180_matting/video_processor.py b/vr180_matting/video_processor.py
index 6d99c0d..30f11e9 100644
--- a/vr180_matting/video_processor.py
+++ b/vr180_matting/video_processor.py
@@ -9,6 +9,9 @@ from tqdm import tqdm
 import warnings
 import time
 import subprocess
+import gc
+import psutil
+import os
 
 from .config import VR180Config
 from .detector import YOLODetector
@@ -49,6 +52,96 @@ class VideoProcessor:
 
         self._initialize_models()
 
+    def _get_process_memory_info(self) -> Dict[str, float]:
+        """Get detailed memory usage for current process and children"""
+        current_process = psutil.Process(os.getpid())
+
+        # Get memory info for current process
+        memory_info = current_process.memory_info()
+        current_rss = memory_info.rss / 1024**3  # Convert to GB
+        current_vms = memory_info.vms / 1024**3  # Virtual memory
+
+        # Get memory info for all children
+        children_rss = 0
+        children_vms = 0
+        child_count = 0
+
+        try:
+            for child in current_process.children(recursive=True):
+                try:
+                    child_memory = child.memory_info()
+                    children_rss += child_memory.rss / 1024**3
+                    children_vms += child_memory.vms / 1024**3
+                    child_count += 1
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    pass
+        except psutil.NoSuchProcess:
+            pass
+
+        # System memory info
+        system_memory = psutil.virtual_memory()
+        system_total = system_memory.total / 1024**3
+        system_available = system_memory.available / 1024**3
+        system_used = system_memory.used / 1024**3
+        system_percent = system_memory.percent
+
+        return {
+            'process_rss_gb': current_rss,
+            'process_vms_gb': current_vms,
+            'children_rss_gb': children_rss,
+            'children_vms_gb': children_vms,
+            'total_process_gb': current_rss + children_rss,
+            'child_count': child_count,
+            'system_total_gb': system_total,
+            'system_used_gb': system_used,
+            'system_available_gb': system_available,
+            'system_percent': system_percent
+        }
+
+    def _print_memory_step(self, step_name: str):
+        """Print memory usage for a specific processing step"""
+        memory_info = self._get_process_memory_info()
+
+        print(f"\n📊 MEMORY: {step_name}")
+        print(f"   Process RSS: {memory_info['process_rss_gb']:.2f} GB")
+        if memory_info['children_rss_gb'] > 0:
+            print(f"   Children RSS: {memory_info['children_rss_gb']:.2f} GB ({memory_info['child_count']} processes)")
+        print(f"   Total Process: {memory_info['total_process_gb']:.2f} GB")
+        print(f"   System: {memory_info['system_used_gb']:.1f}/{memory_info['system_total_gb']:.1f} GB ({memory_info['system_percent']:.1f}%)")
+        print(f"   Available: {memory_info['system_available_gb']:.1f} GB")
+
+    def _aggressive_memory_cleanup(self, step_name: str = ""):
+        """Perform aggressive memory cleanup and report usage before/after"""
+        if step_name:
+            print(f"\n🧹 CLEANUP: Before {step_name}")
+
+        before_info = self._get_process_memory_info()
+        before_rss = before_info['total_process_gb']
+
+        # Multiple rounds of garbage collection
+        for _ in range(3):
+            gc.collect()
+
+        # Clear torch cache if available
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+        except ImportError:
+            pass
+
+        # Brief pause to allow cleanup
+        time.sleep(0.1)
+
+        after_info = self._get_process_memory_info()
+        after_rss = after_info['total_process_gb']
+        freed_memory = before_rss - after_rss
+
+        if step_name:
+            print(f"   Before: {before_rss:.2f} GB → After: {after_rss:.2f} GB")
+            print(f"   Freed: {freed_memory:.2f} GB")
+
     def _initialize_models(self):
         """Initialize YOLO detector and SAM2 model"""
         print("Initializing models...")
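For reference, the accounting in _get_process_memory_info() above is the standard psutil pattern of summing RSS across the whole process tree; plain process RSS would miss the worker children spawned by subprocess/multiprocessing. The sketch below is a minimal, self-contained illustration of that pattern, not code from the patch; the name report_rss is illustrative:

    import os
    import psutil

    def report_rss(step_name: str) -> float:
        """Sum RSS of this process and all live children, as the patch does."""
        proc = psutil.Process(os.getpid())
        total = proc.memory_info().rss
        for child in proc.children(recursive=True):
            try:
                total += child.memory_info().rss
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass  # child exited (or became inaccessible) between listing and query
        gb = total / 1024**3
        print(f"{step_name}: {gb:.2f} GB RSS (process + children)")
        return gb

    if __name__ == "__main__":
        report_rss("baseline")
        buf = bytearray(512 * 1024 * 1024)  # hold ~0.5 GB so the delta is visible
        report_rss("after allocating 0.5 GB")
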
diff --git a/vr180_matting/vr180_processor.py b/vr180_matting/vr180_processor.py
index 5d8e230..9c375ae 100644
--- a/vr180_matting/vr180_processor.py
+++ b/vr180_matting/vr180_processor.py
@@ -232,6 +232,9 @@ class VR180Processor(VideoProcessor):
         # Use the original video's framerate - access through parent class
         original_fps = self.fps if hasattr(self, 'fps') else 30.0
         print(f"Using framerate: {original_fps} fps")
+
+        # Memory monitoring before ffmpeg
+        self._print_memory_step(f"Before ffmpeg encoding ({eye_name} eye)")
         # Try GPU encoding first, fallback to CPU
         gpu_cmd = [
             'ffmpeg', '-y',  # -y to overwrite output file
@@ -283,6 +286,9 @@
 
             print(f"Created temp video successfully")
 
+            # Memory monitoring after ffmpeg
+            self._print_memory_step(f"After ffmpeg encoding ({eye_name} eye)")
+
             # Verify the file was created and has content
             if not temp_video_path.exists():
                 raise RuntimeError(f"Temporary video file was not created: {temp_video_path}")
@@ -293,16 +299,28 @@
 
             print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)")
 
+            # Memory monitoring and cleanup before SAM2 initialization
+            num_frames = len(eye_frames)  # Store count before freeing
+            first_frame = eye_frames[0].copy()  # Copy first frame for detection before freeing
+            self._print_memory_step(f"Before SAM2 init ({eye_name} eye, {num_frames} frames)")
+
+            # CRITICAL: Explicitly free eye_frames from memory before SAM2 loads the same video
+            # This prevents the OOM issue where both Python frames and SAM2 frames exist simultaneously
+            del eye_frames  # Free the frames array
+            self._aggressive_memory_cleanup(f"SAM2 init for {eye_name} eye")
+
             # Initialize SAM2 with video path
+            self._print_memory_step(f"Starting SAM2 init ({eye_name} eye)")
             self.sam2_model.init_video_state(video_path=str(temp_video_path))
+            self._print_memory_step(f"SAM2 initialized ({eye_name} eye)")
 
             # Detect persons in first frame
-            first_frame = eye_frames[0]
             detections = self.detector.detect_persons(first_frame)
 
             if not detections:
                 warnings.warn(f"No persons detected in {eye_name} eye, chunk {chunk_idx}")
-                return self._create_empty_masks(eye_frames)
+                # Return empty masks for the expected number of frames
+                return self._create_empty_masks_from_count(num_frames, first_frame.shape)
 
             print(f"Detected {len(detections)} persons in {eye_name} eye first frame")
 
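The ordering in the hunk above is the load-bearing part: the frame count and a detection frame are captured before del eye_frames, so nothing needed later keeps the big list alive while SAM2 builds its own copy of the same video. A minimal sketch of that capture-then-free lifecycle, with illustrative names and small arrays standing in for decoded frames:

    import gc
    import numpy as np

    # Stand-in for decoded eye frames (~3.3 MB each here; GBs in the real pipeline).
    eye_frames = [np.zeros((1080, 1080, 3), dtype=np.uint8) for _ in range(8)]

    # 1) Capture everything that must outlive the frame list.
    num_frames = len(eye_frames)
    first_frame = eye_frames[0].copy()  # .copy() so it doesn't pin the original array

    # 2) Drop the only reference and collect, *before* the next big allocation
    #    (SAM2's video state in the patch) so the two never coexist at peak.
    del eye_frames
    gc.collect()

    # 3) Later consumers use the captured values, not the freed list.
    print(num_frames, first_frame.shape)
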
@@ -312,15 +330,33 @@
             # Add prompts
             object_ids = self.sam2_model.add_person_prompts(0, box_prompts, labels)
 
-            # Propagate masks
+            # Propagate masks (most expensive operation)
+            self._print_memory_step(f"Before SAM2 propagation ({eye_name} eye, {num_frames} frames)")
             video_segments = self.sam2_model.propagate_masks(
                 start_frame=0,
-                max_frames=len(eye_frames)
+                max_frames=num_frames
             )
+            self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
+
+            # Apply masks - we need to reload frames from the temp video since the originals were freed
+            self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)")
+
+            # Read frames back from the temp video for mask application
+            cap = cv2.VideoCapture(str(temp_video_path))
+            reloaded_frames = []
+
+            for frame_idx in range(num_frames):
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                reloaded_frames.append(frame)
+            cap.release()
+
+            self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
 
             # Apply masks
             matted_frames = []
-            for frame_idx, frame in enumerate(eye_frames):
+            for frame_idx, frame in enumerate(reloaded_frames):
                 if frame_idx in video_segments:
                     frame_masks = video_segments[frame_idx]
                     combined_mask = self.sam2_model.get_combined_mask(frame_masks)
@@ -335,6 +371,10 @@
 
                 matted_frames.append(matted_frame)
 
+            # Free the reloaded frames
+            del reloaded_frames
+            self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
+
             return matted_frames
 
         finally:
@@ -410,6 +450,20 @@
 
         return validated_frames
 
+    def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
+        """Create empty masks when no persons were detected (without the frame array)"""
+        empty_frames = []
+        for _ in range(num_frames):
+            if self.config.output.format == "alpha":
+                # Transparent output
+                output = np.zeros((frame_shape[0], frame_shape[1], 4), dtype=np.uint8)
+            else:
+                # Green screen background
+                output = np.full((frame_shape[0], frame_shape[1], 3),
+                                 self.config.output.background_color, dtype=np.uint8)
+            empty_frames.append(output)
+        return empty_frames
+
     def _get_mask_area(self, frame: np.ndarray) -> float:
         """Get mask area from processed frame"""
         if frame.shape[2] == 4:  # Alpha channel
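
Since _create_empty_masks_from_count() only needs a frame count and a shape, the no-detection path stays valid after eye_frames is freed. A self-contained sketch of the same output convention, assuming BGR frames and a BGR background_color tuple as in OpenCV; empty_output and the "greenscreen" format name are illustrative, not from the patch:

    import numpy as np

    def empty_output(height: int, width: int, fmt: str,
                     background_color=(0, 255, 0)):
        """Fully transparent BGRA frame, or a solid green-screen BGR frame."""
        if fmt == "alpha":
            return np.zeros((height, width, 4), dtype=np.uint8)
        # np.full broadcasts the 3-tuple across the last axis
        return np.full((height, width, 3), background_color, dtype=np.uint8)

    frames = [empty_output(1080, 1080, "greenscreen") for _ in range(5)]
    assert frames[0].shape == (1080, 1080, 3)
    assert empty_output(4, 4, "alpha")[..., 3].max() == 0  # alpha channel all zero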