category A round 2
@@ -9,6 +9,9 @@ from tqdm import tqdm
 import warnings
 import time
 import subprocess
+import gc
+import psutil
+import os
 
 from .config import VR180Config
 from .detector import YOLODetector
@@ -49,6 +52,96 @@ class VideoProcessor:
 
         self._initialize_models()
 
+    def _get_process_memory_info(self) -> Dict[str, float]:
+        """Get detailed memory usage for current process and children"""
+        current_process = psutil.Process(os.getpid())
+
+        # Get memory info for current process
+        memory_info = current_process.memory_info()
+        current_rss = memory_info.rss / 1024**3  # Convert to GB
+        current_vms = memory_info.vms / 1024**3  # Virtual memory
+
+        # Get memory info for all children
+        children_rss = 0
+        children_vms = 0
+        child_count = 0
+
+        try:
+            for child in current_process.children(recursive=True):
+                try:
+                    child_memory = child.memory_info()
+                    children_rss += child_memory.rss / 1024**3
+                    children_vms += child_memory.vms / 1024**3
+                    child_count += 1
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    pass
+        except psutil.NoSuchProcess:
+            pass
+
+        # System memory info
+        system_memory = psutil.virtual_memory()
+        system_total = system_memory.total / 1024**3
+        system_available = system_memory.available / 1024**3
+        system_used = system_memory.used / 1024**3
+        system_percent = system_memory.percent
+
+        return {
+            'process_rss_gb': current_rss,
+            'process_vms_gb': current_vms,
+            'children_rss_gb': children_rss,
+            'children_vms_gb': children_vms,
+            'total_process_gb': current_rss + children_rss,
+            'child_count': child_count,
+            'system_total_gb': system_total,
+            'system_used_gb': system_used,
+            'system_available_gb': system_available,
+            'system_percent': system_percent
+        }
+
+    def _print_memory_step(self, step_name: str):
+        """Print memory usage for a specific processing step"""
+        memory_info = self._get_process_memory_info()
+
+        print(f"\n📊 MEMORY: {step_name}")
+        print(f" Process RSS: {memory_info['process_rss_gb']:.2f} GB")
+        if memory_info['children_rss_gb'] > 0:
+            print(f" Children RSS: {memory_info['children_rss_gb']:.2f} GB ({memory_info['child_count']} processes)")
+        print(f" Total Process: {memory_info['total_process_gb']:.2f} GB")
+        print(f" System: {memory_info['system_used_gb']:.1f}/{memory_info['system_total_gb']:.1f} GB ({memory_info['system_percent']:.1f}%)")
+        print(f" Available: {memory_info['system_available_gb']:.1f} GB")
+
+    def _aggressive_memory_cleanup(self, step_name: str = ""):
+        """Perform aggressive memory cleanup and report before/after"""
+        if step_name:
+            print(f"\n🧹 CLEANUP: Before {step_name}")
+
+        before_info = self._get_process_memory_info()
+        before_rss = before_info['total_process_gb']
+
+        # Multiple rounds of garbage collection
+        for i in range(3):
+            gc.collect()
+
+        # Clear torch cache if available
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+        except ImportError:
+            pass
+
+        # Brief pause to allow cleanup
+        time.sleep(0.1)
+
+        after_info = self._get_process_memory_info()
+        after_rss = after_info['total_process_gb']
+        freed_memory = before_rss - after_rss
+
+        if step_name:
+            print(f" Before: {before_rss:.2f} GB → After: {after_rss:.2f} GB")
+            print(f" Freed: {freed_memory:.2f} GB")
+
     def _initialize_models(self):
         """Initialize YOLO detector and SAM2 model"""
         print("Initializing models...")
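
Note: the accounting above is the standard psutil pattern, summing the RSS of the current process and all of its recursive children (ffmpeg and dataloader workers show up as children). A minimal standalone sketch of that pattern, with illustrative names that are not part of this commit and assuming psutil is installed:

import os
import psutil

def total_rss_gb() -> float:
    """RSS of this process plus all child processes, in GB (illustrative helper)."""
    proc = psutil.Process(os.getpid())
    total = proc.memory_info().rss
    for child in proc.children(recursive=True):
        try:
            total += child.memory_info().rss
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue  # child exited or is inaccessible; skip it
    return total / 1024**3

print(f"total process memory: {total_rss_gb():.2f} GB")
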
@@ -232,6 +232,9 @@ class VR180Processor(VideoProcessor):
             # Use the original video's framerate - access through parent class
             original_fps = self.fps if hasattr(self, 'fps') else 30.0
             print(f"Using framerate: {original_fps} fps")
+
+            # Memory monitoring before ffmpeg
+            self._print_memory_step(f"Before ffmpeg encoding ({eye_name} eye)")
             # Try GPU encoding first, fallback to CPU
             gpu_cmd = [
                 'ffmpeg', '-y',  # -y to overwrite output file
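
Note: the "GPU first, CPU fallback" encode referenced here is the usual try-then-retry subprocess pattern. A rough sketch of that shape, with hypothetical command lines and paths that are not the commit's actual gpu_cmd:

import subprocess

def encode(input_pattern: str, output_path: str) -> None:
    # Illustrative commands only; the real gpu_cmd in this file carries more options.
    gpu_cmd = ["ffmpeg", "-y", "-i", input_pattern, "-c:v", "h264_nvenc", output_path]
    cpu_cmd = ["ffmpeg", "-y", "-i", input_pattern, "-c:v", "libx264", output_path]
    result = subprocess.run(gpu_cmd, capture_output=True)
    if result.returncode != 0:
        # NVENC unavailable or failed; retry with the software encoder
        subprocess.run(cpu_cmd, check=True)

encode("frames/%06d.png", "eye_tmp.mp4")  # hypothetical usage
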
@@ -283,6 +286,9 @@ class VR180Processor(VideoProcessor):
 
             print(f"Created temp video successfully")
 
+            # Memory monitoring after ffmpeg
+            self._print_memory_step(f"After ffmpeg encoding ({eye_name} eye)")
+
             # Verify the file was created and has content
             if not temp_video_path.exists():
                 raise RuntimeError(f"Temporary video file was not created: {temp_video_path}")
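
Note: the commit verifies that the temp file exists and reports its size. A slightly stricter check could also confirm the encoded frame count before SAM2 consumes the file; a hypothetical helper (not in the commit) might look like:

from pathlib import Path
import cv2

def verify_temp_video(path: Path, expected_frames: int) -> None:
    # Illustrative sanity check; names and behaviour are assumptions, not the commit's code.
    if not path.exists() or path.stat().st_size == 0:
        raise RuntimeError(f"Temporary video missing or empty: {path}")
    cap = cv2.VideoCapture(str(path))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    if frame_count < expected_frames:
        raise RuntimeError(f"Expected {expected_frames} frames, found {frame_count}")
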
@@ -293,16 +299,28 @@ class VR180Processor(VideoProcessor):
 
             print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)")
 
+            # Memory monitoring and cleanup before SAM2 initialization
+            num_frames = len(eye_frames)  # Store count before freeing
+            first_frame = eye_frames[0].copy()  # Copy first frame for detection before freeing
+            self._print_memory_step(f"Before SAM2 init ({eye_name} eye, {num_frames} frames)")
+
+            # CRITICAL: Explicitly free eye_frames from memory before SAM2 loads the same video
+            # This prevents the OOM issue where both Python frames and SAM2 frames exist simultaneously
+            del eye_frames  # Free the frames array
+            self._aggressive_memory_cleanup(f"SAM2 init for {eye_name} eye")
+
             # Initialize SAM2 with video path
+            self._print_memory_step(f"Starting SAM2 init ({eye_name} eye)")
             self.sam2_model.init_video_state(video_path=str(temp_video_path))
+            self._print_memory_step(f"SAM2 initialized ({eye_name} eye)")
 
             # Detect persons in first frame
-            first_frame = eye_frames[0]
             detections = self.detector.detect_persons(first_frame)
 
             if not detections:
                 warnings.warn(f"No persons detected in {eye_name} eye, chunk {chunk_idx}")
-                return self._create_empty_masks(eye_frames)
+                # Return empty masks for the number of frames
+                return self._create_empty_masks_from_count(num_frames, first_frame.shape)
 
             print(f"Detected {len(detections)} persons in {eye_name} eye first frame")
 
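
Note: the core OOM fix in this hunk is releasing the decoded eye_frames list before SAM2 decodes the same temp video, so two full copies of the chunk are never resident at once. A rough sketch of that release step, mirroring _aggressive_memory_cleanup and assuming torch may or may not be installed (helper name is illustrative, not from the commit):

import gc
import time

def release_frames(frames: list) -> None:
    # Illustrative helper; the commit uses `del eye_frames` plus _aggressive_memory_cleanup().
    frames.clear()              # drop the references held by the list itself
    for _ in range(3):          # several GC passes, as the commit does
        gc.collect()
    try:
        import torch            # optional: also release cached CUDA blocks
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except ImportError:
        pass
    time.sleep(0.1)             # brief pause so freed pages can be returned to the OS
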
@@ -312,15 +330,33 @@ class VR180Processor(VideoProcessor):
             # Add prompts
             object_ids = self.sam2_model.add_person_prompts(0, box_prompts, labels)
 
-            # Propagate masks
+            # Propagate masks (most expensive operation)
+            self._print_memory_step(f"Before SAM2 propagation ({eye_name} eye, {num_frames} frames)")
             video_segments = self.sam2_model.propagate_masks(
                 start_frame=0,
-                max_frames=len(eye_frames)
+                max_frames=num_frames
             )
+            self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
 
+            # Apply masks - need to reload frames from temp video since we freed the original frames
+            self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)")
+
+            # Read frames back from the temp video for mask application
+            cap = cv2.VideoCapture(str(temp_video_path))
+            reloaded_frames = []
+
+            for frame_idx in range(num_frames):
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                reloaded_frames.append(frame)
+            cap.release()
+
+            self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
+
             # Apply masks
             matted_frames = []
-            for frame_idx, frame in enumerate(eye_frames):
+            for frame_idx, frame in enumerate(reloaded_frames):
                 if frame_idx in video_segments:
                     frame_masks = video_segments[frame_idx]
                     combined_mask = self.sam2_model.get_combined_mask(frame_masks)
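
Note: reloading frames from the temp video trades decode time for memory. If peak memory were still a concern, a streaming variant (not what this commit does) could yield frames one at a time instead of materializing reloaded_frames:

import cv2

def iter_frames(video_path: str):
    """Yield decoded frames one at a time so only one frame is live per iteration (illustrative)."""
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
    finally:
        cap.release()

# Hypothetical usage: for frame_idx, frame in enumerate(iter_frames(str(temp_video_path))): ...
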
@@ -335,6 +371,10 @@ class VR180Processor(VideoProcessor):
 
                 matted_frames.append(matted_frame)
 
+            # Free reloaded frames
+            del reloaded_frames
+            self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
+
             return matted_frames
 
         finally:
@@ -410,6 +450,20 @@ class VR180Processor(VideoProcessor):
 
         return validated_frames
 
+    def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
+        """Create empty masks when no persons detected (without frame array)"""
+        empty_frames = []
+        for _ in range(num_frames):
+            if self.config.output.format == "alpha":
+                # Transparent output
+                output = np.zeros((frame_shape[0], frame_shape[1], 4), dtype=np.uint8)
+            else:
+                # Green screen background
+                output = np.full((frame_shape[0], frame_shape[1], 3),
+                                 self.config.output.background_color, dtype=np.uint8)
+            empty_frames.append(output)
+        return empty_frames
+
     def _get_mask_area(self, frame: np.ndarray) -> float:
         """Get mask area from processed frame"""
         if frame.shape[2] == 4:  # Alpha channel
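
Note: the new helper produces either a fully transparent H×W×4 buffer ("alpha" output) or a solid H×W×3 background frame. A small illustrative check of those shapes, with assumed dimensions and an assumed green background colour rather than the config's values:

import numpy as np

height, width = 1080, 1920                                               # illustrative eye-frame size
alpha_frame = np.zeros((height, width, 4), dtype=np.uint8)               # transparent RGBA frame
green_frame = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)   # solid background (BGR order)

print(alpha_frame.shape, green_frame.shape)  # (1080, 1920, 4) (1080, 1920, 3)
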