category A round 2

2025-07-26 11:56:51 -07:00
parent b642b562f0
commit 463f881eaf
2 changed files with 152 additions and 5 deletions


@@ -232,6 +232,9 @@ class VR180Processor(VideoProcessor):
             # Use the original video's framerate - access through parent class
             original_fps = self.fps if hasattr(self, 'fps') else 30.0
             print(f"Using framerate: {original_fps} fps")
 
+            # Memory monitoring before ffmpeg
+            self._print_memory_step(f"Before ffmpeg encoding ({eye_name} eye)")
 
             # Try GPU encoding first, fallback to CPU
             gpu_cmd = [
                 'ffmpeg', '-y',  # -y to overwrite output file
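
The hunk above only shows gpu_cmd being assembled; the fallback path the comment mentions sits outside this diff. A minimal sketch of the GPU-first, CPU-fallback pattern, assuming plain subprocess invocation (encode_with_fallback and both command lists are illustrative, not the project's actual code):

import subprocess

def encode_with_fallback(gpu_cmd: list, cpu_cmd: list) -> None:
    """Run the GPU ffmpeg command; rerun on CPU if it fails."""
    try:
        # check=True raises CalledProcessError on a nonzero exit code
        subprocess.run(gpu_cmd, check=True, capture_output=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        # GPU encoder (or ffmpeg itself) unavailable - retry with the CPU command
        subprocess.run(cpu_cmd, check=True, capture_output=True)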
@@ -283,6 +286,9 @@ class VR180Processor(VideoProcessor):
             print(f"Created temp video successfully")
 
+            # Memory monitoring after ffmpeg
+            self._print_memory_step(f"After ffmpeg encoding ({eye_name} eye)")
 
             # Verify the file was created and has content
             if not temp_video_path.exists():
                 raise RuntimeError(f"Temporary video file was not created: {temp_video_path}")
@@ -293,16 +299,28 @@ class VR180Processor(VideoProcessor):
             print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)")
 
+            # Memory monitoring and cleanup before SAM2 initialization
+            num_frames = len(eye_frames)  # Store count before freeing
+            first_frame = eye_frames[0].copy()  # Copy first frame for detection before freeing
+            self._print_memory_step(f"Before SAM2 init ({eye_name} eye, {num_frames} frames)")
+            # CRITICAL: Explicitly free eye_frames from memory before SAM2 loads the same video.
+            # This prevents the OOM issue where both the Python frames and SAM2's frames exist simultaneously.
+            del eye_frames  # Free the frames array
+            self._aggressive_memory_cleanup(f"SAM2 init for {eye_name} eye")
 
             # Initialize SAM2 with video path
+            self._print_memory_step(f"Starting SAM2 init ({eye_name} eye)")
             self.sam2_model.init_video_state(video_path=str(temp_video_path))
+            self._print_memory_step(f"SAM2 initialized ({eye_name} eye)")
 
             # Detect persons in first frame
-            first_frame = eye_frames[0]
             detections = self.detector.detect_persons(first_frame)
             if not detections:
                 warnings.warn(f"No persons detected in {eye_name} eye, chunk {chunk_idx}")
-                return self._create_empty_masks(eye_frames)
+                # Return empty masks for the number of frames
+                return self._create_empty_masks_from_count(num_frames, first_frame.shape)
 
             print(f"Detected {len(detections)} persons in {eye_name} eye first frame")
@@ -312,15 +330,33 @@ class VR180Processor(VideoProcessor):
             # Add prompts
             object_ids = self.sam2_model.add_person_prompts(0, box_prompts, labels)
 
-            # Propagate masks
+            # Propagate masks (most expensive operation)
+            self._print_memory_step(f"Before SAM2 propagation ({eye_name} eye, {num_frames} frames)")
             video_segments = self.sam2_model.propagate_masks(
                 start_frame=0,
-                max_frames=len(eye_frames)
+                max_frames=num_frames
             )
+            self._print_memory_step(f"After SAM2 propagation ({eye_name} eye)")
 
+            # Apply masks - need to reload frames from the temp video since we freed the originals
+            self._print_memory_step(f"Before reloading frames for mask application ({eye_name} eye)")
+            # Read frames back from the temp video for mask application
+            cap = cv2.VideoCapture(str(temp_video_path))
+            reloaded_frames = []
+            for frame_idx in range(num_frames):
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                reloaded_frames.append(frame)
+            cap.release()
+            self._print_memory_step(f"Reloaded {len(reloaded_frames)} frames for mask application")
 
             # Apply masks
             matted_frames = []
-            for frame_idx, frame in enumerate(eye_frames):
+            for frame_idx, frame in enumerate(reloaded_frames):
                 if frame_idx in video_segments:
                     frame_masks = video_segments[frame_idx]
                     combined_mask = self.sam2_model.get_combined_mask(frame_masks)
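
The reload loop re-decodes frames from the temp video that was just fed to SAM2, so peak memory only ever holds one copy of the frames. The same logic as a standalone helper, with an added count check (reload_frames and its warning are illustrative; note too that if the temp encode is lossy, re-decoded frames differ slightly from the originals):

import warnings
import cv2

def reload_frames(video_path: str, num_frames: int) -> list:
    """Re-decode up to num_frames frames from a video written earlier."""
    cap = cv2.VideoCapture(video_path)
    frames = []
    for _ in range(num_frames):
        ret, frame = cap.read()
        if not ret:
            break  # the encoder may have emitted fewer frames than expected
        frames.append(frame)
    cap.release()
    if len(frames) != num_frames:
        warnings.warn(f"Expected {num_frames} frames, decoded {len(frames)}")
    return frames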
@@ -335,6 +371,10 @@ class VR180Processor(VideoProcessor):
                 matted_frames.append(matted_frame)
 
+            # Free reloaded frames
+            del reloaded_frames
+            self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
 
             return matted_frames
         finally:
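
_print_memory_step appears at every stage of this commit but is defined elsewhere. A minimal sketch, assuming it reports the process's resident set size via psutil plus CUDA usage when torch is importable (helper name and output format are hypothetical):

import os
import psutil

def print_memory_step(step: str) -> None:
    """Print RSS (and CUDA allocation, if available) for one pipeline step."""
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3
    line = f"[MEM] {step}: RSS {rss_gb:.2f} GB"
    try:
        import torch
        if torch.cuda.is_available():
            line += f", CUDA {torch.cuda.memory_allocated() / 1024 ** 3:.2f} GB"
    except ImportError:
        pass
    print(line)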
@@ -410,6 +450,20 @@ class VR180Processor(VideoProcessor):
         return validated_frames
 
+    def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
+        """Create empty masks when no persons detected (without frame array)"""
+        empty_frames = []
+        for _ in range(num_frames):
+            if self.config.output.format == "alpha":
+                # Transparent output
+                output = np.zeros((frame_shape[0], frame_shape[1], 4), dtype=np.uint8)
+            else:
+                # Green screen background
+                output = np.full((frame_shape[0], frame_shape[1], 3),
+                                 self.config.output.background_color, dtype=np.uint8)
+            empty_frames.append(output)
+        return empty_frames
+
     def _get_mask_area(self, frame: np.ndarray) -> float:
         """Get mask area from processed frame"""
         if frame.shape[2] == 4:  # Alpha channel
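
_get_mask_area is cut off in this view right after the alpha-channel branch begins. For illustration only, one plausible way the alpha case could be computed (the fraction-of-foreground-pixels convention is an assumption, not the project's confirmed implementation):

import numpy as np

def mask_area_from_alpha(frame: np.ndarray) -> float:
    """Fraction of pixels whose alpha channel marks them as foreground."""
    alpha = frame[:, :, 3]  # BGRA frame: channel 3 is alpha
    return float(np.count_nonzero(alpha)) / alpha.size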