This commit is contained in:
2025-07-27 09:52:56 -07:00
parent 43be574729
commit 66895a87a0
3 changed files with 298 additions and 66 deletions

View File

@@ -15,7 +15,7 @@ import warnings
from .frame_reader import StreamingFrameReader
from .frame_writer import StreamingFrameWriter
from .stereo_manager import StereoConsistencyManager
from .sam2_streaming import SAM2StreamingProcessor
from .sam2_streaming_simple import SAM2StreamingProcessor
from .detector import PersonDetector
from .config import StreamingConfig
@@ -102,26 +102,17 @@ class VR180StreamingProcessor:
self.initialize()
self.start_time = time.time()
# Initialize SAM2 states for both eyes (streaming mode - no video loading)
print("🎯 Initializing SAM2 streaming states...")
video_info = self.frame_reader.get_video_info()
left_state = self.sam2_processor.init_state(
video_info,
eye='left'
)
right_state = self.sam2_processor.init_state(
video_info,
eye='right'
)
# Simple SAM2 initialization (no complex state management needed)
print("🎯 SAM2 streaming processor ready...")
# Process first frame to establish detections
print("🔍 Processing first frame for initial detection...")
if not self._initialize_tracking(left_state, right_state):
if not self._initialize_tracking():
raise RuntimeError("Failed to initialize tracking - no persons detected")
# Main streaming loop
print("\n🎬 Starting streaming processing loop...")
self._streaming_loop(left_state, right_state)
self._streaming_loop()
except KeyboardInterrupt:
print("\n⚠️ Processing interrupted by user")
@@ -135,7 +126,7 @@ class VR180StreamingProcessor:
finally:
self._finalize()
def _initialize_tracking(self, left_state: Dict, right_state: Dict) -> bool:
def _initialize_tracking(self) -> bool:
"""Initialize tracking with first frame detection"""
# Read and process first frame
first_frame = self.frame_reader.read_frame()
@@ -159,19 +150,15 @@ class VR180StreamingProcessor:
print(f" Detected {len(detections)} person(s) in first frame")
# Add detections to both eyes (streaming - pass frame data)
self.sam2_processor.add_detections(left_state, left_eye, detections, frame_idx=0)
# Process with simple SAM2 approach
left_masks = self.sam2_processor.add_frame_and_detections(left_eye, detections, 0)
# Transfer detections to slave eye
# Transfer detections to right eye
transferred_detections = self.stereo_manager.transfer_detections(
detections,
'left_to_right' if self.stereo_manager.master_eye == 'left' else 'right_to_left'
)
self.sam2_processor.add_detections(right_state, right_eye, transferred_detections, frame_idx=0)
# Process and write first frame
left_masks = self.sam2_processor.propagate_single_frame(left_state, left_eye, 0)
right_masks = self.sam2_processor.propagate_single_frame(right_state, right_eye, 0)
right_masks = self.sam2_processor.add_frame_and_detections(right_eye, transferred_detections, 0)
# Apply masks and write
processed_frame = self._apply_masks_to_frame(first_frame, left_masks, right_masks)
@@ -180,7 +167,7 @@ class VR180StreamingProcessor:
self.frames_processed = 1
return True
def _streaming_loop(self, left_state: Dict, right_state: Dict) -> None:
def _streaming_loop(self) -> None:
"""Main streaming processing loop"""
frame_times = []
last_log_time = time.time()
@@ -196,9 +183,9 @@ class VR180StreamingProcessor:
# Split into eyes
left_eye, right_eye = self.stereo_manager.split_frame(frame)
# Propagate masks for both eyes (streaming approach)
left_masks = self.sam2_processor.propagate_single_frame(left_state, left_eye, frame_idx)
right_masks = self.sam2_processor.propagate_single_frame(right_state, right_eye, frame_idx)
# Process frames with simple approach (no detections in regular frames)
left_masks = self.sam2_processor.add_frame_and_detections(left_eye, [], frame_idx)
right_masks = self.sam2_processor.add_frame_and_detections(right_eye, [], frame_idx)
# Validate stereo consistency
right_masks = self.stereo_manager.validate_masks(
@@ -208,9 +195,7 @@ class VR180StreamingProcessor:
# Apply continuous correction if enabled
if (self.config.matting.continuous_correction and
frame_idx % self.config.matting.correction_interval == 0):
self._apply_continuous_correction(
left_state, right_state, left_eye, right_eye, frame_idx
)
self._apply_continuous_correction(left_eye, right_eye, frame_idx)
# Apply masks and write frame
processed_frame = self._apply_masks_to_frame(frame, left_masks, right_masks)
@@ -282,21 +267,20 @@ class VR180StreamingProcessor:
return left_processed
def _apply_continuous_correction(self,
left_state: Dict,
right_state: Dict,
left_eye: np.ndarray,
right_eye: np.ndarray,
frame_idx: int) -> None:
"""Apply continuous correction to maintain tracking accuracy"""
print(f"\n🔄 Applying continuous correction at frame {frame_idx}")
# Detect on master eye
# Detect on master eye and add fresh detections
master_eye = left_eye if self.stereo_manager.master_eye == 'left' else right_eye
master_state = left_state if self.stereo_manager.master_eye == 'left' else right_state
detections = self.detector.detect_persons(master_eye)
self.sam2_processor.apply_continuous_correction(
master_state, master_eye, frame_idx, self.detector
)
if detections:
print(f" Adding {len(detections)} fresh detection(s) for correction")
# Add fresh detections to help correct drift
self.sam2_processor.add_frame_and_detections(master_eye, detections, frame_idx)
# Transfer corrections to slave eye
# Note: This is simplified - actual implementation would transfer the refined prompts