simplify

2025-07-27 09:52:56 -07:00
parent 43be574729
commit 66895a87a0
3 changed files with 298 additions and 66 deletions
--- a/vr180_streaming/streaming_processor.py
+++ b/vr180_streaming/streaming_processor.py
@@ -15,7 +15,7 @@ import warnings
 from .frame_reader import StreamingFrameReader
 from .frame_writer import StreamingFrameWriter
 from .stereo_manager import StereoConsistencyManager
-from .sam2_streaming import SAM2StreamingProcessor
+from .sam2_streaming_simple import SAM2StreamingProcessor
 from .detector import PersonDetector
 from .config import StreamingConfig

@@ -102,26 +102,17 @@ class VR180StreamingProcessor:
            self.initialize()
            self.start_time = time.time()
            
-            # Initialize SAM2 states for both eyes (streaming mode - no video loading)
-            print("🎯 Initializing SAM2 streaming states...")
-            video_info = self.frame_reader.get_video_info()
-            left_state = self.sam2_processor.init_state(
-                video_info,
-                eye='left'
-            )
-            right_state = self.sam2_processor.init_state(
-                video_info,
-                eye='right'
-            )
+            # Simple SAM2 initialization (no complex state management needed)
+            print("🎯 SAM2 streaming processor ready...")
            
            # Process first frame to establish detections
            print("🔍 Processing first frame for initial detection...")
-            if not self._initialize_tracking(left_state, right_state):
+            if not self._initialize_tracking():
                raise RuntimeError("Failed to initialize tracking - no persons detected")
                
            # Main streaming loop
            print("\n🎬 Starting streaming processing loop...")
-            self._streaming_loop(left_state, right_state)
+            self._streaming_loop()
            
        except KeyboardInterrupt:
            print("\n⚠️  Processing interrupted by user")
@@ -135,7 +126,7 @@ class VR180StreamingProcessor:
        finally:
            self._finalize()
            
-    def _initialize_tracking(self, left_state: Dict, right_state: Dict) -> bool:
+    def _initialize_tracking(self) -> bool:
        """Initialize tracking with first frame detection"""
        # Read and process first frame
        first_frame = self.frame_reader.read_frame()
@@ -159,19 +150,15 @@ class VR180StreamingProcessor:
            
        print(f"   Detected {len(detections)} person(s) in first frame")
        
-        # Add detections to both eyes (streaming - pass frame data)
-        self.sam2_processor.add_detections(left_state, left_eye, detections, frame_idx=0)
+        # Run SAM2 on the left eye with the first-frame detections (frame index 0)
+        left_masks = self.sam2_processor.add_frame_and_detections(left_eye, detections, 0)
        
-        # Transfer detections to slave eye
+        # Transfer detections across eyes (direction follows master_eye) for use on the right eye
        transferred_detections = self.stereo_manager.transfer_detections(
            detections, 
            'left_to_right' if self.stereo_manager.master_eye == 'left' else 'right_to_left'
        )
-        self.sam2_processor.add_detections(right_state, right_eye, transferred_detections, frame_idx=0)
-        
-        # Process and write first frame
-        left_masks = self.sam2_processor.propagate_single_frame(left_state, left_eye, 0)
-        right_masks = self.sam2_processor.propagate_single_frame(right_state, right_eye, 0)
+        right_masks = self.sam2_processor.add_frame_and_detections(right_eye, transferred_detections, 0)
        
        # Apply masks and write
        processed_frame = self._apply_masks_to_frame(first_frame, left_masks, right_masks)
@@ -180,7 +167,7 @@ class VR180StreamingProcessor:
        self.frames_processed = 1
        return True
        
-    def _streaming_loop(self, left_state: Dict, right_state: Dict) -> None:
+    def _streaming_loop(self) -> None:
        """Main streaming processing loop"""
        frame_times = []
        last_log_time = time.time()
@@ -196,9 +183,9 @@ class VR180StreamingProcessor:
            # Split into eyes
            left_eye, right_eye = self.stereo_manager.split_frame(frame)
            
-            # Propagate masks for both eyes (streaming approach)
-            left_masks = self.sam2_processor.propagate_single_frame(left_state, left_eye, frame_idx)
-            right_masks = self.sam2_processor.propagate_single_frame(right_state, right_eye, frame_idx)
+            # Run SAM2 on each eye; regular frames pass an empty detection list
+            left_masks = self.sam2_processor.add_frame_and_detections(left_eye, [], frame_idx)
+            right_masks = self.sam2_processor.add_frame_and_detections(right_eye, [], frame_idx)
            
            # Validate stereo consistency
            right_masks = self.stereo_manager.validate_masks(
@@ -208,9 +195,7 @@ class VR180StreamingProcessor:
            # Apply continuous correction if enabled
            if (self.config.matting.continuous_correction and 
                frame_idx % self.config.matting.correction_interval == 0):
-                self._apply_continuous_correction(
-                    left_state, right_state, left_eye, right_eye, frame_idx
-                )
+                self._apply_continuous_correction(left_eye, right_eye, frame_idx)
                
            # Apply masks and write frame
            processed_frame = self._apply_masks_to_frame(frame, left_masks, right_masks)
@@ -282,21 +267,20 @@ class VR180StreamingProcessor:
            return left_processed
            
    def _apply_continuous_correction(self,
-                                   left_state: Dict,
-                                   right_state: Dict,
                                   left_eye: np.ndarray,
                                   right_eye: np.ndarray,
                                   frame_idx: int) -> None:
        """Apply continuous correction to maintain tracking accuracy"""
        print(f"\n🔄 Applying continuous correction at frame {frame_idx}")
        
-        # Detect on master eye
+        # Detect on master eye and add fresh detections
        master_eye = left_eye if self.stereo_manager.master_eye == 'left' else right_eye
-        master_state = left_state if self.stereo_manager.master_eye == 'left' else right_state
+        detections = self.detector.detect_persons(master_eye)
        
-        self.sam2_processor.apply_continuous_correction(
-            master_state, master_eye, frame_idx, self.detector
-        )
+        if detections:
+            print(f"   Adding {len(detections)} fresh detection(s) for correction")
+            # Add fresh detections to help correct drift
+            self.sam2_processor.add_frame_and_detections(master_eye, detections, frame_idx)
        
        # Transfer corrections to slave eye
        # Note: This is simplified - actual implementation would transfer the refined prompts