stereo mask working

2025-07-31 11:13:31 -07:00
parent 0057017ac4
commit b97a3752a7
8 changed files with 1247 additions and 206 deletions
--- a/core/sam2_processor.py
+++ b/core/sam2_processor.py
@@ -237,13 +237,21 @@ class SAM2Processor:
        
        # Fallback to synchronous creation
        try:
+            logger.info(f"Creating low-res video synchronously: {input_video_path} -> {output_video_path}")
            self.create_low_res_video(input_video_path, output_video_path, scale)
-            return os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0
+            
+            if os.path.exists(output_video_path) and os.path.getsize(output_video_path) > 0:
+                logger.info(f"Successfully created low-res video: {output_video_path} ({os.path.getsize(output_video_path)} bytes)")
+                return True
+            else:
+                logger.error(f"Low-res video creation failed - file doesn't exist or is empty: {output_video_path}")
+                return False
        except Exception as e:
            logger.error(f"Failed to create low-res video {output_video_path}: {e}")
            return False
    
-    def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]]) -> bool:
+    def add_yolo_prompts_to_predictor(self, inference_state, prompts: List[Dict[str, Any]], 
+                                     inference_scale: float = 1.0) -> bool:
        """
        Add YOLO detection prompts to SAM2 predictor.
        Includes error handling matching the working spec.md implementation.
@@ -251,6 +259,7 @@ class SAM2Processor:
        Args:
            inference_state: SAM2 inference state
            prompts: List of prompt dictionaries with obj_id and bbox
+            inference_scale: Factor each bbox is multiplied by before it is passed to the predictor (1.0 leaves boxes unchanged)
            
        Returns:
            True if prompts were added successfully
@@ -268,14 +277,20 @@ class SAM2Processor:
            bbox = prompt['bbox']
            confidence = prompt.get('confidence', 'unknown')
            
-            logger.info(f"SAM2 Debug: Adding prompt {i+1}/{len(prompts)}: Object {obj_id}, bbox={bbox}, conf={confidence}")
+            # Scale bounding box for SAM2 inference resolution
+            scaled_bbox = bbox * inference_scale
+            
+            logger.info(f"SAM2 Debug: Adding prompt {i+1}/{len(prompts)}: Object {obj_id}")
+            logger.info(f"  Original bbox: {bbox}")
+            logger.info(f"  Scaled bbox (scale={inference_scale}): {scaled_bbox}")
+            logger.info(f"  Confidence: {confidence}")
            
            try:
                _, out_obj_ids, out_mask_logits = self.predictor.add_new_points_or_box(
                    inference_state=inference_state,
                    frame_idx=0,
                    obj_id=obj_id,
-                    box=bbox.astype(np.float32),
+                    box=scaled_bbox.astype(np.float32),
                )
                
                logger.info(f"SAM2 Debug: ✓ Successfully added Object {obj_id} - returned obj_ids: {out_obj_ids}")
@@ -443,7 +458,7 @@ class SAM2Processor:
            
            # Add prompts or previous masks
            if yolo_prompts:
-                if not self.add_yolo_prompts_to_predictor(inference_state, yolo_prompts):
+                if not self.add_yolo_prompts_to_predictor(inference_state, yolo_prompts, inference_scale):
                    return None
            elif previous_masks:
                if not self.add_previous_masks_to_predictor(inference_state, previous_masks):
@@ -583,7 +598,7 @@ class SAM2Processor:
            inference_state = self.predictor.init_state(video_path=temp_video_path, async_loading_frames=True)
            
            # Add prompts
-            if not self.add_yolo_prompts_to_predictor(inference_state, prompts):
+            if not self.add_yolo_prompts_to_predictor(inference_state, prompts, inference_scale):
                logger.error("Failed to add prompts for first frame debug")
                return False
            
@@ -798,7 +813,7 @@ class SAM2Processor:
                    eye_prompt['obj_id'] = 1  # Always use obj_id=1 for single eye
                    eye_prompts.append(eye_prompt)
                
-                if not self.add_yolo_prompts_to_predictor(inference_state, eye_prompts):
+                if not self.add_yolo_prompts_to_predictor(inference_state, eye_prompts, inference_scale):
                    logger.error(f"Failed to add prompts for {eye_side} eye")
                    return None