Files
test2/vr180_streaming/stereo_manager.py
2025-07-27 08:01:08 -07:00

324 lines
12 KiB
Python

"""
Stereo consistency manager for VR180 side-by-side video processing
"""
import numpy as np
from typing import Tuple, List, Dict, Any, Optional
import cv2
import warnings
class StereoConsistencyManager:
    """Keep left- and right-eye results consistent for side-by-side video.

    The manager splits/recombines SBS frames and masks, transfers detection
    boxes from one eye to the other with a size-based disparity estimate, and
    validates the slave-eye masks against the master-eye masks.
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Read stereo settings from ``config['stereo']`` and reset statistics.

        Args:
            config: Configuration dict; the optional 'stereo' sub-dict may
                provide 'master_eye', 'disparity_correction',
                'consistency_threshold', 'baseline' and 'focal_length'.
        """
        self.config = config
        # `or {}` also tolerates an explicit `stereo: None` entry.
        stereo_cfg = config.get('stereo') or {}
        self.master_eye = stereo_cfg.get('master_eye', 'left')
        self.disparity_correction = stereo_cfg.get('disparity_correction', True)
        self.consistency_threshold = stereo_cfg.get('consistency_threshold', 0.3)
        # Stereo calibration parameters (can be loaded from config)
        self.baseline = stereo_cfg.get('baseline', 65.0)  # mm, typical IPD
        self.focal_length = stereo_cfg.get('focal_length', 1000.0)  # pixels
        # Statistics tracking
        self.stats = self._new_stats()
        print("👀 Stereo consistency manager initialized:")
        print(f" Master eye: {self.master_eye}")
        print(f" Disparity correction: {self.disparity_correction}")
        print(f" Consistency threshold: {self.consistency_threshold}")

    @staticmethod
    def _new_stats() -> Dict[str, Any]:
        """Return a fresh statistics dict with every counter at zero."""
        return {
            'frames_processed': 0,
            'corrections_applied': 0,
            'detection_transfers': 0,
            'mask_validations': 0,
        }

    def split_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split side-by-side frame into left and right eye views

        Args:
            frame: SBS frame; axis 1 is split at ``width // 2``

        Returns:
            Tuple of (left_eye, right_eye) frames. Both are slices (views) of
            ``frame``; for an odd width the right half is one column wider.
        """
        split_point = frame.shape[1] // 2
        return frame[:, :split_point], frame[:, split_point:]

    def combine_frames(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray:
        """
        Combine left and right eye frames back to SBS format

        Args:
            left_eye: Left eye frame
            right_eye: Right eye frame

        Returns:
            Combined SBS frame
        """
        # Ensure same height: both eyes are resized to the smaller height
        # while each keeps its own width (cv2.resize takes (width, height)).
        if left_eye.shape[0] != right_eye.shape[0]:
            target_height = min(left_eye.shape[0], right_eye.shape[0])
            left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
            right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))
        return np.hstack([left_eye, right_eye])

    def transfer_detections(self,
                            detections: List[Dict[str, Any]],
                            direction: str = 'left_to_right') -> List[Dict[str, Any]]:
        """
        Transfer detections from master to slave eye with disparity adjustment

        Args:
            detections: List of detection dicts with 'box' key
                ([x1, y1, x2, y2]; any indexable sequence is accepted)
            direction: Transfer direction ('left_to_right' or 'right_to_left');
                any value other than 'left_to_right' is treated as
                'right_to_left'

        Returns:
            Transferred detections adjusted for stereo disparity. Each result
            is a shallow copy of the input dict with a new list-valued 'box',
            'confidence' scaled by 0.95 and 'transferred' set to True.
        """
        # Right eye sees objects shifted left; left eye sees them shifted right.
        sign = -1.0 if direction == 'left_to_right' else 1.0
        transferred = []
        for det in detections:
            box = det['box']  # [x1, y1, x2, y2]
            if self.disparity_correction:
                # Closer objects (wider boxes) get a larger disparity.
                depth = self._estimate_depth_from_size(box[2] - box[0])
                offset = sign * self._calculate_disparity(depth)
                adjusted_box = [box[0] + offset, box[1], box[2] + offset, box[3]]
            else:
                # No disparity correction; list() also handles tuple boxes,
                # which have no .copy() method.
                adjusted_box = list(box)
            transferred_det = det.copy()
            transferred_det['box'] = adjusted_box
            transferred_det['confidence'] = det.get('confidence', 1.0) * 0.95  # Slight reduction
            transferred_det['transferred'] = True
            transferred.append(transferred_det)
        self.stats['detection_transfers'] += len(detections)
        return transferred

    def validate_masks(self,
                       left_masks: np.ndarray,
                       right_masks: np.ndarray,
                       frame_idx: int = 0) -> np.ndarray:
        """
        Validate and correct right eye masks based on left eye

        Args:
            left_masks: Master eye masks
            right_masks: Slave eye masks to validate
            frame_idx: Current frame index for logging

        Returns:
            Validated/corrected right eye masks
        """
        self.stats['mask_validations'] += 1
        # Quick validation - compare mask areas (count of positive pixels)
        left_area = np.sum(left_masks > 0)
        right_area = np.sum(right_masks > 0)
        if left_area == 0:
            # No person in left eye, clear right eye too
            if right_area > 0:
                warnings.warn(f"Frame {frame_idx}: No person in left eye but found in right - clearing")
                self.stats['corrections_applied'] += 1
                return np.zeros_like(right_masks)
            return right_masks
        area_ratio = right_area / (left_area + 1e-6)
        if abs(area_ratio - 1.0) > self.consistency_threshold:
            print(f" Frame {frame_idx}: Area mismatch (ratio={area_ratio:.2f}) - applying correction")
            self.stats['corrections_applied'] += 1
            if area_ratio < 0.5 or area_ratio > 2.0:
                # Significant difference - rebuild from the master mask
                right_masks = self._correct_mask_from_template(left_masks, right_masks)
            else:
                # Minor difference - blend masks
                right_masks = self._blend_masks(left_masks, right_masks, area_ratio)
        return right_masks

    def combine_masks(self, left_masks: np.ndarray, right_masks: np.ndarray) -> np.ndarray:
        """
        Combine left and right eye masks back to SBS format

        Args:
            left_masks: Left eye masks
            right_masks: Right eye masks

        Returns:
            Combined SBS masks

        Raises:
            ValueError: If the masks are not both 2-D or both 3-D
        """
        if left_masks.ndim == 2 and right_masks.ndim == 2:
            # Single channel masks
            return np.hstack([left_masks, right_masks])
        if left_masks.ndim == 3 and right_masks.ndim == 3:
            # Multi-channel masks (e.g., per-object), joined along axis 1.
            # NOTE(review): this assumes axis 1 is the width axis, i.e. an
            # (H, W, C) layout - confirm against the mask producer.
            return np.concatenate([left_masks, right_masks], axis=1)
        raise ValueError(f"Incompatible mask dimensions: {left_masks.shape} vs {right_masks.shape}")

    def _estimate_depth_from_size(self, object_width_pixels: float) -> float:
        """
        Estimate object depth from its width in pixels
        Assumes average human width of 45cm

        Args:
            object_width_pixels: Width of detected person in pixels

        Returns:
            Estimated depth in meters, clamped to [0.5, 10.0]
        """
        HUMAN_WIDTH_M = 0.45  # Average human shoulder width
        # Using similar triangles: depth = (focal_length * real_width) / pixel_width
        # (pixel width is floored at 1 to avoid dividing by zero)
        depth = (self.focal_length * HUMAN_WIDTH_M) / max(object_width_pixels, 1)
        # Clamp to reasonable range (0.5m to 10m); return a plain Python float
        return float(np.clip(depth, 0.5, 10.0))

    def _calculate_disparity(self, depth_m: float) -> float:
        """
        Calculate stereo disparity in pixels for given depth

        Args:
            depth_m: Depth in meters (must be non-zero)

        Returns:
            Disparity in pixels
        """
        # Disparity = (baseline * focal_length) / depth
        # Convert baseline from mm to m
        return (self.baseline / 1000.0 * self.focal_length) / depth_m

    @staticmethod
    def _shift_columns(mask: np.ndarray, shift: int) -> np.ndarray:
        """Translate ``mask`` along axis 1 by ``shift`` pixels, zero-filling.

        A positive shift moves content towards higher column indices; content
        moved past either edge is dropped.
        """
        shifted = np.zeros_like(mask)
        width = mask.shape[1]
        if shift == 0:
            shifted[...] = mask
        elif 0 < shift < width:
            shifted[:, shift:] = mask[:, :-shift]
        elif -width < shift < 0:
            shifted[:, :shift] = mask[:, -shift:]
        return shifted

    def _correct_mask_from_template(self,
                                    template_mask: np.ndarray,
                                    target_mask: np.ndarray) -> np.ndarray:
        """
        Correct target mask using template mask with disparity adjustment

        Args:
            template_mask: Master eye mask to use as template
            target_mask: Mask to correct (not read; the result is built from
                the template alone)

        Returns:
            Corrected mask with the template's shape and dtype
        """
        if not self.disparity_correction:
            # Simple copy without disparity
            return template_mask.copy()
        foreground = template_mask > 0
        if not foreground.any():
            # No valid mask to correct from
            return template_mask.copy()
        # Estimate depth from the number of occupied columns (reduce over
        # every axis except axis 1, so multi-channel masks are not
        # double-counted).
        other_axes = tuple(ax for ax in range(foreground.ndim) if ax != 1)
        mask_width = int(np.sum(np.any(foreground, axis=other_axes)))
        depth = self._estimate_depth_from_size(mask_width)
        disparity = int(self._calculate_disparity(depth))
        # With a left master the right eye sees content shifted left;
        # otherwise the left eye sees it shifted right. An integer column
        # shift is exact, so no interpolation is needed and the dtype matches
        # the other return paths.
        shift = -disparity if self.master_eye == 'left' else disparity
        return self._shift_columns(template_mask, shift)

    def _blend_masks(self,
                     mask1: np.ndarray,
                     mask2: np.ndarray,
                     area_ratio: float) -> np.ndarray:
        """
        Blend two masks based on area ratio

        Args:
            mask1: First mask
            mask2: Second mask
            area_ratio: Ratio of mask2/mask1 areas

        Returns:
            Blended mask thresholded at 0.5, as 0/1 values in mask1's dtype

        NOTE(review): validate_masks only calls this when the deviation
        already exceeds consistency_threshold, so on that path the weight
        saturates at 1.0 and the result is mask1 thresholded at 0.5.
        """
        # Calculate blend weight based on how far off the ratio is; a
        # non-positive threshold means any deviation counts as maximal
        # (and avoids dividing by zero).
        deviation = abs(area_ratio - 1.0)
        if self.consistency_threshold > 0:
            blend_weight = min(deviation / self.consistency_threshold, 1.0)
        else:
            blend_weight = 1.0
        # Blend towards mask1 (master) based on weight
        blended = mask1 * blend_weight + mask2 * (1 - blend_weight)
        # Threshold to binary
        return (blended > 0.5).astype(mask1.dtype)

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics

        Updates 'frames_processed' (mirrors 'mask_validations') and
        'correction_rate' in the internal stats, then returns a copy.
        """
        frames = self.stats.get('mask_validations', 0)
        self.stats['frames_processed'] = frames
        if frames > 0:
            self.stats['correction_rate'] = self.stats['corrections_applied'] / frames
        else:
            self.stats['correction_rate'] = 0.0
        return self.stats.copy()

    def reset_stats(self) -> None:
        """Reset statistics"""
        self.stats = self._new_stats()