streaming part1
vr180_streaming/stereo_manager.py | 324 lines | new file
@@ -0,0 +1,324 @@
"""
Stereo consistency manager for VR180 side-by-side video processing.
"""

import numpy as np
from typing import Tuple, List, Dict, Any, Optional
import cv2
import warnings


class StereoConsistencyManager:
    """Manage stereo consistency between left and right eye views."""

    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.master_eye = config.get('stereo', {}).get('master_eye', 'left')
        self.disparity_correction = config.get('stereo', {}).get('disparity_correction', True)
        self.consistency_threshold = config.get('stereo', {}).get('consistency_threshold', 0.3)

        # Stereo calibration parameters (can be loaded from config)
        self.baseline = config.get('stereo', {}).get('baseline', 65.0)  # mm, typical IPD
        self.focal_length = config.get('stereo', {}).get('focal_length', 1000.0)  # pixels

        # Statistics tracking
        self.stats = {
            'frames_processed': 0,
            'corrections_applied': 0,
            'detection_transfers': 0,
            'mask_validations': 0
        }

        print("👀 Stereo consistency manager initialized:")
        print(f"   Master eye: {self.master_eye}")
        print(f"   Disparity correction: {self.disparity_correction}")
        print(f"   Consistency threshold: {self.consistency_threshold}")
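
    # Illustrative configuration sketch (not part of the original file): the
    # keys below mirror the config.get('stereo', {}) lookups above, and the
    # values shown are simply the documented defaults.
    #
    #   config = {
    #       'stereo': {
    #           'master_eye': 'left',
    #           'disparity_correction': True,
    #           'consistency_threshold': 0.3,
    #           'baseline': 65.0,        # mm (interpupillary distance)
    #           'focal_length': 1000.0,  # pixels
    #       }
    #   }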

    def split_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split a side-by-side frame into left and right eye views

        Args:
            frame: SBS frame

        Returns:
            Tuple of (left_eye, right_eye) frames
        """
        height, width = frame.shape[:2]
        split_point = width // 2

        left_eye = frame[:, :split_point]
        right_eye = frame[:, split_point:]

        return left_eye, right_eye

    def combine_frames(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray:
        """
        Combine left and right eye frames back to SBS format

        Args:
            left_eye: Left eye frame
            right_eye: Right eye frame

        Returns:
            Combined SBS frame
        """
        # Ensure same height
        if left_eye.shape[0] != right_eye.shape[0]:
            target_height = min(left_eye.shape[0], right_eye.shape[0])
            left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
            right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))

        return np.hstack([left_eye, right_eye])
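
    # Illustrative round trip (a sketch, not part of the original file): for an
    # SBS frame of shape (2048, 4096, 3), split_frame() yields two views of
    # shape (2048, 2048, 3) and combine_frames() stitches them back:
    #
    #   left, right = manager.split_frame(sbs_frame)
    #   sbs_again = manager.combine_frames(left, right)
    #   assert sbs_again.shape == sbs_frame.shape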

    def transfer_detections(self,
                            detections: List[Dict[str, Any]],
                            direction: str = 'left_to_right') -> List[Dict[str, Any]]:
        """
        Transfer detections from the master to the slave eye with disparity adjustment

        Args:
            detections: List of detection dicts with a 'box' key
            direction: Transfer direction ('left_to_right' or 'right_to_left')

        Returns:
            Transferred detections adjusted for stereo disparity
        """
        transferred = []

        for det in detections:
            box = det['box']  # [x1, y1, x2, y2]

            if self.disparity_correction:
                # Calculate disparity based on estimated depth
                # (closer objects have larger disparity)
                box_width = box[2] - box[0]
                estimated_depth = self._estimate_depth_from_size(box_width)
                disparity = self._calculate_disparity(estimated_depth)

                # Apply the disparity shift
                if direction == 'left_to_right':
                    # Right eye sees objects shifted left
                    adjusted_box = [
                        box[0] - disparity,
                        box[1],
                        box[2] - disparity,
                        box[3]
                    ]
                else:  # right_to_left
                    # Left eye sees objects shifted right
                    adjusted_box = [
                        box[0] + disparity,
                        box[1],
                        box[2] + disparity,
                        box[3]
                    ]
            else:
                # No disparity correction
                adjusted_box = box.copy()

            # Create the transferred detection
            transferred_det = det.copy()
            transferred_det['box'] = adjusted_box
            transferred_det['confidence'] = det.get('confidence', 1.0) * 0.95  # Slight reduction
            transferred_det['transferred'] = True

            transferred.append(transferred_det)

        self.stats['detection_transfers'] += len(detections)
        return transferred
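
    # Worked example (illustrative, using the default 65 mm baseline and
    # 1000 px focal length): a person box 300 px wide is assigned a depth of
    # (1000 * 0.45) / 300 = 1.5 m, which gives a disparity of
    # (0.065 * 1000) / 1.5 ≈ 43 px, so a left-eye box [800, 200, 1100, 900]
    # transfers to the right eye as roughly [757, 200, 1057, 900].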

    def validate_masks(self,
                       left_masks: np.ndarray,
                       right_masks: np.ndarray,
                       frame_idx: int = 0) -> np.ndarray:
        """
        Validate and correct right eye masks based on the left eye

        Args:
            left_masks: Master eye masks
            right_masks: Slave eye masks to validate
            frame_idx: Current frame index for logging

        Returns:
            Validated/corrected right eye masks
        """
        self.stats['mask_validations'] += 1

        # Quick validation - compare mask areas
        left_area = np.sum(left_masks > 0)
        right_area = np.sum(right_masks > 0)

        if left_area == 0:
            # No person in the left eye, so clear the right eye too
            if right_area > 0:
                warnings.warn(f"Frame {frame_idx}: No person in left eye but found in right - clearing")
                self.stats['corrections_applied'] += 1
                return np.zeros_like(right_masks)
            return right_masks

        # Calculate area ratio
        area_ratio = right_area / (left_area + 1e-6)

        # Check if correction is needed
        if abs(area_ratio - 1.0) > self.consistency_threshold:
            print(f"   Frame {frame_idx}: Area mismatch (ratio={area_ratio:.2f}) - applying correction")
            self.stats['corrections_applied'] += 1

            # Apply correction based on severity
            if area_ratio < 0.5 or area_ratio > 2.0:
                # Significant difference - rebuild from the master-eye template
                right_masks = self._correct_mask_from_template(left_masks, right_masks)
            else:
                # Minor difference - blend masks
                right_masks = self._blend_masks(left_masks, right_masks, area_ratio)

        return right_masks
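
    # Worked example (illustrative, with the default consistency_threshold of
    # 0.3): left/right mask areas of 10_000 and 14_000 px give a ratio of 1.4,
    # which exceeds the threshold but stays inside [0.5, 2.0], so the masks are
    # blended; areas of 10_000 and 3_000 px give a ratio of 0.3, which falls
    # below 0.5 and triggers the template-based correction instead.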

    def combine_masks(self, left_masks: np.ndarray, right_masks: np.ndarray) -> np.ndarray:
        """
        Combine left and right eye masks back to SBS format

        Args:
            left_masks: Left eye masks
            right_masks: Right eye masks

        Returns:
            Combined SBS masks
        """
        # Handle different mask formats
        if left_masks.ndim == 2 and right_masks.ndim == 2:
            # Single-channel masks (H, W): stack along width
            return np.hstack([left_masks, right_masks])
        elif left_masks.ndim == 3 and right_masks.ndim == 3:
            # Multi-channel masks (e.g., per-object); assumes channel-last
            # (H, W, N) layout so that axis=1 is the width axis
            return np.concatenate([left_masks, right_masks], axis=1)
        else:
            raise ValueError(f"Incompatible mask dimensions: {left_masks.shape} vs {right_masks.shape}")

    def _estimate_depth_from_size(self, object_width_pixels: float) -> float:
        """
        Estimate object depth from its width in pixels.
        Assumes an average human width of 45 cm.

        Args:
            object_width_pixels: Width of the detected person in pixels

        Returns:
            Estimated depth in meters
        """
        HUMAN_WIDTH_M = 0.45  # Average human shoulder width

        # Using similar triangles: depth = (focal_length * real_width) / pixel_width
        depth = (self.focal_length * HUMAN_WIDTH_M) / max(object_width_pixels, 1)

        # Clamp to a reasonable range (0.5 m to 10 m)
        return np.clip(depth, 0.5, 10.0)
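
    # Worked example (illustrative, with the default focal_length of 1000 px):
    # a 300 px wide person maps to depth = (1000 * 0.45) / 300 = 1.5 m, while a
    # 30 px wide person would give 15 m and be clamped to the 10 m ceiling.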

    def _calculate_disparity(self, depth_m: float) -> float:
        """
        Calculate stereo disparity in pixels for a given depth

        Args:
            depth_m: Depth in meters

        Returns:
            Disparity in pixels
        """
        # Disparity = (baseline * focal_length) / depth,
        # with the baseline converted from mm to m
        disparity_pixels = (self.baseline / 1000.0 * self.focal_length) / depth_m

        return disparity_pixels
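
    # Worked example (illustrative, with the default 65 mm baseline and
    # 1000 px focal length): disparity = (0.065 * 1000) / depth, so a subject
    # at 1 m shifts by 65 px, at 2 m by 32.5 px, and at 10 m by 6.5 px.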

    def _correct_mask_from_template(self,
                                    template_mask: np.ndarray,
                                    target_mask: np.ndarray) -> np.ndarray:
        """
        Correct the target mask using the template mask with disparity adjustment

        Args:
            template_mask: Master eye mask to use as the template
            target_mask: Mask to correct

        Returns:
            Corrected mask
        """
        if not self.disparity_correction:
            # Simple copy without disparity
            return template_mask.copy()

        # Calculate the average disparity from the mask centroid
        template_moments = cv2.moments(template_mask.astype(np.uint8))
        if template_moments['m00'] > 0:
            cx_template = int(template_moments['m10'] / template_moments['m00'])

            # Estimate depth from the mask size
            mask_width = np.sum(np.any(template_mask > 0, axis=0))
            depth = self._estimate_depth_from_size(mask_width)
            disparity = int(self._calculate_disparity(depth))

            # Shift the template mask by the disparity
            if self.master_eye == 'left':
                # Right eye sees the subject shifted left
                translation = np.float32([[1, 0, -disparity], [0, 1, 0]])
            else:
                # Left eye sees the subject shifted right
                translation = np.float32([[1, 0, disparity], [0, 1, 0]])

            corrected = cv2.warpAffine(
                template_mask.astype(np.float32),
                translation,
                (template_mask.shape[1], template_mask.shape[0])
            )

            return corrected
        else:
            # No valid mask to correct from
            return template_mask.copy()

    def _blend_masks(self,
                     mask1: np.ndarray,
                     mask2: np.ndarray,
                     area_ratio: float) -> np.ndarray:
        """
        Blend two masks based on their area ratio

        Args:
            mask1: First (master) mask
            mask2: Second mask
            area_ratio: Ratio of mask2/mask1 areas

        Returns:
            Blended mask
        """
        # Calculate the blend weight from how far the ratio is from 1.0
        blend_weight = min(abs(area_ratio - 1.0) / self.consistency_threshold, 1.0)

        # Blend towards mask1 (master) based on the weight
        blended = mask1 * blend_weight + mask2 * (1 - blend_weight)

        # Threshold back to binary
        return (blended > 0.5).astype(mask1.dtype)
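
    # Worked example (illustrative, with the default threshold of 0.3): an area
    # ratio of 1.4 gives blend_weight = min(0.4 / 0.3, 1.0) = 1.0. Note that
    # validate_masks() only calls this helper once |ratio - 1| already exceeds
    # the threshold, so with these defaults the weight saturates at 1.0 and the
    # result reduces to the thresholded master mask.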

    def get_stats(self) -> Dict[str, Any]:
        """Get processing statistics"""
        # Mask validations are used as the frames-processed count
        # (one validation per frame)
        self.stats['frames_processed'] = self.stats.get('mask_validations', 0)

        if self.stats['frames_processed'] > 0:
            self.stats['correction_rate'] = (
                self.stats['corrections_applied'] / self.stats['frames_processed']
            )
        else:
            self.stats['correction_rate'] = 0.0

        return self.stats.copy()

    def reset_stats(self) -> None:
        """Reset statistics"""
        self.stats = {
            'frames_processed': 0,
            'corrections_applied': 0,
            'detection_transfers': 0,
            'mask_validations': 0
        }
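

# Minimal usage sketch (illustrative only; sbs_frame, left_dets, left_masks
# and right_masks below are placeholders produced elsewhere in the pipeline,
# not part of this module):
#
#   manager = StereoConsistencyManager({'stereo': {'master_eye': 'left'}})
#   left, right = manager.split_frame(sbs_frame)
#   right_dets = manager.transfer_detections(left_dets, 'left_to_right')
#   right_masks = manager.validate_masks(left_masks, right_masks)
#   sbs_masks = manager.combine_masks(left_masks, right_masks)
#   print(manager.get_stats())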