# Listing metadata: 495 lines, 20 KiB, Python
import uuid
import warnings
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple

import cv2
import numpy as np

from .video_processor import VideoProcessor
from .config import VR180Config


class VR180Processor(VideoProcessor):
    """Video processor specialised for VR180 side-by-side (SBS) footage.

    Each incoming frame is split at ``sbs_split_point`` into a left-eye and a
    right-eye view, each eye sequence is matted separately, and the results
    are either stitched back into one SBS frame or returned per eye,
    depending on ``config.output.maintain_sbs``.

    NOTE(review): ``video_info``, ``load_video_info``, ``memory_manager``,
    ``detector``, ``sam2_model``, ``_create_empty_masks`` and
    ``_create_empty_mask_frame`` are used below but not defined here; they
    are presumably provided by ``VideoProcessor`` -- confirm against the
    parent class.
    """

    # (fourcc, file extension) pairs tried in order for the temporary video.
    _TEMP_VIDEO_CODECS = (
        ('mp4v', '.mp4'),
        ('XVID', '.avi'),
        ('MJPG', '.avi'),
    )
    # Frame rate passed to the temporary video writer.
    _TEMP_VIDEO_FPS = 30.0
    # Summed per-channel distance above which a pixel counts as foreground
    # when the output has no alpha channel.
    _BACKGROUND_DIFF_THRESHOLD = 30
    # Right/left mask-area ratios outside this range trigger stereo correction.
    _MIN_AREA_RATIO = 0.5
    _MAX_AREA_RATIO = 2.0

    def __init__(self, config: VR180Config):
        """Initialise the parent processor and reset the SBS geometry.

        Args:
            config: VR180 configuration, forwarded unchanged to the parent.
        """
        super().__init__(config)

        # VR180 specific properties; 0 means "layout not analysed yet".
        self.left_eye_width = 0
        self.right_eye_width = 0
        self.eye_height = 0
        self.sbs_split_point = 0

    def analyze_sbs_layout(self) -> Dict[str, Any]:
        """Derive the left/right eye regions from the loaded video info.

        The frame is assumed to be split evenly; for an odd width the right
        eye is one pixel wider than the left.

        Returns:
            Dictionary with the total size, the split column, both eye
            regions as ``(x, y, width, height)`` and the eye aspect ratio
            (``0.0`` when the height is zero).

        Raises:
            RuntimeError: If ``self.video_info`` has not been loaded.
        """
        if self.video_info is None:
            raise RuntimeError("Video info not loaded")

        total_width = self.video_info['width']
        total_height = self.video_info['height']

        # Assume equal split for VR180 SBS
        self.sbs_split_point = total_width // 2
        self.left_eye_width = self.sbs_split_point
        self.right_eye_width = total_width - self.sbs_split_point
        self.eye_height = total_height

        # A zero height would otherwise raise ZeroDivisionError.
        if self.eye_height:
            eye_aspect_ratio = self.left_eye_width / self.eye_height
        else:
            eye_aspect_ratio = 0.0

        layout_info = {
            'total_width': total_width,
            'total_height': total_height,
            'split_point': self.sbs_split_point,
            'left_eye_region': (0, 0, self.left_eye_width, self.eye_height),
            'right_eye_region': (self.sbs_split_point, 0, self.right_eye_width, self.eye_height),
            'eye_aspect_ratio': eye_aspect_ratio,
        }

        print(f"VR180 SBS Layout: {total_width}x{total_height}")
        print(f"Split point: {self.sbs_split_point}")
        print(f"Left eye: {self.left_eye_width}x{self.eye_height}")
        print(f"Right eye: {self.right_eye_width}x{self.eye_height}")

        return layout_info

    def split_sbs_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Split a side-by-side frame into left and right eye views.

        If no split point is known yet it is set to half the frame width and
        remembered for later calls.

        Args:
            frame: Input SBS frame; axis 1 is treated as the width.

        Returns:
            Tuple of ``(left_eye_frame, right_eye_frame)``. Both are slices
            of ``frame`` (NumPy views), not copies.
        """
        if self.sbs_split_point == 0:
            self.sbs_split_point = frame.shape[1] // 2

        left_eye = frame[:, :self.sbs_split_point]
        right_eye = frame[:, self.sbs_split_point:]

        return left_eye, right_eye

    def combine_sbs_frame(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray:
        """Stitch left and right eye frames back into one SBS frame.

        Args:
            left_eye: Left eye frame.
            right_eye: Right eye frame.

        Returns:
            The two frames stacked horizontally, left first. If the heights
            differ, both are first resized to the smaller height while
            keeping their own widths.
        """
        if left_eye.shape[0] != right_eye.shape[0]:
            target_height = min(left_eye.shape[0], right_eye.shape[0])
            # cv2.resize takes the target size as (width, height).
            left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
            right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))

        return np.hstack([left_eye, right_eye])

    def _split_frames(self, frames: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Split every SBS frame and return ``(left_frames, right_frames)``."""
        left_eye_frames = []
        right_eye_frames = []
        for frame in frames:
            left, right = self.split_sbs_frame(frame)
            left_eye_frames.append(left)
            right_eye_frames.append(right)
        return left_eye_frames, right_eye_frames

    def _combine_eye_results(self,
                             left_matted: List[np.ndarray],
                             right_matted: List[np.ndarray]) -> List[Any]:
        """Pair up per-eye results as SBS frames or ``{'left', 'right'}`` dicts."""
        eye_pairs = zip(left_matted, right_matted)
        if self.config.output.maintain_sbs:
            return [self.combine_sbs_frame(left, right) for left, right in eye_pairs]
        # Return as separate eye outputs
        return [{'left': left, 'right': right} for left, right in eye_pairs]

    def process_with_disparity_mapping(self,
                                       frames: List[np.ndarray],
                                       chunk_idx: int = 0) -> List[np.ndarray]:
        """Process a chunk: left eye first, then right eye checked against it.

        Args:
            frames: List of SBS frames.
            chunk_idx: Chunk index, used for log messages and temp file names.

        Returns:
            List of processed SBS frames. When
            ``config.output.maintain_sbs`` is false, each element is instead
            a ``{'left': ..., 'right': ...}`` dict.
        """
        print(f"Processing chunk {chunk_idx} with disparity mapping ({len(frames)} frames)")

        # Split all frames into left/right eyes
        left_eye_frames, right_eye_frames = self._split_frames(frames)

        # Process left eye at full quality
        print("Processing left eye...")
        with self.memory_manager.memory_monitor(f"left eye chunk {chunk_idx}"):
            left_matted = self._process_eye_sequence(left_eye_frames, "left", chunk_idx)

        # Process right eye with cross-validation
        print("Processing right eye with cross-validation...")
        with self.memory_manager.memory_monitor(f"right eye chunk {chunk_idx}"):
            right_matted = self._process_eye_sequence_with_validation(
                right_eye_frames, left_matted, "right", chunk_idx
            )

        # Combine results back to SBS format
        return self._combine_eye_results(left_matted, right_matted)

    @staticmethod
    def _prepare_frame_for_writer(frame: np.ndarray) -> np.ndarray:
        """Return ``frame`` as a C-contiguous ``uint8`` array for OpenCV."""
        if frame.dtype != np.uint8:
            frame = frame.astype(np.uint8)
        # Eye frames are column slices of the SBS frame, hence usually
        # non-contiguous.
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)
        return frame

    def _write_temp_video(self, eye_frames: List[np.ndarray], base_path: Path) -> Path:
        """Write ``eye_frames`` to a temporary video, trying each codec in turn.

        ``cv2.VideoWriter.write`` returns ``None`` in the Python bindings, so
        its result cannot be used as a success flag. A codec is accepted when
        the writer opens and the released file exists with non-zero size.

        Args:
            eye_frames: Non-empty list of frames, all the size of the first.
            base_path: Target path; its suffix is replaced per codec.

        Returns:
            Path of the video file that was written.

        Raises:
            RuntimeError: If a frame's size differs from the first frame, or
                if no codec produced a non-empty file.
        """
        height, width = eye_frames[0].shape[:2]
        fps = self._TEMP_VIDEO_FPS

        # Debug frame properties
        first_frame = eye_frames[0]
        print(f"Frame properties: shape={first_frame.shape}, dtype={first_frame.dtype}, "
              f"min={first_frame.min()}, max={first_frame.max()}")

        for fourcc_str, ext in self._TEMP_VIDEO_CODECS:
            candidate_path = base_path.with_suffix(ext)
            print(f"Trying codec {fourcc_str} with path {candidate_path}")
            print(f"Video params: size=({width}, {height}), fps={fps}")

            fourcc = cv2.VideoWriter_fourcc(*fourcc_str)
            writer = cv2.VideoWriter(str(candidate_path), fourcc, fps, (width, height))
            try:
                if not writer.isOpened():
                    print(f"Failed to open writer with {fourcc_str}")
                    continue

                for i, frame in enumerate(eye_frames):
                    # write() gives no error signal for a frame of the wrong
                    # size, so reject it here.
                    if frame.shape[:2] != (height, width):
                        print(f"Failed to write frame {i}/{len(eye_frames)}")
                        raise RuntimeError(
                            f"Failed to write frame {i} to {candidate_path}: "
                            f"size {frame.shape[1]}x{frame.shape[0]} does not match "
                            f"{width}x{height}"
                        )
                    writer.write(self._prepare_frame_for_writer(frame))

                    if i % 50 == 0:
                        print(f"Written {i}/{len(eye_frames)} frames")
            finally:
                # Release on every path so the file is flushed and closed.
                writer.release()

            # Verify the file was created and has content
            if candidate_path.exists() and candidate_path.stat().st_size > 0:
                print(f"Using codec {fourcc_str} for temp video")
                return candidate_path

            print(f"Codec {fourcc_str} produced no usable file: {candidate_path}")

        raise RuntimeError("Failed to open video writer with any codec")

    @staticmethod
    def _remove_temp_files(paths) -> None:
        """Best-effort removal of temporary files; failures only warn."""
        for path in paths:
            try:
                if path.exists():
                    path.unlink()
            except OSError as e:
                warnings.warn(f"Failed to cleanup temp video {path}: {e}")

    def _process_eye_sequence(self,
                              eye_frames: List[np.ndarray],
                              eye_name: str,
                              chunk_idx: int) -> List[np.ndarray]:
        """Matte one eye's frame sequence.

        The frames are written to a uniquely named temporary video in the
        current working directory, and ``self.sam2_model`` is initialised
        with that path. Persons detected in the first frame are used as
        prompts, masks are propagated, and the masks are applied to every
        frame.

        Args:
            eye_frames: Frames of a single eye.
            eye_name: Eye label used in log messages and the temp file name.
            chunk_idx: Chunk index used in log messages and the temp file name.

        Returns:
            One processed frame per input frame, or an empty list for empty
            input. When nothing is detected in the first frame, the result
            of ``self._create_empty_masks(eye_frames)`` is returned.

        Raises:
            RuntimeError: If the temporary video cannot be written.
        """
        if not eye_frames:
            return []

        # Create a unique temporary video for this eye processing
        temp_video_name = f"temp_sam2_{eye_name}_chunk{chunk_idx}_{uuid.uuid4().hex[:8]}.mp4"
        base_path = Path.cwd() / temp_video_name
        # Every path a codec attempt may have created, so that rejected
        # attempts are cleaned up as well.
        candidate_paths = {
            base_path.with_suffix(ext) for _, ext in self._TEMP_VIDEO_CODECS
        }

        try:
            temp_video_path = self._write_temp_video(eye_frames, base_path)
            file_size = temp_video_path.stat().st_size
            print(f"Created temp video {temp_video_path} ({file_size / 1024 / 1024:.1f} MB)")

            # Initialize SAM2 with video path
            self.sam2_model.init_video_state(video_path=str(temp_video_path))

            # Detect persons in first frame
            first_frame = eye_frames[0]
            detections = self.detector.detect_persons(first_frame)

            if not detections:
                warnings.warn(f"No persons detected in {eye_name} eye, chunk {chunk_idx}")
                return self._create_empty_masks(eye_frames)

            print(f"Detected {len(detections)} persons in {eye_name} eye first frame")

            # Convert to SAM2 prompts and register them on frame 0
            box_prompts, labels = self.detector.convert_to_sam_prompts(detections)
            self.sam2_model.add_person_prompts(0, box_prompts, labels)

            # Propagate masks
            video_segments = self.sam2_model.propagate_masks(
                start_frame=0,
                max_frames=len(eye_frames)
            )

            # Apply masks
            matted_frames = []
            for frame_idx, frame in enumerate(eye_frames):
                if frame_idx in video_segments:
                    frame_masks = video_segments[frame_idx]
                    combined_mask = self.sam2_model.get_combined_mask(frame_masks)

                    matted_frame = self.sam2_model.apply_mask_to_frame(
                        frame, combined_mask,
                        output_format=self.config.output.format,
                        background_color=self.config.output.background_color
                    )
                else:
                    # No propagated mask for this frame.
                    matted_frame = self._create_empty_mask_frame(frame)

                matted_frames.append(matted_frame)

            return matted_frames

        finally:
            # Always cleanup; remove the temp files even if the model
            # cleanup itself raises.
            try:
                self.sam2_model.cleanup()
            finally:
                self._remove_temp_files(candidate_paths)

    def _process_eye_sequence_with_validation(self,
                                              right_eye_frames: List[np.ndarray],
                                              left_eye_results: List[np.ndarray],
                                              eye_name: str,
                                              chunk_idx: int) -> List[np.ndarray]:
        """Process the right eye, then validate it against the left eye.

        Args:
            right_eye_frames: Right eye frame sequence.
            left_eye_results: Processed left eye results for validation.
            eye_name: Eye identifier.
            chunk_idx: Chunk index.

        Returns:
            Processed (and validated) right eye frames.
        """
        # For now, process right eye independently
        # TODO: Implement stereo consistency validation
        right_matted = self._process_eye_sequence(right_eye_frames, eye_name, chunk_idx)

        # Apply stereo consistency checks
        return self._validate_stereo_consistency(left_eye_results, right_matted)

    def _validate_stereo_consistency(self,
                                     left_results: List[np.ndarray],
                                     right_results: List[np.ndarray]) -> List[np.ndarray]:
        """Check right eye results against the left eye and correct outliers.

        A right frame is passed to ``_apply_stereo_correction`` when its mask
        area is less than half or more than twice the left frame's mask area.
        Right frames without a left counterpart are returned unchanged rather
        than dropped.

        Args:
            left_results: Left eye processed frames.
            right_results: Right eye processed frames.

        Returns:
            Validated right eye frames, one per entry of ``right_results``.
        """
        validated_frames = []
        left_count = len(left_results)

        for idx, right_frame in enumerate(right_results):
            if idx >= left_count:
                # Nothing to compare against; keep the frame as it is.
                validated_frames.append(right_frame)
                continue

            left_frame = left_results[idx]

            # Simple validation: check if mask areas are similar
            left_mask_area = self._get_mask_area(left_frame)
            right_mask_area = self._get_mask_area(right_frame)

            # The epsilon keeps the division defined for an empty left mask.
            area_ratio = right_mask_area / (left_mask_area + 1e-6)

            if area_ratio < self._MIN_AREA_RATIO or area_ratio > self._MAX_AREA_RATIO:
                # Significant difference - apply correction
                right_frame = self._apply_stereo_correction(
                    left_frame, right_frame, area_ratio
                )

            validated_frames.append(right_frame)

        return validated_frames

    def _get_mask_area(self, frame: np.ndarray) -> float:
        """Count the foreground pixels of a processed frame.

        Args:
            frame: Processed frame. A 4-channel frame is measured through its
                last channel (> 0). A 2-D frame is treated as a mask (> 0).
                Any other frame is compared against
                ``config.output.background_color``.

        Returns:
            Number of foreground pixels as a float.
        """
        if frame.ndim == 2:
            # Single-channel input has no channel axis to index.
            mask = frame > 0
        elif frame.shape[2] == 4:  # Alpha channel
            mask = frame[:, :, 3] > 0
        else:  # Green screen - detect non-background pixels
            bg_color = np.array(self.config.output.background_color)
            diff = np.abs(frame.astype(np.float32) - bg_color).sum(axis=2)
            mask = diff > self._BACKGROUND_DIFF_THRESHOLD  # Threshold for non-background

        return float(np.count_nonzero(mask))

    def _apply_stereo_correction(self,
                                 left_frame: np.ndarray,
                                 right_frame: np.ndarray,
                                 area_ratio: float) -> np.ndarray:
        """Correct the right frame based on the left frame (placeholder).

        This is a simplified correction - in production, you'd use proper
        disparity mapping and stereo geometry.

        Args:
            left_frame: Processed left eye frame (currently unused).
            right_frame: Processed right eye frame.
            area_ratio: Right/left mask area ratio (currently unused).

        Returns:
            ``right_frame`` unchanged.
        """
        # For now, return the right frame as-is
        # TODO: Implement proper stereo correction algorithm
        return right_frame

    def process_chunk(self,
                      frames: List[np.ndarray],
                      chunk_idx: int = 0) -> List[np.ndarray]:
        """Override parent method to handle VR180-specific processing.

        Args:
            frames: List of SBS frames to process.
            chunk_idx: Chunk index for logging.

        Returns:
            List of processed frames (empty for empty input). Elements are
            ``{'left', 'right'}`` dicts when ``config.output.maintain_sbs``
            is false.
        """
        if not frames:
            return []

        # Fall back to a half-width split if the layout was never analysed.
        if self.sbs_split_point == 0:
            sample_frame = frames[0]
            self.sbs_split_point = sample_frame.shape[1] // 2

        # Choose processing method based on configuration
        if self.config.matting.use_disparity_mapping:
            return self.process_with_disparity_mapping(frames, chunk_idx)

        # Process each eye independently and combine
        return self._process_eyes_independently(frames, chunk_idx)

    def _process_eyes_independently(self,
                                    frames: List[np.ndarray],
                                    chunk_idx: int) -> List[np.ndarray]:
        """Process left and right eyes independently and combine the results.

        Args:
            frames: List of SBS frames.
            chunk_idx: Chunk index used for log messages.

        Returns:
            Combined SBS frames, or ``{'left', 'right'}`` dicts when
            ``config.output.maintain_sbs`` is false.
        """
        print(f"Processing chunk {chunk_idx} with independent eye processing")

        # Split frames
        left_eye_frames, right_eye_frames = self._split_frames(frames)

        # Process each eye
        print("Processing left eye...")
        left_matted = self._process_eye_sequence(left_eye_frames, "left", chunk_idx)

        print("Processing right eye...")
        right_matted = self._process_eye_sequence(right_eye_frames, "right", chunk_idx)

        # Combine results
        return self._combine_eye_results(left_matted, right_matted)

    def save_video(self, frames: List[np.ndarray], output_path: str):
        """Override parent method to handle VR180-specific output formats.

        Args:
            frames: Processed frames: either arrays (saved through the
                parent as a single video) or ``{'left', 'right'}`` dicts
                (saved as two videos).
            output_path: Output path; missing parent directories are created.

        Raises:
            ValueError: If ``frames`` is empty.
        """
        if not frames:
            raise ValueError("No frames to save")

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Check if frames are in separate eye format
        if isinstance(frames[0], dict) and 'left' in frames[0]:
            # Save separate eye videos
            self._save_separate_eye_videos(frames, output_path)
        else:
            # Save as combined SBS video
            super().save_video(frames, str(output_path))

    def _save_separate_eye_videos(self, frames: List[Dict[str, np.ndarray]], output_path: Path):
        """Save left and right eye videos next to ``output_path``.

        The files are named ``<stem>_left<suffix>`` and
        ``<stem>_right<suffix>``.

        Args:
            frames: List of ``{'left': ..., 'right': ...}`` dicts.
            output_path: Base output path the two file names are derived from.
        """
        left_frames = [frame['left'] for frame in frames]
        right_frames = [frame['right'] for frame in frames]

        # Save left eye
        left_path = output_path.parent / f"{output_path.stem}_left{output_path.suffix}"
        super().save_video(left_frames, str(left_path))

        # Save right eye
        right_path = output_path.parent / f"{output_path.stem}_right{output_path.suffix}"
        super().save_video(right_frames, str(right_path))

        print(f"Saved separate eye videos: {left_path}, {right_path}")

    def process_video(self) -> None:
        """Override parent method to add VR180-specific initialization.

        Loads the video info for ``config.input.video_path``, analyses the
        SBS layout, then delegates to the parent's ``process_video``.
        """
        print("Starting VR180 video processing...")

        # Load video info and analyze SBS layout
        self.load_video_info(self.config.input.video_path)
        self.analyze_sbs_layout()

        # Continue with parent processing
        super().process_video()