This commit is contained in:
2025-07-26 12:29:32 -07:00
parent ccc68a3895
commit 725a781456
3 changed files with 165 additions and 21 deletions

View File

@@ -9,4 +9,7 @@ ultralytics>=8.0.0
tqdm>=4.65.0 tqdm>=4.65.0
psutil>=5.9.0 psutil>=5.9.0
ffmpeg-python>=0.2.0 ffmpeg-python>=0.2.0
decord>=0.6.0 decord>=0.6.0
# GPU acceleration (optional but recommended for stereo validation speedup)
# cupy-cuda11x>=12.0.0 # For CUDA 11.x
# cupy-cuda12x>=12.0.0 # For CUDA 12.x - uncomment appropriate version

View File

@@ -18,6 +18,28 @@ pip install -r requirements.txt
echo "📹 Installing decord for video processing..." echo "📹 Installing decord for video processing..."
pip install decord pip install decord
# Install CuPy for GPU acceleration of stereo validation
echo "🚀 Installing CuPy for GPU acceleration..."
# Auto-detect CUDA version and install appropriate CuPy
python -c "
import subprocess
import sys

import torch

# CuPy ships one wheel per CUDA major version; map the major version that
# PyTorch was built against to the matching requirement specifier.
CUPY_WHEELS = {'11': 'cupy-cuda11x>=12.0.0', '12': 'cupy-cuda12x>=12.0.0'}

# torch.version.cuda is None on CPU-only and ROCm builds, even though
# torch.cuda.is_available() can be True on ROCm, so guard against None.
cuda_version = torch.version.cuda if torch.cuda.is_available() else None

if cuda_version is None:
    print('CUDA not available, skipping CuPy installation')
else:
    print(f'CUDA version detected: {cuda_version}')
    major = cuda_version.split('.')[0]
    wheel = CUPY_WHEELS.get(major)
    if wheel is None:
        print(f'Unsupported CUDA version: {cuda_version}')
    else:
        # Invoke pip through the running interpreter so CuPy is installed
        # into the same environment that provides torch.
        result = subprocess.run([sys.executable, '-m', 'pip', 'install', wheel])
        if result.returncode == 0:
            print(f'Installed CuPy for CUDA {major}.x')
        else:
            # CuPy is optional: report the failure but do not abort the setup.
            print(f'CuPy installation failed (pip exit code {result.returncode}); continuing without GPU acceleration')
"
# Install SAM2 separately (not on PyPI) # Install SAM2 separately (not on PyPI)
echo "🎯 Installing SAM2..." echo "🎯 Installing SAM2..."
pip install git+https://github.com/facebookresearch/segment-anything-2.git pip install git+https://github.com/facebookresearch/segment-anything-2.git

View File

@@ -89,7 +89,7 @@ class VR180Processor(VideoProcessor):
def combine_sbs_frame(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray: def combine_sbs_frame(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray:
""" """
Combine left and right eye frames back into side-by-side format Combine left and right eye frames back into side-by-side format with GPU acceleration
Args: Args:
left_eye: Left eye frame left_eye: Left eye frame
@@ -98,15 +98,39 @@ class VR180Processor(VideoProcessor):
Returns: Returns:
Combined SBS frame Combined SBS frame
""" """
# Ensure frames have same height try:
if left_eye.shape[0] != right_eye.shape[0]: import cupy as cp
target_height = min(left_eye.shape[0], right_eye.shape[0])
left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height)) # Transfer to GPU for faster combination
right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height)) left_gpu = cp.asarray(left_eye)
right_gpu = cp.asarray(right_eye)
# Combine horizontally
combined = np.hstack([left_eye, right_eye]) # Ensure frames have same height
return combined if left_gpu.shape[0] != right_gpu.shape[0]:
target_height = min(left_gpu.shape[0], right_gpu.shape[0])
# Note: OpenCV resize not available in CuPy, fall back to CPU for resize
left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))
left_gpu = cp.asarray(left_eye)
right_gpu = cp.asarray(right_eye)
# Combine horizontally on GPU (much faster for large arrays)
combined_gpu = cp.hstack([left_gpu, right_gpu])
# Transfer back to CPU
return cp.asnumpy(combined_gpu)
except ImportError:
# Fallback to CPU NumPy
# Ensure frames have same height
if left_eye.shape[0] != right_eye.shape[0]:
target_height = min(left_eye.shape[0], right_eye.shape[0])
left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))
# Combine horizontally
combined = np.hstack([left_eye, right_eye])
return combined
def process_with_disparity_mapping(self, def process_with_disparity_mapping(self,
frames: List[np.ndarray], frames: List[np.ndarray],
@@ -420,7 +444,7 @@ class VR180Processor(VideoProcessor):
left_results: List[np.ndarray], left_results: List[np.ndarray],
right_results: List[np.ndarray]) -> List[np.ndarray]: right_results: List[np.ndarray]) -> List[np.ndarray]:
""" """
Validate and correct stereo consistency between left and right eye results Validate and correct stereo consistency between left and right eye results using GPU acceleration
Args: Args:
left_results: Left eye processed frames left_results: Left eye processed frames
@@ -429,9 +453,84 @@ class VR180Processor(VideoProcessor):
Returns: Returns:
Validated right eye frames Validated right eye frames
""" """
print(f"🔍 VALIDATION: Starting stereo consistency check ({len(left_results)} frames)")
try:
import cupy as cp
return self._validate_stereo_consistency_gpu(left_results, right_results)
except ImportError:
print(" Warning: CuPy not available, using CPU validation")
return self._validate_stereo_consistency_cpu(left_results, right_results)
def _validate_stereo_consistency_gpu(self,
                                     left_results: List[np.ndarray],
                                     right_results: List[np.ndarray]) -> List[np.ndarray]:
    """
    GPU-accelerated stereo validation using CuPy, processed in bounded batches

    For every left/right pair the foreground area of each eye is measured
    (alpha > 0 for 4-channel frames, otherwise summed absolute colour
    distance to the configured background colour > 30). A right-eye frame
    whose area ratio to the left eye is below 0.5 or above 2.0 is replaced
    by the result of ``self._apply_stereo_correction``.

    Frames are moved to the GPU a fixed number at a time rather than all at
    once, so peak device memory does not grow with clip length.

    Args:
        left_results: Left eye processed frames
        right_results: Right eye processed frames (same length as left)

    Returns:
        Validated right eye frames, one per input pair

    Raises:
        ValueError: If the two lists differ in length
        ImportError: If CuPy is not installed (only for non-empty input)
    """
    total = len(left_results)
    if len(right_results) != total:
        raise ValueError(
            f"left/right frame count mismatch: {total} vs {len(right_results)}"
        )
    if total == 0:
        # cp.stack() rejects an empty sequence; there is nothing to validate.
        print("✅ VALIDATION: GPU stereo consistency check complete")
        return []

    import cupy as cp

    print(" Using GPU acceleration for stereo validation")

    # Number of frame pairs resident on the GPU at once. Each batch also
    # creates float32 temporaries, so this bounds peak device memory.
    batch_size = 16

    print(" Transferring frames to GPU...")
    print(" Computing mask areas on GPU...")

    background = None  # created lazily; only needed for non-alpha frames
    ratios: List[float] = []
    flags: List[bool] = []

    for start in range(0, total, batch_size):
        stop = start + batch_size
        left_batch = cp.stack([cp.asarray(frame) for frame in left_results[start:stop]])
        right_batch = cp.stack([cp.asarray(frame) for frame in right_results[start:stop]])

        if left_batch.shape[3] == 4:  # Alpha channel
            left_masks = left_batch[:, :, :, 3] > 0
            right_masks = right_batch[:, :, :, 3] > 0
        else:  # Green screen detection
            if background is None:
                background = cp.array(self.config.output.background_color)
            left_diff = cp.abs(left_batch.astype(cp.float32) - background).sum(axis=3)
            right_diff = cp.abs(right_batch.astype(cp.float32) - background).sum(axis=3)
            left_masks = left_diff > 30
            right_masks = right_diff > 30

        # Per-frame foreground pixel counts for the whole batch
        left_areas = cp.sum(left_masks, axis=(1, 2)).astype(cp.float32)
        right_areas = cp.sum(right_masks, axis=(1, 2)).astype(cp.float32)

        # The epsilon keeps the ratio finite when the left mask is empty
        batch_ratios = right_areas / (left_areas + 1e-6)
        batch_flags = (batch_ratios < 0.5) | (batch_ratios > 2.0)

        # Only the small per-frame scalars travel back to the host
        ratios.extend(cp.asnumpy(batch_ratios).tolist())
        flags.extend(cp.asnumpy(batch_flags).tolist())

    correction_count = sum(flags)
    print(f" GPU validation complete: {correction_count}/{total} frames need correction")

    validated_frames = []
    for i, (needs_fix, ratio) in enumerate(zip(flags, ratios)):
        if i % 100 == 0:
            print(f" Processing validation results: {i}/{total}")

        if needs_fix:
            # Apply correction
            validated_frames.append(
                self._apply_stereo_correction(left_results[i], right_results[i], ratio)
            )
        else:
            validated_frames.append(right_results[i])

    print("✅ VALIDATION: GPU stereo consistency check complete")
    return validated_frames
def _validate_stereo_consistency_cpu(self,
left_results: List[np.ndarray],
right_results: List[np.ndarray]) -> List[np.ndarray]:
"""CPU fallback for stereo validation"""
print(" Using CPU validation (slower)")
validated_frames = [] validated_frames = []
for i, (left_frame, right_frame) in enumerate(zip(left_results, right_results)): for i, (left_frame, right_frame) in enumerate(zip(left_results, right_results)):
if i % 50 == 0: # Progress every 50 frames
print(f" CPU validation progress: {i}/{len(left_results)}")
# Simple validation: check if mask areas are similar # Simple validation: check if mask areas are similar
left_mask_area = self._get_mask_area(left_frame) left_mask_area = self._get_mask_area(left_frame)
right_mask_area = self._get_mask_area(right_frame) right_mask_area = self._get_mask_area(right_frame)
@@ -448,6 +547,7 @@ class VR180Processor(VideoProcessor):
else: else:
validated_frames.append(right_frame) validated_frames.append(right_frame)
print("✅ VALIDATION: CPU stereo consistency check complete")
return validated_frames return validated_frames
def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]: def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
@@ -465,15 +565,34 @@ class VR180Processor(VideoProcessor):
return empty_frames return empty_frames
def _get_mask_area(self, frame: np.ndarray) -> float:
    """
    Get mask area (foreground pixel count) from a processed frame

    For 4-channel frames a pixel counts as foreground when its alpha value
    is > 0. Otherwise a pixel counts when the summed absolute difference
    between its colour and ``self.config.output.background_color`` exceeds 30.

    CuPy is used when it can be imported and the GPU is usable; in every
    other case the same computation runs on the CPU with NumPy.

    Args:
        frame: Processed frame, indexed as (row, column, channel)

    Returns:
        Number of foreground pixels
    """
    def count_foreground(xp, pixels):
        # Shared by both backends: `xp` is either the cupy or the numpy module.
        if pixels.shape[2] == 4:  # Alpha channel
            mask = pixels[:, :, 3] > 0
        else:  # Green screen - detect non-background pixels
            background = xp.array(self.config.output.background_color)
            distance = xp.abs(pixels.astype(xp.float32) - background).sum(axis=2)
            mask = distance > 30  # Threshold for non-background
        return xp.sum(mask)

    try:
        import cupy as cp
    except ImportError:
        cp = None

    if cp is not None:
        try:
            # int() pulls the 0-d device result back to the host
            return int(count_foreground(cp, cp.asarray(frame)))
        except (cp.cuda.runtime.CUDARuntimeError, cp.cuda.memory.OutOfMemoryError):
            # CuPy is installed but the device is missing or out of memory:
            # acceleration is optional, so fall through to the CPU path.
            pass

    return count_foreground(np, frame)
def _apply_stereo_correction(self, def _apply_stereo_correction(self,
left_frame: np.ndarray, left_frame: np.ndarray,