Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f93abcb08 | |||
| c368d6dc97 | |||
| e7e9c5597b | |||
| 3af16df71e | |||
| df7b009a7b | |||
| 725a781456 |
@@ -10,3 +10,6 @@ tqdm>=4.65.0
|
|||||||
psutil>=5.9.0
|
psutil>=5.9.0
|
||||||
ffmpeg-python>=0.2.0
|
ffmpeg-python>=0.2.0
|
||||||
decord>=0.6.0
|
decord>=0.6.0
|
||||||
|
# GPU acceleration (optional but recommended for stereo validation speedup)
|
||||||
|
# cupy-cuda11x>=12.0.0 # For CUDA 11.x
|
||||||
|
# cupy-cuda12x>=12.0.0 # For CUDA 12.x - uncomment appropriate version
|
||||||
@@ -18,6 +18,28 @@ pip install -r requirements.txt
|
|||||||
echo "📹 Installing decord for video processing..."
|
echo "📹 Installing decord for video processing..."
|
||||||
pip install decord
|
pip install decord
|
||||||
|
|
||||||
|
# Install CuPy for GPU acceleration of stereo validation
|
||||||
|
echo "🚀 Installing CuPy for GPU acceleration..."
|
||||||
|
# Auto-detect CUDA version and install appropriate CuPy
|
||||||
|
python -c "
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
cuda_version = torch.version.cuda
|
||||||
|
print(f'CUDA version detected: {cuda_version}')
|
||||||
|
if cuda_version.startswith('11.'):
|
||||||
|
import subprocess
|
||||||
|
subprocess.run(['pip', 'install', 'cupy-cuda11x>=12.0.0'])
|
||||||
|
print('Installed CuPy for CUDA 11.x')
|
||||||
|
elif cuda_version.startswith('12.'):
|
||||||
|
import subprocess
|
||||||
|
subprocess.run(['pip', 'install', 'cupy-cuda12x>=12.0.0'])
|
||||||
|
print('Installed CuPy for CUDA 12.x')
|
||||||
|
else:
|
||||||
|
print(f'Unsupported CUDA version: {cuda_version}')
|
||||||
|
else:
|
||||||
|
print('CUDA not available, skipping CuPy installation')
|
||||||
|
"
|
||||||
|
|
||||||
# Install SAM2 separately (not on PyPI)
|
# Install SAM2 separately (not on PyPI)
|
||||||
echo "🎯 Installing SAM2..."
|
echo "🎯 Installing SAM2..."
|
||||||
pip install git+https://github.com/facebookresearch/segment-anything-2.git
|
pip install git+https://github.com/facebookresearch/segment-anything-2.git
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import warnings
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
|
import gc
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from sam2.build_sam import build_sam2_video_predictor
|
from sam2.build_sam import build_sam2_video_predictor
|
||||||
@@ -32,6 +33,8 @@ class SAM2VideoMatting:
|
|||||||
self.device = device
|
self.device = device
|
||||||
self.memory_offload = memory_offload
|
self.memory_offload = memory_offload
|
||||||
self.fp16 = fp16
|
self.fp16 = fp16
|
||||||
|
self.model_cfg = model_cfg
|
||||||
|
self.checkpoint_path = checkpoint_path
|
||||||
self.predictor = None
|
self.predictor = None
|
||||||
self.inference_state = None
|
self.inference_state = None
|
||||||
self.video_segments = {}
|
self.video_segments = {}
|
||||||
@@ -74,7 +77,8 @@ class SAM2VideoMatting:
|
|||||||
def init_video_state(self, video_frames: List[np.ndarray] = None, video_path: str = None) -> None:
|
def init_video_state(self, video_frames: List[np.ndarray] = None, video_path: str = None) -> None:
|
||||||
"""Initialize video inference state"""
|
"""Initialize video inference state"""
|
||||||
if self.predictor is None:
|
if self.predictor is None:
|
||||||
raise RuntimeError("SAM2 model not loaded")
|
# Recreate predictor if it was cleaned up
|
||||||
|
self._load_model(self.model_cfg, self.checkpoint_path)
|
||||||
|
|
||||||
if video_path is not None:
|
if video_path is not None:
|
||||||
# Use video path directly (SAM2's preferred method)
|
# Use video path directly (SAM2's preferred method)
|
||||||
@@ -256,11 +260,23 @@ class SAM2VideoMatting:
|
|||||||
"""Clean up resources"""
|
"""Clean up resources"""
|
||||||
if self.inference_state is not None:
|
if self.inference_state is not None:
|
||||||
try:
|
try:
|
||||||
if hasattr(self.predictor, 'cleanup_state'):
|
# Reset SAM2 state first (critical for memory cleanup)
|
||||||
|
if self.predictor is not None and hasattr(self.predictor, 'reset_state'):
|
||||||
|
self.predictor.reset_state(self.inference_state)
|
||||||
|
|
||||||
|
# Fallback to cleanup_state if available
|
||||||
|
elif self.predictor is not None and hasattr(self.predictor, 'cleanup_state'):
|
||||||
self.predictor.cleanup_state(self.inference_state)
|
self.predictor.cleanup_state(self.inference_state)
|
||||||
|
|
||||||
|
# Explicitly delete inference state and video segments
|
||||||
|
del self.inference_state
|
||||||
|
if hasattr(self, 'video_segments') and self.video_segments:
|
||||||
|
del self.video_segments
|
||||||
|
self.video_segments = {}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
warnings.warn(f"Failed to cleanup SAM2 state: {e}")
|
warnings.warn(f"Failed to cleanup SAM2 state: {e}")
|
||||||
|
finally:
|
||||||
self.inference_state = None
|
self.inference_state = None
|
||||||
|
|
||||||
# Clean up temporary video file
|
# Clean up temporary video file
|
||||||
@@ -277,6 +293,22 @@ class SAM2VideoMatting:
|
|||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
|
# Explicitly delete predictor for fresh creation next time
|
||||||
|
if self.predictor is not None:
|
||||||
|
try:
|
||||||
|
del self.predictor
|
||||||
|
except Exception as e:
|
||||||
|
warnings.warn(f"Failed to delete predictor: {e}")
|
||||||
|
finally:
|
||||||
|
self.predictor = None
|
||||||
|
|
||||||
|
# Force garbage collection (critical for memory leak prevention)
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
"""Destructor to ensure cleanup"""
|
"""Destructor to ensure cleanup"""
|
||||||
|
try:
|
||||||
self.cleanup()
|
self.cleanup()
|
||||||
|
except Exception:
|
||||||
|
# Ignore errors during Python shutdown
|
||||||
|
pass
|
||||||
@@ -132,6 +132,26 @@ class VideoProcessor:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Clear OpenCV internal caches
|
||||||
|
try:
|
||||||
|
# Clear OpenCV video capture cache
|
||||||
|
cv2.setUseOptimized(False)
|
||||||
|
cv2.setUseOptimized(True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Clear CuPy caches if available
|
||||||
|
try:
|
||||||
|
import cupy as cp
|
||||||
|
cp._default_memory_pool.free_all_blocks()
|
||||||
|
cp._default_pinned_memory_pool.free_all_blocks()
|
||||||
|
cp.get_default_memory_pool().free_all_blocks()
|
||||||
|
cp.get_default_pinned_memory_pool().free_all_blocks()
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Warning: Could not clear CuPy cache: {e}")
|
||||||
|
|
||||||
# Force Linux to release memory back to OS
|
# Force Linux to release memory back to OS
|
||||||
if sys.platform == 'linux':
|
if sys.platform == 'linux':
|
||||||
try:
|
try:
|
||||||
@@ -623,16 +643,27 @@ class VideoProcessor:
|
|||||||
# Load and merge chunks from disk
|
# Load and merge chunks from disk
|
||||||
print("\nLoading and merging chunks...")
|
print("\nLoading and merging chunks...")
|
||||||
chunk_results = []
|
chunk_results = []
|
||||||
for chunk_file in chunk_files:
|
for i, chunk_file in enumerate(chunk_files):
|
||||||
print(f"Loading {chunk_file.name}...")
|
print(f"Loading {chunk_file.name}...")
|
||||||
chunk_data = np.load(str(chunk_file))
|
chunk_data = np.load(str(chunk_file))
|
||||||
chunk_results.append(chunk_data['frames'])
|
chunk_results.append(chunk_data['frames'])
|
||||||
chunk_data.close() # Close the file
|
chunk_data.close() # Close the file
|
||||||
|
|
||||||
|
# Delete chunk file immediately after loading to free disk space
|
||||||
|
try:
|
||||||
|
chunk_file.unlink()
|
||||||
|
print(f" Deleted chunk file {chunk_file.name}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Warning: Could not delete chunk file: {e}")
|
||||||
|
|
||||||
|
# Aggressive cleanup every few chunks to prevent accumulation
|
||||||
|
if i % 3 == 0 and i > 0:
|
||||||
|
self._aggressive_memory_cleanup(f"after loading chunk {i}")
|
||||||
|
|
||||||
# Merge chunks
|
# Merge chunks
|
||||||
final_frames = self.merge_overlapping_chunks(chunk_results, overlap_frames)
|
final_frames = self.merge_overlapping_chunks(chunk_results, overlap_frames)
|
||||||
|
|
||||||
# Free chunk results after merging
|
# Free chunk results after merging - this is critical!
|
||||||
del chunk_results
|
del chunk_results
|
||||||
self._aggressive_memory_cleanup("after merging chunks")
|
self._aggressive_memory_cleanup("after merging chunks")
|
||||||
|
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class VR180Processor(VideoProcessor):
|
|||||||
|
|
||||||
def combine_sbs_frame(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray:
|
def combine_sbs_frame(self, left_eye: np.ndarray, right_eye: np.ndarray) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Combine left and right eye frames back into side-by-side format
|
Combine left and right eye frames back into side-by-side format with GPU acceleration
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
left_eye: Left eye frame
|
left_eye: Left eye frame
|
||||||
@@ -98,14 +98,44 @@ class VR180Processor(VideoProcessor):
|
|||||||
Returns:
|
Returns:
|
||||||
Combined SBS frame
|
Combined SBS frame
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
|
import cupy as cp
|
||||||
|
|
||||||
|
# Transfer to GPU for faster combination
|
||||||
|
left_gpu = cp.asarray(left_eye)
|
||||||
|
right_gpu = cp.asarray(right_eye)
|
||||||
|
|
||||||
|
# Ensure frames have same height
|
||||||
|
if left_gpu.shape[0] != right_gpu.shape[0]:
|
||||||
|
target_height = min(left_gpu.shape[0], right_gpu.shape[0])
|
||||||
|
# Note: OpenCV resize not available in CuPy, fall back to CPU for resize
|
||||||
|
left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
|
||||||
|
right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))
|
||||||
|
left_gpu = cp.asarray(left_eye)
|
||||||
|
right_gpu = cp.asarray(right_eye)
|
||||||
|
|
||||||
|
# Combine horizontally on GPU (much faster for large arrays)
|
||||||
|
combined_gpu = cp.hstack([left_gpu, right_gpu])
|
||||||
|
|
||||||
|
# Transfer back to CPU and ensure we get a copy, not a view
|
||||||
|
combined = cp.asnumpy(combined_gpu).copy()
|
||||||
|
|
||||||
|
# Free GPU memory immediately
|
||||||
|
del left_gpu, right_gpu, combined_gpu
|
||||||
|
cp._default_memory_pool.free_all_blocks()
|
||||||
|
|
||||||
|
return combined
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
# Fallback to CPU NumPy
|
||||||
# Ensure frames have same height
|
# Ensure frames have same height
|
||||||
if left_eye.shape[0] != right_eye.shape[0]:
|
if left_eye.shape[0] != right_eye.shape[0]:
|
||||||
target_height = min(left_eye.shape[0], right_eye.shape[0])
|
target_height = min(left_eye.shape[0], right_eye.shape[0])
|
||||||
left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
|
left_eye = cv2.resize(left_eye, (left_eye.shape[1], target_height))
|
||||||
right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))
|
right_eye = cv2.resize(right_eye, (right_eye.shape[1], target_height))
|
||||||
|
|
||||||
# Combine horizontally
|
# Combine horizontally and ensure we get a copy, not a view
|
||||||
combined = np.hstack([left_eye, right_eye])
|
combined = np.hstack([left_eye, right_eye]).copy()
|
||||||
return combined
|
return combined
|
||||||
|
|
||||||
def process_with_disparity_mapping(self,
|
def process_with_disparity_mapping(self,
|
||||||
@@ -152,6 +182,10 @@ class VR180Processor(VideoProcessor):
|
|||||||
with self.memory_manager.memory_monitor(f"left eye chunk {chunk_idx}"):
|
with self.memory_manager.memory_monitor(f"left eye chunk {chunk_idx}"):
|
||||||
left_matted = self._process_eye_sequence(left_eye_frames, "left", chunk_idx)
|
left_matted = self._process_eye_sequence(left_eye_frames, "left", chunk_idx)
|
||||||
|
|
||||||
|
# Free left eye frames after processing (before right eye to save memory)
|
||||||
|
del left_eye_frames
|
||||||
|
self._aggressive_memory_cleanup(f"After left eye processing chunk {chunk_idx}")
|
||||||
|
|
||||||
# Process right eye with cross-validation
|
# Process right eye with cross-validation
|
||||||
print("Processing right eye with cross-validation...")
|
print("Processing right eye with cross-validation...")
|
||||||
with self.memory_manager.memory_monitor(f"right eye chunk {chunk_idx}"):
|
with self.memory_manager.memory_monitor(f"right eye chunk {chunk_idx}"):
|
||||||
@@ -159,6 +193,10 @@ class VR180Processor(VideoProcessor):
|
|||||||
right_eye_frames, left_matted, "right", chunk_idx
|
right_eye_frames, left_matted, "right", chunk_idx
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Free right eye frames after processing
|
||||||
|
del right_eye_frames
|
||||||
|
self._aggressive_memory_cleanup(f"After right eye processing chunk {chunk_idx}")
|
||||||
|
|
||||||
# Combine results back to SBS format
|
# Combine results back to SBS format
|
||||||
combined_frames = []
|
combined_frames = []
|
||||||
for left_frame, right_frame in zip(left_matted, right_matted):
|
for left_frame, right_frame in zip(left_matted, right_matted):
|
||||||
@@ -169,6 +207,11 @@ class VR180Processor(VideoProcessor):
|
|||||||
combined = {'left': left_frame, 'right': right_frame}
|
combined = {'left': left_frame, 'right': right_frame}
|
||||||
combined_frames.append(combined)
|
combined_frames.append(combined)
|
||||||
|
|
||||||
|
# Free the individual eye results after combining
|
||||||
|
del left_matted
|
||||||
|
del right_matted
|
||||||
|
self._aggressive_memory_cleanup(f"After combining frames chunk {chunk_idx}")
|
||||||
|
|
||||||
return combined_frames
|
return combined_frames
|
||||||
|
|
||||||
def _process_eye_sequence(self,
|
def _process_eye_sequence(self,
|
||||||
@@ -371,8 +414,9 @@ class VR180Processor(VideoProcessor):
|
|||||||
|
|
||||||
matted_frames.append(matted_frame)
|
matted_frames.append(matted_frame)
|
||||||
|
|
||||||
# Free reloaded frames
|
# Free reloaded frames and video segments completely
|
||||||
del reloaded_frames
|
del reloaded_frames
|
||||||
|
del video_segments # This holds processed masks from SAM2
|
||||||
self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
|
self._aggressive_memory_cleanup(f"After mask application ({eye_name} eye)")
|
||||||
|
|
||||||
return matted_frames
|
return matted_frames
|
||||||
@@ -414,13 +458,17 @@ class VR180Processor(VideoProcessor):
|
|||||||
left_eye_results, right_matted
|
left_eye_results, right_matted
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# CRITICAL: Free the intermediate results to prevent memory accumulation
|
||||||
|
del left_eye_results # Don't keep left eye results after validation
|
||||||
|
del right_matted # Don't keep unvalidated right results
|
||||||
|
|
||||||
return validated_results
|
return validated_results
|
||||||
|
|
||||||
def _validate_stereo_consistency(self,
|
def _validate_stereo_consistency(self,
|
||||||
left_results: List[np.ndarray],
|
left_results: List[np.ndarray],
|
||||||
right_results: List[np.ndarray]) -> List[np.ndarray]:
|
right_results: List[np.ndarray]) -> List[np.ndarray]:
|
||||||
"""
|
"""
|
||||||
Validate and correct stereo consistency between left and right eye results
|
Validate and correct stereo consistency between left and right eye results using GPU acceleration
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
left_results: Left eye processed frames
|
left_results: Left eye processed frames
|
||||||
@@ -429,9 +477,120 @@ class VR180Processor(VideoProcessor):
|
|||||||
Returns:
|
Returns:
|
||||||
Validated right eye frames
|
Validated right eye frames
|
||||||
"""
|
"""
|
||||||
|
print(f"🔍 VALIDATION: Starting stereo consistency check ({len(left_results)} frames)")
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cupy as cp
|
||||||
|
return self._validate_stereo_consistency_gpu(left_results, right_results)
|
||||||
|
except ImportError:
|
||||||
|
print(" Warning: CuPy not available, using CPU validation")
|
||||||
|
return self._validate_stereo_consistency_cpu(left_results, right_results)
|
||||||
|
|
||||||
|
def _validate_stereo_consistency_gpu(self,
|
||||||
|
left_results: List[np.ndarray],
|
||||||
|
right_results: List[np.ndarray]) -> List[np.ndarray]:
|
||||||
|
"""GPU-accelerated batch stereo validation using CuPy with memory-safe batching"""
|
||||||
|
import cupy as cp
|
||||||
|
|
||||||
|
print(" Using GPU acceleration for stereo validation")
|
||||||
|
|
||||||
|
# Process in batches to avoid GPU OOM
|
||||||
|
batch_size = 50 # Process 50 frames at a time (safe for 45GB GPU)
|
||||||
|
total_frames = len(left_results)
|
||||||
|
area_ratios_all = []
|
||||||
|
needs_correction_all = []
|
||||||
|
|
||||||
|
print(f" Processing {total_frames} frames in batches of {batch_size}...")
|
||||||
|
|
||||||
|
for batch_start in range(0, total_frames, batch_size):
|
||||||
|
batch_end = min(batch_start + batch_size, total_frames)
|
||||||
|
batch_frames = batch_end - batch_start
|
||||||
|
|
||||||
|
if batch_start % 100 == 0:
|
||||||
|
print(f" GPU batch {batch_start//batch_size + 1}: frames {batch_start}-{batch_end}")
|
||||||
|
|
||||||
|
# Get batch slices
|
||||||
|
left_batch = left_results[batch_start:batch_end]
|
||||||
|
right_batch = right_results[batch_start:batch_end]
|
||||||
|
|
||||||
|
# Convert batch to GPU
|
||||||
|
left_stack = cp.stack([cp.asarray(frame) for frame in left_batch])
|
||||||
|
right_stack = cp.stack([cp.asarray(frame) for frame in right_batch])
|
||||||
|
|
||||||
|
# Batch calculate mask areas for this batch
|
||||||
|
if left_stack.shape[3] == 4: # Alpha channel
|
||||||
|
left_masks = left_stack[:, :, :, 3] > 0
|
||||||
|
right_masks = right_stack[:, :, :, 3] > 0
|
||||||
|
else: # Green screen detection
|
||||||
|
bg_color = cp.array(self.config.output.background_color)
|
||||||
|
left_diff = cp.abs(left_stack.astype(cp.float32) - bg_color).sum(axis=3)
|
||||||
|
right_diff = cp.abs(right_stack.astype(cp.float32) - bg_color).sum(axis=3)
|
||||||
|
left_masks = left_diff > 30
|
||||||
|
right_masks = right_diff > 30
|
||||||
|
|
||||||
|
# Calculate areas for this batch
|
||||||
|
left_areas = cp.sum(left_masks, axis=(1, 2))
|
||||||
|
right_areas = cp.sum(right_masks, axis=(1, 2))
|
||||||
|
area_ratios = right_areas.astype(cp.float32) / (left_areas.astype(cp.float32) + 1e-6)
|
||||||
|
|
||||||
|
# Find frames needing correction in this batch
|
||||||
|
needs_correction = (area_ratios < 0.5) | (area_ratios > 2.0)
|
||||||
|
|
||||||
|
# Transfer batch results back to CPU and accumulate
|
||||||
|
area_ratios_all.extend(cp.asnumpy(area_ratios))
|
||||||
|
needs_correction_all.extend(cp.asnumpy(needs_correction))
|
||||||
|
|
||||||
|
# Free GPU memory for this batch
|
||||||
|
del left_stack, right_stack, left_masks, right_masks
|
||||||
|
del left_areas, right_areas, area_ratios, needs_correction
|
||||||
|
cp._default_memory_pool.free_all_blocks()
|
||||||
|
|
||||||
|
# CRITICAL: Release ALL CuPy memory back to system after validation
|
||||||
|
try:
|
||||||
|
# Force release of all GPU memory pools
|
||||||
|
cp._default_memory_pool.free_all_blocks()
|
||||||
|
cp._default_pinned_memory_pool.free_all_blocks()
|
||||||
|
|
||||||
|
# Clear CuPy cache completely
|
||||||
|
cp.get_default_memory_pool().free_all_blocks()
|
||||||
|
cp.get_default_pinned_memory_pool().free_all_blocks()
|
||||||
|
|
||||||
|
print(f" CuPy memory pools cleared")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Warning: Could not clear CuPy memory pools: {e}")
|
||||||
|
|
||||||
|
correction_count = sum(needs_correction_all)
|
||||||
|
print(f" GPU validation complete: {correction_count}/{total_frames} frames need correction")
|
||||||
|
|
||||||
|
# Apply corrections using CPU results
|
||||||
|
validated_frames = []
|
||||||
|
for i, (needs_fix, ratio) in enumerate(zip(needs_correction_all, area_ratios_all)):
|
||||||
|
if i % 100 == 0:
|
||||||
|
print(f" Processing validation results: {i}/{total_frames}")
|
||||||
|
|
||||||
|
if needs_fix:
|
||||||
|
# Apply correction
|
||||||
|
corrected_frame = self._apply_stereo_correction(
|
||||||
|
left_results[i], right_results[i], float(ratio)
|
||||||
|
)
|
||||||
|
validated_frames.append(corrected_frame)
|
||||||
|
else:
|
||||||
|
validated_frames.append(right_results[i])
|
||||||
|
|
||||||
|
print("✅ VALIDATION: GPU stereo consistency check complete")
|
||||||
|
return validated_frames
|
||||||
|
|
||||||
|
def _validate_stereo_consistency_cpu(self,
|
||||||
|
left_results: List[np.ndarray],
|
||||||
|
right_results: List[np.ndarray]) -> List[np.ndarray]:
|
||||||
|
"""CPU fallback for stereo validation"""
|
||||||
|
print(" Using CPU validation (slower)")
|
||||||
validated_frames = []
|
validated_frames = []
|
||||||
|
|
||||||
for i, (left_frame, right_frame) in enumerate(zip(left_results, right_results)):
|
for i, (left_frame, right_frame) in enumerate(zip(left_results, right_results)):
|
||||||
|
if i % 50 == 0: # Progress every 50 frames
|
||||||
|
print(f" CPU validation progress: {i}/{len(left_results)}")
|
||||||
|
|
||||||
# Simple validation: check if mask areas are similar
|
# Simple validation: check if mask areas are similar
|
||||||
left_mask_area = self._get_mask_area(left_frame)
|
left_mask_area = self._get_mask_area(left_frame)
|
||||||
right_mask_area = self._get_mask_area(right_frame)
|
right_mask_area = self._get_mask_area(right_frame)
|
||||||
@@ -448,6 +607,7 @@ class VR180Processor(VideoProcessor):
|
|||||||
else:
|
else:
|
||||||
validated_frames.append(right_frame)
|
validated_frames.append(right_frame)
|
||||||
|
|
||||||
|
print("✅ VALIDATION: CPU stereo consistency check complete")
|
||||||
return validated_frames
|
return validated_frames
|
||||||
|
|
||||||
def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
|
def _create_empty_masks_from_count(self, num_frames: int, frame_shape: tuple) -> List[np.ndarray]:
|
||||||
@@ -465,7 +625,26 @@ class VR180Processor(VideoProcessor):
|
|||||||
return empty_frames
|
return empty_frames
|
||||||
|
|
||||||
def _get_mask_area(self, frame: np.ndarray) -> float:
|
def _get_mask_area(self, frame: np.ndarray) -> float:
|
||||||
"""Get mask area from processed frame"""
|
"""Get mask area from processed frame using GPU acceleration"""
|
||||||
|
try:
|
||||||
|
import cupy as cp
|
||||||
|
|
||||||
|
# Transfer to GPU
|
||||||
|
frame_gpu = cp.asarray(frame)
|
||||||
|
|
||||||
|
if frame.shape[2] == 4: # Alpha channel
|
||||||
|
mask_gpu = frame_gpu[:, :, 3] > 0
|
||||||
|
else: # Green screen - detect non-background pixels
|
||||||
|
bg_color_gpu = cp.array(self.config.output.background_color)
|
||||||
|
diff_gpu = cp.abs(frame_gpu.astype(cp.float32) - bg_color_gpu).sum(axis=2)
|
||||||
|
mask_gpu = diff_gpu > 30 # Threshold for non-background
|
||||||
|
|
||||||
|
# Calculate area on GPU and return as Python int
|
||||||
|
area = int(cp.sum(mask_gpu))
|
||||||
|
return area
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
# Fallback to CPU NumPy if CuPy not available
|
||||||
if frame.shape[2] == 4: # Alpha channel
|
if frame.shape[2] == 4: # Alpha channel
|
||||||
mask = frame[:, :, 3] > 0
|
mask = frame[:, :, 3] > 0
|
||||||
else: # Green screen - detect non-background pixels
|
else: # Green screen - detect non-background pixels
|
||||||
|
|||||||
Reference in New Issue
Block a user