optimizations A round 1

2025-07-26 11:04:04 -07:00
parent 40ae537f7a
commit b642b562f0
4 changed files with 353 additions and 13 deletions


@@ -37,6 +37,8 @@ class OutputConfig:
format: str = "alpha"
background_color: List[int] = None
maintain_sbs: bool = True
preserve_audio: bool = True
verify_sync: bool = True
def __post_init__(self):
if self.background_color is None:
@@ -99,7 +101,9 @@ class VR180Config:
'path': self.output.path,
'format': self.output.format,
'background_color': self.output.background_color,
'maintain_sbs': self.output.maintain_sbs,
'preserve_audio': self.output.preserve_audio,
'verify_sync': self.output.verify_sync
},
'hardware': {
'device': self.hardware.device,
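The two new flags are serialized next to maintain_sbs in the 'output' section of to_dict(). A minimal sketch of what that section looks like after this change (editor's illustration with placeholder values, not part of the commit):

# Sketch only: the 'output' portion of VR180Config.to_dict() with the new flags.
output_section = {
    'path': 'out.mp4',
    'format': 'alpha',
    'background_color': [0, 255, 0],
    'maintain_sbs': True,
    'preserve_audio': True,   # mux the source audio track into the output MP4
    'verify_sync': True,      # probe the output afterwards and compare frame counts
}
assert output_section['preserve_audio'] and output_section['verify_sync']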


@@ -7,6 +7,8 @@ import tempfile
import shutil
from tqdm import tqdm
import warnings
import time
import subprocess
from .config import VR180Config
from .detector import YOLODetector
@@ -35,6 +37,16 @@ class VideoProcessor:
self.frame_width = 0
self.frame_height = 0
# Processing statistics
self.processing_stats = {
'start_time': None,
'end_time': None,
'total_duration': 0,
'processing_fps': 0,
'chunks_processed': 0,
'frames_processed': 0
}
self._initialize_models()
def _initialize_models(self):
@@ -348,25 +360,109 @@ class VideoProcessor:
print(f"Saved {len(frames)} PNG frames to {output_dir}")
def _save_mp4_video(self, frames: List[np.ndarray], output_path: str):
"""Save frames as MP4 video"""
"""Save frames as MP4 video with audio preservation"""
if not frames:
return
height, width = frames[0].shape[:2]
output_path = Path(output_path)
temp_frames_dir = output_path.parent / f"temp_frames_{output_path.stem}"
temp_frames_dir.mkdir(exist_ok=True)
try:
# Save frames as images
print("Saving frames as images...")
for i, frame in enumerate(tqdm(frames, desc="Saving frames")):
if frame.shape[2] == 4: # Convert RGBA to BGR
frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
frame_path = temp_frames_dir / f"frame_{i:06d}.jpg"
cv2.imwrite(str(frame_path), frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
# Create video with ffmpeg
self._create_video_with_ffmpeg(temp_frames_dir, output_path, len(frames))
finally:
# Cleanup temporary frames
if temp_frames_dir.exists():
shutil.rmtree(temp_frames_dir)
def _create_video_with_ffmpeg(self, frames_dir: Path, output_path: Path, frame_count: int):
"""Create video using ffmpeg with audio preservation"""
frame_pattern = str(frames_dir / "frame_%06d.jpg")
if self.config.output.preserve_audio:
# Create video with audio from input
cmd = [
'ffmpeg', '-y',
'-framerate', str(self.fps),
'-i', frame_pattern,
'-i', str(self.config.input.video_path), # Input video for audio
'-c:v', 'h264_nvenc', # Try GPU encoding first
'-preset', 'fast',
'-cq', '18',
'-c:a', 'copy', # Copy audio without re-encoding
'-map', '0:v:0', # Map video from frames
'-map', '1:a:0', # Map audio from input video
'-shortest', # Match shortest stream duration
'-pix_fmt', 'yuv420p',
str(output_path)
]
else:
# Create video without audio
cmd = [
'ffmpeg', '-y',
'-framerate', str(self.fps),
'-i', frame_pattern,
'-c:v', 'h264_nvenc',
'-preset', 'fast',
'-cq', '18',
'-pix_fmt', 'yuv420p',
str(output_path)
]
print(f"Creating video with ffmpeg...")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
# Try CPU encoding as fallback
print("GPU encoding failed, trying CPU encoding...")
cmd[cmd.index('h264_nvenc')] = 'libx264'
cmd[cmd.index('-cq')] = '-crf' # Change quality parameter for CPU
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
print(f"FFmpeg stdout: {result.stdout}")
print(f"FFmpeg stderr: {result.stderr}")
raise RuntimeError(f"FFmpeg failed with return code {result.returncode}")
# Verify frame count if sync verification is enabled
if self.config.output.verify_sync:
self._verify_frame_count(output_path, frame_count)
print(f"Saved video to {output_path}")
def _verify_frame_count(self, video_path: Path, expected_frames: int):
"""Verify output video has correct frame count"""
try:
probe = ffmpeg.probe(str(video_path))
video_stream = next(
(stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
None
)
if video_stream:
actual_frames = int(video_stream.get('nb_frames', 0))
if actual_frames != expected_frames:
print(f"⚠️ Frame count mismatch: expected {expected_frames}, got {actual_frames}")
else:
print(f"✅ Frame count verified: {actual_frames} frames")
except Exception as e:
print(f"⚠️ Could not verify frame count: {e}")
def process_video(self) -> None:
"""Main video processing pipeline"""
self.processing_stats['start_time'] = time.time()
print("Starting VR180 video processing...")
# Load video info
@@ -397,6 +493,10 @@ class VideoProcessor:
matted_frames = self.process_chunk(frames, chunk_idx)
chunk_results.append(matted_frames)
# Update statistics
self.processing_stats['chunks_processed'] += 1
self.processing_stats['frames_processed'] += len(frames)
# Memory cleanup
self.memory_manager.cleanup_memory()
@@ -411,7 +511,43 @@ class VideoProcessor:
print(f"Saving {len(final_frames)} processed frames...")
self.save_video(final_frames, self.config.output.path)
# Calculate final statistics
self.processing_stats['end_time'] = time.time()
self.processing_stats['total_duration'] = self.processing_stats['end_time'] - self.processing_stats['start_time']
if self.processing_stats['total_duration'] > 0:
self.processing_stats['processing_fps'] = self.processing_stats['frames_processed'] / self.processing_stats['total_duration']
# Print processing statistics
self._print_processing_statistics()
# Print final memory report
self.memory_manager.print_memory_report()
print("Video processing completed!")
print("Video processing completed!")
def _print_processing_statistics(self):
"""Print detailed processing statistics"""
stats = self.processing_stats
video_duration = self.total_frames / self.fps if self.fps > 0 else 0
print("\n" + "="*60)
print("PROCESSING STATISTICS")
print("="*60)
print(f"Input video duration: {video_duration:.1f} seconds ({self.total_frames} frames @ {self.fps:.2f} fps)")
print(f"Total processing time: {stats['total_duration']:.1f} seconds")
print(f"Processing speed: {stats['processing_fps']:.2f} fps")
print(f"Speedup factor: {self.fps / stats['processing_fps']:.1f}x slower than realtime")
print(f"Chunks processed: {stats['chunks_processed']}")
print(f"Frames processed: {stats['frames_processed']}")
if video_duration > 0:
efficiency = video_duration / stats['total_duration']
print(f"Processing efficiency: {efficiency:.3f} (1.0 = realtime)")
# Estimate time for different video lengths
print(f"\nEstimated processing times:")
print(f" 5 minutes: {(5 * 60) / efficiency / 60:.1f} minutes")
print(f" 30 minutes: {(30 * 60) / efficiency / 60:.1f} minutes")
print(f" 1 hour: {(60 * 60) / efficiency / 60:.1f} minutes")
print("="*60 + "\n")