From 4cc14bc0a968531fa09089371f0a8e3cfa000962 Mon Sep 17 00:00:00 2001 From: Scott Register Date: Sun, 27 Jul 2025 08:34:57 -0700 Subject: [PATCH] nvenc --- config-streaming-runpod.yaml | 8 ++--- runpod_setup.sh | 3 +- vr180_streaming/frame_writer.py | 58 ++++++++++++++++++++++++++++--- vr180_streaming/sam2_streaming.py | 5 +-- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/config-streaming-runpod.yaml b/config-streaming-runpod.yaml index aead26d..768ed4d 100644 --- a/config-streaming-runpod.yaml +++ b/config-streaming-runpod.yaml @@ -27,7 +27,7 @@ matting: sam2_model_cfg: "sam2.1_hiera_l" # Use large model for best quality sam2_checkpoint: "segment-anything-2/checkpoints/sam2.1_hiera_large.pt" memory_offload: true # Critical for streaming - offload to CPU when needed - fp16: true # Use half precision for memory efficiency + fp16: false # Disable FP16 to avoid type mismatch with compiled models continuous_correction: true # Periodically refine tracking correction_interval: 300 # Correct every 5 seconds at 60fps @@ -43,14 +43,14 @@ output: path: "/workspace/output_video.mp4" # Update with your output path format: "greenscreen" # "greenscreen" or "alpha" background_color: [0, 255, 0] # RGB for green screen - video_codec: "libx264" # CPU encoding (use "h264_nvenc" if GPU encoding works) - quality_preset: "medium" # CPU preset (ultrafast/fast/medium/slow/veryslow) + video_codec: "h264_nvenc" # GPU encoding for L40 (fallback to CPU if not available) + quality_preset: "p4" # NVENC preset (p1=fastest, p7=slowest/best quality) crf: 18 # Quality (0-51, lower = better, 18 = high quality) maintain_sbs: true # Keep side-by-side format with audio hardware: device: "cuda" - max_vram_gb: 40.0 # Conservative limit for 48GB GPU + max_vram_gb: 44.0 # Conservative limit for L40 48GB VRAM max_ram_gb: 48.0 # RunPod container RAM limit recovery: diff --git a/runpod_setup.sh b/runpod_setup.sh index d4e757b..34b635d 100755 --- a/runpod_setup.sh +++ 
b/runpod_setup.sh @@ -1,6 +1,7 @@ #!/bin/bash # VR180 Matting Unified Setup Script for RunPod # Supports both chunked and streaming implementations +# Optimized for L40, A6000, and other NVENC-capable GPUs set -e # Exit on error @@ -235,7 +236,7 @@ echo "===================" echo "- Streaming: Best for long videos, uses ~50GB RAM constant" echo "- Chunked: More stable but uses 100GB+ RAM in spikes" echo "- Scale factor: 0.25 (fast) → 0.5 (balanced) → 1.0 (quality)" -echo "- A6000/A100: Can handle 0.5-0.75 scale easily" +echo "- L40/A6000: Can handle 0.5-0.75 scale easily with NVENC GPU encoding" echo "- Monitor VRAM with: nvidia-smi -l 1" echo echo "🎯 Example Commands:" diff --git a/vr180_streaming/frame_writer.py b/vr180_streaming/frame_writer.py index f32d3de..f33db1f 100644 --- a/vr180_streaming/frame_writer.py +++ b/vr180_streaming/frame_writer.py @@ -11,6 +11,28 @@ import atexit import warnings +def test_nvenc_support() -> bool: + """Test if NVENC encoding is available""" + try: + # Quick test with a 1-frame video + cmd = [ + 'ffmpeg', '-f', 'lavfi', '-i', 'testsrc=duration=0.1:size=320x240:rate=1', + '-c:v', 'h264_nvenc', '-t', '0.1', '-f', 'null', '-' + ] + + result = subprocess.run( + cmd, + capture_output=True, + timeout=10, + text=True + ) + + return result.returncode == 0 + + except (subprocess.TimeoutExpired, FileNotFoundError): + return False + + class StreamingFrameWriter: """Write frames directly to ffmpeg via pipe for memory-efficient output""" @@ -36,6 +58,16 @@ class StreamingFrameWriter: self.frames_written = 0 self.ffmpeg_process = None + # Test NVENC support if GPU codec requested + if video_codec in ['h264_nvenc', 'hevc_nvenc']: + print(f"🔍 Testing NVENC support...") + if not test_nvenc_support(): + print(f"❌ NVENC not available, switching to CPU encoding") + video_codec = 'libx264' + quality_preset = 'medium' + else: + print(f"✅ NVENC available") + # Build ffmpeg command self.ffmpeg_cmd = self._build_ffmpeg_command( video_codec, 
quality_preset, crf @@ -134,23 +166,39 @@ class StreamingFrameWriter: # Test if ffmpeg starts successfully (quick check) import time - time.sleep(0.1) # Give ffmpeg time to fail if it's going to + time.sleep(0.2) # Give ffmpeg time to fail if it's going to if self.ffmpeg_process.poll() is not None: # Process already died - read error stderr = self.ffmpeg_process.stderr.read().decode() - raise RuntimeError(f"FFmpeg failed immediately: {stderr}") + + # Check for specific NVENC errors and provide better feedback + if 'nvenc' in ' '.join(self.ffmpeg_cmd): + if 'unsupported device' in stderr.lower(): + print(f"❌ NVENC not available on this GPU - switching to CPU encoding") + elif 'cannot load' in stderr.lower() or 'not found' in stderr.lower(): + print(f"❌ NVENC drivers not available - switching to CPU encoding") + else: + print(f"❌ NVENC encoding failed: {stderr}") + + # Try CPU fallback + print(f"🔄 Falling back to CPU encoding (libx264)...") + self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18) + return self._start_ffmpeg() + else: + raise RuntimeError(f"FFmpeg failed: {stderr}") # Set process to ignore SIGINT (Ctrl+C) - we'll handle it if hasattr(signal, 'pthread_sigmask'): signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT]) except Exception as e: - # Try CPU fallback if GPU encoding fails + # Final fallback if everything fails if 'nvenc' in ' '.join(self.ffmpeg_cmd): - print(f"⚠️ GPU encoding failed, trying CPU fallback...") + print(f"⚠️ GPU encoding failed with error: {e}") + print(f"🔄 Falling back to CPU encoding...") self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18) - self._start_ffmpeg() + return self._start_ffmpeg() else: raise RuntimeError(f"Failed to start ffmpeg: {e}") diff --git a/vr180_streaming/sam2_streaming.py b/vr180_streaming/sam2_streaming.py index 28d907b..2216c5f 100644 --- a/vr180_streaming/sam2_streaming.py +++ b/vr180_streaming/sam2_streaming.py @@ -83,9 +83,10 @@ class SAM2StreamingProcessor: # Set to 
eval mode self.predictor.eval() - # Enable FP16 if requested + # Note: FP16 conversion can cause type mismatches with compiled models + # Let SAM2 handle precision internally via build_sam2_video_predictor options if self.fp16 and self.device.type == 'cuda': - self.predictor = self.predictor.half() + print(" FP16 enabled via SAM2 internal settings") except Exception as e: raise RuntimeError(f"Failed to initialize SAM2 predictor: {e}")