nvenc

2025-07-27 08:34:57 -07:00
parent 9faaf4ed57
commit 4cc14bc0a9
4 changed files with 62 additions and 12 deletions
--- a/config-streaming-runpod.yaml
+++ b/config-streaming-runpod.yaml
@@ -27,7 +27,7 @@ matting:
  sam2_model_cfg: "sam2.1_hiera_l"  # Use large model for best quality
  sam2_checkpoint: "segment-anything-2/checkpoints/sam2.1_hiera_large.pt"
  memory_offload: true  # Critical for streaming - offload to CPU when needed
-  fp16: true  # Use half precision for memory efficiency
+  fp16: false  # Disable FP16 to avoid type mismatch with compiled models
  continuous_correction: true  # Periodically refine tracking
  correction_interval: 300  # Correct every 5 seconds at 60fps
@@ -43,14 +43,14 @@ output:
  path: "/workspace/output_video.mp4"  # Update with your output path
  format: "greenscreen"  # "greenscreen" or "alpha"
  background_color: [0, 255, 0]  # RGB for green screen
-  video_codec: "libx264"  # CPU encoding (use "h264_nvenc" if GPU encoding works)
+  video_codec: "h264_nvenc"  # GPU encoding for L40 (fallback to CPU if not available)
-  quality_preset: "medium"  # CPU preset (ultrafast/fast/medium/slow/veryslow)
+  quality_preset: "p4"  # NVENC preset (p1=fastest, p7=slowest/best quality)
  crf: 18  # Quality (0-51, lower = better, 18 = high quality)
  maintain_sbs: true  # Keep side-by-side format with audio
 hardware:
  device: "cuda"
-  max_vram_gb: 40.0  # Conservative limit for 48GB GPU
+  max_vram_gb: 44.0  # Conservative limit for L40 48GB VRAM
  max_ram_gb: 48.0  # RunPod container RAM limit
 recovery:
--- a/runpod_setup.sh
+++ b/runpod_setup.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # VR180 Matting Unified Setup Script for RunPod
 # Supports both chunked and streaming implementations
 # Optimized for L40, A6000, and other NVENC-capable GPUs
 set -e  # Exit on error
@@ -235,7 +236,7 @@ echo "==================="
 echo "- Streaming: Best for long videos, uses ~50GB RAM constant"
 echo "- Chunked: More stable but uses 100GB+ RAM in spikes"
 echo "- Scale factor: 0.25 (fast) → 0.5 (balanced) → 1.0 (quality)"
-echo "- A6000/A100: Can handle 0.5-0.75 scale easily"
+echo "- L40/A6000: Can handle 0.5-0.75 scale easily with NVENC GPU encoding"
 echo "- Monitor VRAM with: nvidia-smi -l 1"
 echo
 echo "🎯 Example Commands:"
--- a/vr180_streaming/frame_writer.py
+++ b/vr180_streaming/frame_writer.py
@@ -11,6 +11,28 @@ import atexit
 import warnings
 def test_nvenc_support() -> bool:
    """Return True if ffmpeg can encode with the ``h264_nvenc`` encoder.

    Runs a tiny synthetic encode (lavfi ``testsrc`` into the null muxer)
    and reports whether ffmpeg exited successfully.  This is a best-effort
    probe: any failure to launch or finish ffmpeg is reported as ``False``
    rather than raised, so callers can simply fall back to CPU encoding.

    Returns:
        True when the probe encode exits with status 0, False when ffmpeg
        is missing, cannot be executed, times out, or exits non-zero.
    """
    # Quick test with a 1-frame video
    cmd = [
        'ffmpeg', '-f', 'lavfi', '-i', 'testsrc=duration=0.1:size=320x240:rate=1',
        '-c:v', 'h264_nvenc', '-t', '0.1', '-f', 'null', '-'
    ]
    try:
        result = subprocess.run(
            cmd,
            # Keep ffmpeg from reading the parent's stdin.
            stdin=subprocess.DEVNULL,
            # Only the exit status matters; discarding the output avoids
            # decoding errors on non-UTF-8 diagnostics.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing binary (FileNotFoundError) as well as a
        # present-but-unusable one (e.g. PermissionError).
        return False
    return result.returncode == 0
 class StreamingFrameWriter:
    """Write frames directly to ffmpeg via pipe for memory-efficient output"""
@@ -36,6 +58,16 @@ class StreamingFrameWriter:
        self.frames_written = 0
        self.ffmpeg_process = None
        # Test NVENC support if GPU codec requested
        if video_codec in ['h264_nvenc', 'hevc_nvenc']:
            print(f"🔍 Testing NVENC support...")
            if not test_nvenc_support():
                print(f"❌ NVENC not available, switching to CPU encoding")
                video_codec = 'libx264'
                quality_preset = 'medium'
            else:
                print(f"✅ NVENC available")
        # Build ffmpeg command
        self.ffmpeg_cmd = self._build_ffmpeg_command(
            video_codec, quality_preset, crf
@@ -134,23 +166,39 @@ class StreamingFrameWriter:
            # Test if ffmpeg starts successfully (quick check)
            import time
-            time.sleep(0.1)  # Give ffmpeg time to fail if it's going to
+            time.sleep(0.2)  # Give ffmpeg time to fail if it's going to
            if self.ffmpeg_process.poll() is not None:
                # Process already died - read error
                stderr = self.ffmpeg_process.stderr.read().decode()
-                raise RuntimeError(f"FFmpeg failed immediately: {stderr}")
+                
                # Check for specific NVENC errors and provide better feedback
                if 'nvenc' in ' '.join(self.ffmpeg_cmd):
                    if 'unsupported device' in stderr.lower():
                        print(f"❌ NVENC not available on this GPU - switching to CPU encoding")
                    elif 'cannot load' in stderr.lower() or 'not found' in stderr.lower():
                        print(f"❌ NVENC drivers not available - switching to CPU encoding")
                    else:
                        print(f"❌ NVENC encoding failed: {stderr}")
                    # Try CPU fallback
                    print(f"🔄 Falling back to CPU encoding (libx264)...")
                    self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18)
                    return self._start_ffmpeg()
                else:
                    raise RuntimeError(f"FFmpeg failed: {stderr}")
            # Set process to ignore SIGINT (Ctrl+C) - we'll handle it
            if hasattr(signal, 'pthread_sigmask'):
                signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT])
        except Exception as e:
-            # Try CPU fallback if GPU encoding fails
+            # Final fallback if everything fails
            if 'nvenc' in ' '.join(self.ffmpeg_cmd):
-                print(f"⚠️  GPU encoding failed, trying CPU fallback...")
+                print(f"⚠️  GPU encoding failed with error: {e}")
                print(f"🔄 Falling back to CPU encoding...")
                self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18)
-                self._start_ffmpeg()
+                return self._start_ffmpeg()
            else:
                raise RuntimeError(f"Failed to start ffmpeg: {e}")
--- a/vr180_streaming/sam2_streaming.py
+++ b/vr180_streaming/sam2_streaming.py
@@ -83,9 +83,10 @@ class SAM2StreamingProcessor:
            # Set to eval mode
            self.predictor.eval()
-            # Enable FP16 if requested
+            # Note: FP16 conversion can cause type mismatches with compiled models
            # Let SAM2 handle precision internally via build_sam2_video_predictor options
            if self.fp16 and self.device.type == 'cuda':
-                self.predictor = self.predictor.half()
+                print("   FP16 enabled via SAM2 internal settings")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize SAM2 predictor: {e}")