nvenc
This commit is contained in:
@@ -27,7 +27,7 @@ matting:
|
|||||||
sam2_model_cfg: "sam2.1_hiera_l" # Use large model for best quality
|
sam2_model_cfg: "sam2.1_hiera_l" # Use large model for best quality
|
||||||
sam2_checkpoint: "segment-anything-2/checkpoints/sam2.1_hiera_large.pt"
|
sam2_checkpoint: "segment-anything-2/checkpoints/sam2.1_hiera_large.pt"
|
||||||
memory_offload: true # Critical for streaming - offload to CPU when needed
|
memory_offload: true # Critical for streaming - offload to CPU when needed
|
||||||
fp16: true # Use half precision for memory efficiency
|
fp16: false # Disable FP16 to avoid type mismatches with compiled models (trades away half-precision memory savings)
|
||||||
continuous_correction: true # Periodically refine tracking
|
continuous_correction: true # Periodically refine tracking
|
||||||
correction_interval: 300 # Correct every 5 seconds at 60fps
|
correction_interval: 300 # Correct every 5 seconds at 60fps
|
||||||
|
|
||||||
@@ -43,14 +43,14 @@ output:
|
|||||||
path: "/workspace/output_video.mp4" # Update with your output path
|
path: "/workspace/output_video.mp4" # Update with your output path
|
||||||
format: "greenscreen" # "greenscreen" or "alpha"
|
format: "greenscreen" # "greenscreen" or "alpha"
|
||||||
background_color: [0, 255, 0] # RGB for green screen
|
background_color: [0, 255, 0] # RGB for green screen
|
||||||
video_codec: "libx264" # CPU encoding (use "h264_nvenc" if GPU encoding works)
|
video_codec: "h264_nvenc" # GPU encoding for L40 (falls back to libx264 CPU encoding if NVENC is unavailable)
|
||||||
quality_preset: "medium" # CPU preset (ultrafast/fast/medium/slow/veryslow)
|
quality_preset: "p4" # NVENC preset (p1=fastest, p7=slowest/best quality)
|
||||||
crf: 18 # Quality (0-51, lower = better, 18 = high quality)
|
crf: 18 # Quality (0-51, lower = better, 18 = high quality)
|
||||||
maintain_sbs: true # Keep side-by-side format with audio
|
maintain_sbs: true # Keep side-by-side format with audio
|
||||||
|
|
||||||
hardware:
|
hardware:
|
||||||
device: "cuda"
|
device: "cuda"
|
||||||
max_vram_gb: 40.0 # Conservative limit for 48GB GPU
|
max_vram_gb: 44.0 # Conservative limit for L40 48GB VRAM
|
||||||
max_ram_gb: 48.0 # RunPod container RAM limit
|
max_ram_gb: 48.0 # RunPod container RAM limit
|
||||||
|
|
||||||
recovery:
|
recovery:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# VR180 Matting Unified Setup Script for RunPod
|
# VR180 Matting Unified Setup Script for RunPod
|
||||||
# Supports both chunked and streaming implementations
|
# Supports both chunked and streaming implementations
|
||||||
|
# Optimized for L40, A6000, and other NVENC-capable GPUs
|
||||||
|
|
||||||
set -e # Exit on error
|
set -e # Exit on error
|
||||||
|
|
||||||
@@ -235,7 +236,7 @@ echo "==================="
|
|||||||
echo "- Streaming: Best for long videos, uses ~50GB RAM constant"
|
echo "- Streaming: Best for long videos, uses ~50GB RAM constant"
|
||||||
echo "- Chunked: More stable but uses 100GB+ RAM in spikes"
|
echo "- Chunked: More stable but uses 100GB+ RAM in spikes"
|
||||||
echo "- Scale factor: 0.25 (fast) → 0.5 (balanced) → 1.0 (quality)"
|
echo "- Scale factor: 0.25 (fast) → 0.5 (balanced) → 1.0 (quality)"
|
||||||
echo "- A6000/A100: Can handle 0.5-0.75 scale easily"
|
echo "- L40/A6000: Can handle 0.5-0.75 scale easily with NVENC GPU encoding"
|
||||||
echo "- Monitor VRAM with: nvidia-smi -l 1"
|
echo "- Monitor VRAM with: nvidia-smi -l 1"
|
||||||
echo
|
echo
|
||||||
echo "🎯 Example Commands:"
|
echo "🎯 Example Commands:"
|
||||||
|
|||||||
@@ -11,6 +11,28 @@ import atexit
|
|||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
|
||||||
|
def test_nvenc_support(codec: str = 'h264_nvenc', ffmpeg_bin: str = 'ffmpeg') -> bool:
    """Return True if ffmpeg can encode with the requested NVENC codec.

    Runs a tiny synthetic encode (lavfi ``testsrc`` input, output discarded
    through the ``null`` muxer) and reports whether ffmpeg exited with
    status 0.

    Args:
        codec: Encoder name passed to ``-c:v``. Defaults to ``'h264_nvenc'``
            so existing zero-argument calls behave as before; pass
            ``'hevc_nvenc'`` to probe HEVC instead.
        ffmpeg_bin: Name or path of the ffmpeg executable to launch.

    Returns:
        True when the probe encode exits with status 0. False when the
        executable is missing or cannot be launched, when the probe does not
        finish within 10 seconds, or when ffmpeg exits non-zero.
    """
    # Tiny probe encode: 320x240 test pattern, 0.1 s long, nothing written to disk.
    cmd = [
        ffmpeg_bin, '-f', 'lavfi', '-i', 'testsrc=duration=0.1:size=320x240:rate=1',
        '-c:v', codec, '-t', '0.1', '-f', 'null', '-',
    ]

    try:
        result = subprocess.run(
            cmd,
            # Keep ffmpeg away from the caller's stdin so it cannot swallow
            # terminal input meant for the parent process.
            stdin=subprocess.DEVNULL,
            # Only the exit status matters; discarding the output avoids
            # buffering and decoding text that is never read.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed (PermissionError).
        return False

    return result.returncode == 0
|
||||||
|
|
||||||
|
|
||||||
class StreamingFrameWriter:
|
class StreamingFrameWriter:
|
||||||
"""Write frames directly to ffmpeg via pipe for memory-efficient output"""
|
"""Write frames directly to ffmpeg via pipe for memory-efficient output"""
|
||||||
|
|
||||||
@@ -36,6 +58,16 @@ class StreamingFrameWriter:
|
|||||||
self.frames_written = 0
|
self.frames_written = 0
|
||||||
self.ffmpeg_process = None
|
self.ffmpeg_process = None
|
||||||
|
|
||||||
|
# Test NVENC support if GPU codec requested
|
||||||
|
if video_codec in ['h264_nvenc', 'hevc_nvenc']:
|
||||||
|
print(f"🔍 Testing NVENC support...")
|
||||||
|
if not test_nvenc_support():
|
||||||
|
print(f"❌ NVENC not available, switching to CPU encoding")
|
||||||
|
video_codec = 'libx264'
|
||||||
|
quality_preset = 'medium'
|
||||||
|
else:
|
||||||
|
print(f"✅ NVENC available")
|
||||||
|
|
||||||
# Build ffmpeg command
|
# Build ffmpeg command
|
||||||
self.ffmpeg_cmd = self._build_ffmpeg_command(
|
self.ffmpeg_cmd = self._build_ffmpeg_command(
|
||||||
video_codec, quality_preset, crf
|
video_codec, quality_preset, crf
|
||||||
@@ -134,23 +166,39 @@ class StreamingFrameWriter:
|
|||||||
|
|
||||||
# Test if ffmpeg starts successfully (quick check)
|
# Test if ffmpeg starts successfully (quick check)
|
||||||
import time
|
import time
|
||||||
time.sleep(0.1) # Give ffmpeg time to fail if it's going to
|
time.sleep(0.2) # Give ffmpeg time to fail if it's going to
|
||||||
|
|
||||||
if self.ffmpeg_process.poll() is not None:
|
if self.ffmpeg_process.poll() is not None:
|
||||||
# Process already died - read error
|
# Process already died - read error
|
||||||
stderr = self.ffmpeg_process.stderr.read().decode()
|
stderr = self.ffmpeg_process.stderr.read().decode()
|
||||||
raise RuntimeError(f"FFmpeg failed immediately: {stderr}")
|
|
||||||
|
# Check for specific NVENC errors and provide better feedback
|
||||||
|
if 'nvenc' in ' '.join(self.ffmpeg_cmd):
|
||||||
|
if 'unsupported device' in stderr.lower():
|
||||||
|
print(f"❌ NVENC not available on this GPU - switching to CPU encoding")
|
||||||
|
elif 'cannot load' in stderr.lower() or 'not found' in stderr.lower():
|
||||||
|
print(f"❌ NVENC drivers not available - switching to CPU encoding")
|
||||||
|
else:
|
||||||
|
print(f"❌ NVENC encoding failed: {stderr}")
|
||||||
|
|
||||||
|
# Try CPU fallback
|
||||||
|
print(f"🔄 Falling back to CPU encoding (libx264)...")
|
||||||
|
self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18)
|
||||||
|
return self._start_ffmpeg()
|
||||||
|
else:
|
||||||
|
raise RuntimeError(f"FFmpeg failed: {stderr}")
|
||||||
|
|
||||||
# Set process to ignore SIGINT (Ctrl+C) - we'll handle it
|
# Set process to ignore SIGINT (Ctrl+C) - we'll handle it
|
||||||
if hasattr(signal, 'pthread_sigmask'):
|
if hasattr(signal, 'pthread_sigmask'):
|
||||||
signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT])
|
signal.pthread_sigmask(signal.SIG_BLOCK, [signal.SIGINT])
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Try CPU fallback if GPU encoding fails
|
# Final fallback if everything fails
|
||||||
if 'nvenc' in ' '.join(self.ffmpeg_cmd):
|
if 'nvenc' in ' '.join(self.ffmpeg_cmd):
|
||||||
print(f"⚠️ GPU encoding failed, trying CPU fallback...")
|
print(f"⚠️ GPU encoding failed with error: {e}")
|
||||||
|
print(f"🔄 Falling back to CPU encoding...")
|
||||||
self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18)
|
self.ffmpeg_cmd = self._build_ffmpeg_command('libx264', 'medium', 18)
|
||||||
self._start_ffmpeg()
|
return self._start_ffmpeg()
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"Failed to start ffmpeg: {e}")
|
raise RuntimeError(f"Failed to start ffmpeg: {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -83,9 +83,10 @@ class SAM2StreamingProcessor:
|
|||||||
# Set to eval mode
|
# Set to eval mode
|
||||||
self.predictor.eval()
|
self.predictor.eval()
|
||||||
|
|
||||||
# Enable FP16 if requested
|
# Note: FP16 conversion can cause type mismatches with compiled models
|
||||||
|
# Let SAM2 handle precision internally via build_sam2_video_predictor options
|
||||||
if self.fp16 and self.device.type == 'cuda':
|
if self.fp16 and self.device.type == 'cuda':
|
||||||
self.predictor = self.predictor.half()
|
print(" FP16 enabled via SAM2 internal settings")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Failed to initialize SAM2 predictor: {e}")
|
raise RuntimeError(f"Failed to initialize SAM2 predictor: {e}")
|
||||||
|
|||||||
Reference in New Issue
Block a user