From 40ae537f7ab337bf90eb2fa0f203e75c76f58c9a Mon Sep 17 00:00:00 2001 From: Scott Register Date: Sat, 26 Jul 2025 09:56:39 -0700 Subject: [PATCH] memory stuff --- config_runpod.yaml | 4 +- vr180_matting/vr180_processor.py | 76 ++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/config_runpod.yaml b/config_runpod.yaml index 2a52bff..905d6d6 100644 --- a/config_runpod.yaml +++ b/config_runpod.yaml @@ -3,8 +3,8 @@ input: processing: scale_factor: 0.5 # A40 can handle 0.5 well - chunk_size: 0 # Auto-calculate based on A40's 48GB VRAM - overlap_frames: 60 + chunk_size: 200 # Smaller chunks to prevent OOM (was auto-calculated to 423) + overlap_frames: 30 # Reduced overlap detection: confidence_threshold: 0.7 diff --git a/vr180_matting/vr180_processor.py b/vr180_matting/vr180_processor.py index 102e580..5d8e230 100644 --- a/vr180_matting/vr180_processor.py +++ b/vr180_matting/vr180_processor.py @@ -65,18 +65,25 @@ class VR180Processor(VideoProcessor): Returns: Tuple of (left_eye_frame, right_eye_frame) """ - if self.sbs_split_point == 0: - self.sbs_split_point = frame.shape[1] // 2 - - # Debug: Check if split point is valid for this frame + # Always calculate split point based on current frame width + # This handles scaled frames correctly frame_width = frame.shape[1] - if self.sbs_split_point >= frame_width: - print(f"WARNING: Split point {self.sbs_split_point} >= frame width {frame_width}") - self.sbs_split_point = frame_width // 2 - print(f"Adjusted split point to {self.sbs_split_point}") + current_split_point = frame_width // 2 - left_eye = frame[:, :self.sbs_split_point] - right_eye = frame[:, self.sbs_split_point:] + # Debug info on first use + if self.sbs_split_point == 0: + print(f"Frame dimensions: {frame.shape[1]}x{frame.shape[0]}") + print(f"Split point: {current_split_point}") + self.sbs_split_point = current_split_point # Store for reference + + left_eye = frame[:, :current_split_point] + right_eye = frame[:, current_split_point:] + + # Validate both eyes have content + if left_eye.size == 0: + raise RuntimeError(f"Left eye frame is empty after split (frame width: {frame_width})") + if right_eye.size == 0: + raise RuntimeError(f"Right eye frame is empty after split (frame width: {frame_width})") return left_eye, right_eye @@ -189,7 +196,7 @@ class VR180Processor(VideoProcessor): temp_frames_dir = temp_video_path.parent / f"frames_{temp_video_path.stem}" temp_frames_dir.mkdir(exist_ok=True) - # Save frames as individual images + # Save frames as individual images (using JPEG for smaller file size) print("Saving frames as images...") for i, frame in enumerate(eye_frames): # Check if frame is empty @@ -204,31 +211,64 @@ class VR180Processor(VideoProcessor): if i == 0: print(f"First frame to save: shape={frame.shape}, dtype={frame.dtype}, empty={frame.size == 0}") - frame_path = temp_frames_dir / f"frame_{i:06d}.png" - success = cv2.imwrite(str(frame_path), frame) + # Use JPEG instead of PNG for smaller files (faster I/O, less disk space) + frame_path = temp_frames_dir / f"frame_{i:06d}.jpg" + # Use high quality JPEG to minimize compression artifacts + success = cv2.imwrite(str(frame_path), frame, [cv2.IMWRITE_JPEG_QUALITY, 95]) if not success: print(f"Frame {i} details: shape={frame.shape}, dtype={frame.dtype}, size={frame.size}") raise RuntimeError(f"Failed to save frame {i} as image") if i % 50 == 0: print(f"Saved {i}/{len(eye_frames)} frames") + + # Force garbage collection every 100 frames to free memory + if i % 100 == 0: + import gc + gc.collect() # Use ffmpeg to create video from images import subprocess # Use the original video's framerate - access through parent class original_fps = self.fps if hasattr(self, 'fps') else 30.0 print(f"Using framerate: {original_fps} fps") - ffmpeg_cmd = [ + # Try GPU encoding first, fallback to CPU + gpu_cmd = [ 'ffmpeg', '-y', # -y to overwrite output file '-framerate', str(original_fps), - '-i', str(temp_frames_dir / 'frame_%06d.png'), - '-c:v', 'libx264', + '-i', str(temp_frames_dir / 'frame_%06d.jpg'), + '-c:v', 'h264_nvenc', # NVIDIA GPU encoder + '-preset', 'fast', # GPU preset + '-cq', '18', # Quality for GPU encoding '-pix_fmt', 'yuv420p', - '-crf', '18', # Higher quality (lower CRF) - '-preset', 'slow', # Better compression str(temp_video_path) ] + cpu_cmd = [ + 'ffmpeg', '-y', # -y to overwrite output file + '-framerate', str(original_fps), + '-i', str(temp_frames_dir / 'frame_%06d.jpg'), + '-c:v', 'libx264', # CPU encoder + '-pix_fmt', 'yuv420p', + '-crf', '18', # Quality for CPU encoding + '-preset', 'medium', + str(temp_video_path) + ] + + # Try GPU first + print(f"Trying GPU encoding: {' '.join(gpu_cmd)}") + result = subprocess.run(gpu_cmd, capture_output=True, text=True) + + if result.returncode != 0: + print("GPU encoding failed, trying CPU...") + print(f"GPU error: {result.stderr}") + ffmpeg_cmd = cpu_cmd + print(f"Using CPU encoding: {' '.join(ffmpeg_cmd)}") + result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True) + else: + print("GPU encoding successful!") + ffmpeg_cmd = gpu_cmd + print(f"Running ffmpeg: {' '.join(ffmpeg_cmd)}") result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)