From 40ae537f7ab337bf90eb2fa0f203e75c76f58c9a Mon Sep 17 00:00:00 2001
From: Scott Register <sreg@sreg.io>
Date: Sat, 26 Jul 2025 09:56:39 -0700
Subject: [PATCH] Reduce memory use in VR180 processing; add NVENC encoding

---
 config_runpod.yaml               |  4 +-
 vr180_matting/vr180_processor.py | 76 ++++++++++++++++++++++++--------
 2 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/config_runpod.yaml b/config_runpod.yaml
index 2a52bff..905d6d6 100644
--- a/config_runpod.yaml
+++ b/config_runpod.yaml
@@ -3,8 +3,8 @@ input:
   
 processing:
   scale_factor: 0.5  # A40 can handle 0.5 well
-  chunk_size: 0     # Auto-calculate based on A40's 48GB VRAM
-  overlap_frames: 60
+  chunk_size: 200   # Smaller chunks to prevent OOM (was auto-calculated to 423)
+  overlap_frames: 30 # Reduced overlap (was 60)
   
 detection:
   confidence_threshold: 0.7
diff --git a/vr180_matting/vr180_processor.py b/vr180_matting/vr180_processor.py
index 102e580..5d8e230 100644
--- a/vr180_matting/vr180_processor.py
+++ b/vr180_matting/vr180_processor.py
@@ -65,18 +65,25 @@ class VR180Processor(VideoProcessor):
         Returns:
             Tuple of (left_eye_frame, right_eye_frame)
         """
-        if self.sbs_split_point == 0:
-            self.sbs_split_point = frame.shape[1] // 2
-        
-        # Debug: Check if split point is valid for this frame
+        # Always calculate split point based on current frame width
+        # This handles scaled frames correctly
         frame_width = frame.shape[1]
-        if self.sbs_split_point >= frame_width:
-            print(f"WARNING: Split point {self.sbs_split_point} >= frame width {frame_width}")
-            self.sbs_split_point = frame_width // 2
-            print(f"Adjusted split point to {self.sbs_split_point}")
+        current_split_point = frame_width // 2
         
-        left_eye = frame[:, :self.sbs_split_point]
-        right_eye = frame[:, self.sbs_split_point:]
+        # Debug info on first use
+        if self.sbs_split_point == 0:
+            print(f"Frame dimensions: {frame.shape[1]}x{frame.shape[0]}")
+            print(f"Split point: {current_split_point}")
+            self.sbs_split_point = current_split_point  # Store for reference
+        
+        left_eye = frame[:, :current_split_point]
+        right_eye = frame[:, current_split_point:]
+        
+        # Validate both eyes have content
+        if left_eye.size == 0:
+            raise RuntimeError(f"Left eye frame is empty after split (frame width: {frame_width})")
+        if right_eye.size == 0:
+            raise RuntimeError(f"Right eye frame is empty after split (frame width: {frame_width})")
         
         return left_eye, right_eye
     
@@ -189,7 +196,7 @@ class VR180Processor(VideoProcessor):
             temp_frames_dir = temp_video_path.parent / f"frames_{temp_video_path.stem}"
             temp_frames_dir.mkdir(exist_ok=True)
             
-            # Save frames as individual images
+            # Save frames as individual images (using JPEG for smaller file size)
             print("Saving frames as images...")
             for i, frame in enumerate(eye_frames):
                 # Check if frame is empty
@@ -204,31 +211,64 @@ class VR180Processor(VideoProcessor):
                 if i == 0:
                     print(f"First frame to save: shape={frame.shape}, dtype={frame.dtype}, empty={frame.size == 0}")
                 
-                frame_path = temp_frames_dir / f"frame_{i:06d}.png"
-                success = cv2.imwrite(str(frame_path), frame)
+                # Use JPEG instead of PNG for smaller files (faster I/O, less disk space)
+                frame_path = temp_frames_dir / f"frame_{i:06d}.jpg"
+                # Use high quality JPEG to minimize compression artifacts
+                success = cv2.imwrite(str(frame_path), frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
                 if not success:
                     print(f"Frame {i} details: shape={frame.shape}, dtype={frame.dtype}, size={frame.size}")
                     raise RuntimeError(f"Failed to save frame {i} as image")
                 
                 if i % 50 == 0:
                     print(f"Saved {i}/{len(eye_frames)} frames")
+                
+                # Force garbage collection every 100 frames to free memory
+                if i % 100 == 0:
+                    import gc
+                    gc.collect()
             
             # Use ffmpeg to create video from images
             import subprocess
             # Use the original video's framerate - access through parent class
             original_fps = self.fps if hasattr(self, 'fps') else 30.0
             print(f"Using framerate: {original_fps} fps")
-            ffmpeg_cmd = [
+            # Try GPU encoding first, fallback to CPU
+            gpu_cmd = [
                 'ffmpeg', '-y',  # -y to overwrite output file
                 '-framerate', str(original_fps),
-                '-i', str(temp_frames_dir / 'frame_%06d.png'),
-                '-c:v', 'libx264',
+                '-i', str(temp_frames_dir / 'frame_%06d.jpg'),
+                '-c:v', 'h264_nvenc',  # NVIDIA GPU encoder
+                '-preset', 'fast',     # GPU preset
+                '-cq', '18',           # Quality for GPU encoding
                 '-pix_fmt', 'yuv420p',
-                '-crf', '18',  # Higher quality (lower CRF)
-                '-preset', 'slow',  # Better compression
                 str(temp_video_path)
             ]
             
+            cpu_cmd = [
+                'ffmpeg', '-y',  # -y to overwrite output file
+                '-framerate', str(original_fps),
+                '-i', str(temp_frames_dir / 'frame_%06d.jpg'),
+                '-c:v', 'libx264',     # CPU encoder
+                '-pix_fmt', 'yuv420p',
+                '-crf', '18',          # Quality for CPU encoding
+                '-preset', 'medium',
+                str(temp_video_path)
+            ]
+            
+            # Try GPU first
+            print(f"Trying GPU encoding: {' '.join(gpu_cmd)}")
+            result = subprocess.run(gpu_cmd, capture_output=True, text=True)
+            
+            if result.returncode != 0:
+                print("GPU encoding failed, trying CPU...")
+                print(f"GPU error: {result.stderr}")
+                ffmpeg_cmd = cpu_cmd
+                print(f"Using CPU encoding: {' '.join(ffmpeg_cmd)}")
+                result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
+            else:
+                print("GPU encoding successful!")
+                ffmpeg_cmd = gpu_cmd
+            
             print(f"Running ffmpeg: {' '.join(ffmpeg_cmd)}")
             result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)