working a bit faster

This commit is contained in:
2025-07-31 09:09:22 -07:00
parent 70044e1b10
commit 0057017ac4
5 changed files with 585 additions and 137 deletions

View File

@@ -459,41 +459,12 @@ class MaskProcessor:
return colored_mask
def _precompute_upscaled_masks(self, video_segments: Dict[int, Dict[int, np.ndarray]],
                               target_width: int, target_height: int) -> Dict[int, Dict[int, np.ndarray]]:
    """
    Build a copy of ``video_segments`` whose masks all match the target frame size.

    Each mask is squeezed and converted to ``np.uint8``. Masks whose squeezed
    shape is not ``(target_height, target_width)`` are resized with
    nearest-neighbour interpolation; masks that already match are only converted.

    Args:
        video_segments: Mapping of frame index -> {object id -> mask}
            (frame masks from SAM2).
        target_width: Target frame width in pixels.
        target_height: Target frame height in pixels.

    Returns:
        A new mapping with the same frame indices and object ids, holding the
        uint8 masks at the target size. The input mapping is not modified.
    """
    logger.info(f"Pre-computing upscaled masks for {len(video_segments)} frames")

    # NumPy shapes are (rows, cols) while cv2.resize takes dsize as (width, height).
    wanted_shape = (target_height, target_width)
    resize_dsize = (target_width, target_height)

    def _fit_to_frame(raw_mask):
        """Return ``raw_mask`` as a uint8 array of the target frame size."""
        squeezed = raw_mask.squeeze()
        as_uint8 = squeezed.astype(np.uint8)
        if squeezed.shape == wanted_shape:
            return as_uint8
        # Nearest-neighbour keeps the mask values discrete (no blended edge values).
        return cv2.resize(as_uint8, resize_dsize, interpolation=cv2.INTER_NEAREST)

    resized_by_frame = {
        frame_no: {
            object_id: _fit_to_frame(object_mask)
            for object_id, object_mask in masks_for_frame.items()
        }
        for frame_no, masks_for_frame in video_segments.items()
    }

    logger.info(f"Pre-computed upscaled masks for {len(resized_by_frame)} frames")
    return resized_by_frame
def process_and_save_output_video(self, video_path: str, output_video_path: str,
video_segments: Dict[int, Dict[int, np.ndarray]],
use_nvenc: bool = False, bitrate: str = "50M") -> bool:
use_nvenc: bool = False, bitrate: str = "50M",
batch_size: int = 16) -> bool:
"""
Process high-resolution frames, apply upscaled masks, and save the output video.
@@ -503,6 +474,7 @@ class MaskProcessor:
video_segments: Dictionary of frame masks
use_nvenc: Whether to use NVIDIA hardware encoding
bitrate: Output video bitrate
batch_size: Number of frames to process in a single batch
Returns:
True if successful
@@ -520,18 +492,15 @@ class MaskProcessor:
logger.info(f"Processing video: {frame_width}x{frame_height} @ {fps}fps, {total_frames} frames")
# Pre-compute all upscaled masks (Phase 1 optimization)
upscaled_segments = self._precompute_upscaled_masks(video_segments, frame_width, frame_height)
# Setup VideoWriter
out_writer = None
if self.output_mode == "alpha_channel":
# For alpha channel, we need a codec that supports transparency
success = self._setup_alpha_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
if not success:
logger.error("Failed to setup alpha channel encoder")
cap.release()
return False
use_nvenc = False # Override NVENC for alpha channel
use_nvenc = False
elif use_nvenc:
success = self._setup_nvenc_encoder(output_video_path, frame_width, frame_height, fps, bitrate)
if not success:
@@ -539,106 +508,101 @@ class MaskProcessor:
use_nvenc = False
if not use_nvenc and self.output_mode != "alpha_channel":
# Use OpenCV VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Use mp4v for better compatibility
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not out_writer.isOpened():
logger.error("Failed to create output video writer")
cap.release()
return False
# Process frames with batch reading (Phase 1 optimization)
# Process frames in batches
frame_idx = 0
processed_frames = 0
batch_size = 10 # Process frames in batches for better I/O performance
frame_buffer = []
# Pre-fill frame buffer
for _ in range(min(batch_size, len(upscaled_segments))):
ret, frame = cap.read()
if ret:
frame_buffer.append(frame)
else:
while frame_idx < total_frames:
batch_frames = []
batch_masks = []
# Read a batch of frames
for _ in range(batch_size):
ret, frame = cap.read()
if not ret:
break
batch_frames.append(frame)
if not batch_frames:
break
buffer_idx = 0
while frame_idx < len(upscaled_segments) and buffer_idx < len(frame_buffer):
frame = frame_buffer[buffer_idx]
if frame_idx in upscaled_segments:
# Get pre-computed upscaled masks for this frame (Phase 1 optimization)
upscaled_masks = [upscaled_segments[frame_idx][obj_id]
for obj_id in upscaled_segments[frame_idx]]
# Apply mask based on output mode (no upscaling needed - already done)
if self.output_mode == "alpha_channel":
result_frame = self.apply_mask_with_alpha(frame, upscaled_masks)
# Get masks for the current batch and perform just-in-time upscaling
for i in range(len(batch_frames)):
current_frame_idx = frame_idx + i
if current_frame_idx in video_segments:
frame_masks = video_segments[current_frame_idx]
upscaled_masks = []
for obj_id, mask in frame_masks.items():
mask = mask.squeeze()
if mask.shape != (frame_height, frame_width):
upscaled_mask = cv2.resize(mask.astype(np.uint8),
(frame_width, frame_height),
interpolation=cv2.INTER_NEAREST)
upscaled_masks.append(upscaled_mask)
else:
upscaled_masks.append(mask.astype(np.uint8))
batch_masks.append(upscaled_masks)
else:
result_frame = self.apply_green_mask(frame, upscaled_masks)
else:
# No mask for this frame
if self.output_mode == "alpha_channel":
# Create fully transparent frame for alpha channel mode
bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
bgra_frame[:, :, 3] = 0 # Fully transparent
result_frame = bgra_frame
logger.warning(f"No mask for frame {frame_idx}, using transparent frame")
else:
# Use original frame for green screen mode
logger.warning(f"No mask for frame {frame_idx}, using original")
result_frame = frame
batch_masks.append([]) # No masks for this frame
# Write frame
if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
self.alpha_process.stdin.write(result_frame.tobytes())
elif use_nvenc and hasattr(self, 'nvenc_process'):
self.nvenc_process.stdin.write(result_frame.tobytes())
else:
out.write(result_frame)
processed_frames += 1
frame_idx += 1
buffer_idx += 1
# Refill buffer when needed
if buffer_idx >= len(frame_buffer) and frame_idx < len(upscaled_segments):
frame_buffer.clear()
buffer_idx = 0
# Read next batch
for _ in range(min(batch_size, len(upscaled_segments) - frame_idx)):
ret, frame = cap.read()
if ret:
frame_buffer.append(frame)
# Process the batch
result_batch = []
for i, frame in enumerate(batch_frames):
masks = batch_masks[i]
if masks:
if self.output_mode == "alpha_channel":
result_frame = self.apply_mask_with_alpha(frame, masks)
else:
break
result_frame = self.apply_green_mask(frame, masks)
else:
# No mask for this frame
if self.output_mode == "alpha_channel":
bgra_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
bgra_frame[:, :, 3] = 0
result_frame = bgra_frame
else:
result_frame = frame
result_batch.append(result_frame)
# Progress logging
if processed_frames % 100 == 0:
# Write the processed batch
for result_frame in result_batch:
if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
self.alpha_process.stdin.write(result_frame.tobytes())
elif use_nvenc and hasattr(self, 'nvenc_process'):
self.nvenc_process.stdin.write(result_frame.tobytes())
else:
out_writer.write(result_frame)
processed_frames += len(batch_frames)
frame_idx += len(batch_frames)
if processed_frames % 100 < batch_size:
logger.info(f"Processed {processed_frames}/{total_frames} frames")
# Cleanup
cap.release()
if self.output_mode == "alpha_channel" and hasattr(self, 'alpha_process'):
self.alpha_process.stdin.close()
self.alpha_process.wait()
if self.alpha_process.returncode != 0:
logger.error("Alpha channel encoding failed")
return False
elif use_nvenc and hasattr(self, 'nvenc_process'):
self.nvenc_process.stdin.close()
self.nvenc_process.wait()
if self.nvenc_process.returncode != 0:
logger.error("NVENC encoding failed")
return False
else:
out.release()
if out_writer:
out_writer.release()
logger.info(f"Successfully processed {processed_frames} frames to {output_video_path}")
return True
except Exception as e:
logger.error(f"Error processing video: {e}")
logger.error(f"Error processing video: {e}", exc_info=True)
return False
def _setup_nvenc_encoder(self, output_path: str, width: int, height: int,
@@ -751,6 +715,14 @@ class MaskProcessor:
if success:
logger.info(f"Successfully created {self.output_mode} video: {output_video}")
# Mark segment as completed only after video is successfully written
try:
output_done_file = os.path.join(segment_info['directory'], "output_frames_done")
with open(output_done_file, 'w') as f:
f.write(f"Segment {segment_info['index']} processed and saved successfully.")
logger.debug(f"Created completion marker for segment {segment_info['index']}")
except Exception as e:
logger.error(f"Failed to create completion marker for segment {segment_info['index']}: {e}")
else:
logger.error(f"Failed to process segment {segment_info['index']}")