use cuda for mask

This commit is contained in:
2024-12-07 12:04:58 -08:00
parent ad5db644fa
commit 61d525dc83
3 changed files with 115 additions and 52 deletions

View File

@@ -26,6 +26,7 @@
import os
import cv2
import numpy as np
import cupy as cp
from concurrent.futures import ThreadPoolExecutor
import torch
import logging
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
return per_obj_input_mask, input_palette
def apply_green_mask(frame, masks):
    """
    Applies masks to the frame, replacing the background with green.

    All masks are OR-ed into a single foreground mask on the GPU (CuPy).
    Pixels covered by at least one mask keep their original value; every
    other pixel is set to (0, 255, 0).

    Parameters:
    - frame: array representing the image frame; its first two axes are
      (height, width) and its last axis must broadcast against 3 channels.
    - masks: iterable of mask arrays. Each mask is squeezed first. A mask
      whose shape differs from the frame's (height, width) is resized with
      cv2.resize (default interpolation) and thresholded at 0.5; otherwise
      it is cast to boolean.

    Returns:
    - result_frame: numpy array with the green background applied.
    """
    frame_gpu = cp.asarray(frame)
    target_shape = frame_gpu.shape[:2]
    combined_mask = cp.zeros(target_shape, dtype=cp.bool_)

    for mask in masks:
        mask_2d = mask.squeeze()
        if mask_2d.shape != target_shape:
            # cv2.resize only works on host memory, so resize *before*
            # uploading instead of bouncing the mask GPU -> CPU -> GPU.
            # cv2 takes the target size as (width, height).
            resized_mask = cv2.resize(
                cp.asnumpy(mask_2d).astype(np.float32),
                (target_shape[1], target_shape[0]),
            )
            mask_gpu = cp.asarray(resized_mask > 0.5)
        else:
            mask_gpu = cp.asarray(mask_2d).astype(cp.bool_, copy=False)

        # Keep the accumulator strictly boolean so the in-place OR is valid.
        combined_mask |= mask_gpu

    # A 3-element colour broadcasts against the frame in cp.where, so no
    # full-size green image has to be allocated.
    green = cp.array([0, 255, 0], dtype=cp.uint8)
    result_frame = cp.where(combined_mask[..., None], frame_gpu, green)

    # Callers expect a host (NumPy) array.
    return cp.asnumpy(result_frame)
# def apply_green_mask(frame, masks):
# """
# Applies masks to the frame, replacing the background with green.
#
# Parameters:
#
# - frame: numpy array representing the image frame.
# - masks: list of numpy arrays representing the masks.
#
# Returns:
# - result_frame: numpy array with the green background applied.
# """
# # Initialize combined mask as a boolean array
# combined_mask = cp.zeros(frame.shape[:2], dtype=bool)
#
#
# for mask in masks:
# mask = mask.squeeze()
#
# # Resize the mask if necessary
# if mask.shape != frame.shape[:2]:
# # Resize the mask using bicubic interpolation (cv2.INTER_CUBIC)
#
# # and convert it to float32 for accurate interpolation
# resized_mask = cv2.resize(
# mask.astype(cp.float32),
# (frame.shape[1], frame.shape[0]),
# interpolation=cv2.INTER_CUBIC
# )
# # Threshold the resized mask to obtain a boolean mask
# # Add a small Gaussian blur to the mask to smooth out the edges
#
# mask = resized_mask > 0.5
# else:
# # Ensure mask is boolean
# mask = mask.astype(bool)
#
# # Combine masks using logical OR
# combined_mask |= mask # Now both arrays are bool
#
# # Create a green background image
# green_background = cp.full_like(frame, [0, 255, 0])
# # Use combined mask to overlay the original frame onto the green background
# result_frame = cp.where(
# combined_mask[..., None],
#
# frame,
# green_background
# )
# #result_frame = frame.copy()
# #result_frame[~combined_mask] = [0, 255, 0]
#
# return result_frame
def initialize_predictor():
if torch.cuda.is_available():
@@ -194,7 +215,7 @@ def load_first_frame(video_path, scale=1.0):
return frame
def select_points(first_frame):
def select_points_old(first_frame):
points_a = []
points_b = []
current_object = 'A'
@@ -231,6 +252,43 @@ def select_points(first_frame):
cv2.destroyAllWindows()
return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)
def select_points(first_frame, num_points=5):
    """
    Interactively collect points for Object A and derive points for Object B.

    The frame is shown in an OpenCV window and every left click is recorded
    as a point for Object A until ``num_points`` points have been collected.
    Points for Object B are then generated by shifting the x-coordinate of
    each Object A point by half the frame width, clipped to the frame.

    Parameters:
    - first_frame: image array; shape[0] is the height, shape[1] the width.
    - num_points: number of clicks to collect (default 5, the original
      hard-coded value).

    Returns:
    - (points_a, points_b): two float32 numpy arrays of shape
      (num_points, 2) holding (x, y) coordinates.

    Raises:
    - RuntimeError: if Esc is pressed before all points are selected.
    """
    window_name = 'Select Points'
    frame_height, frame_width = first_frame.shape[:2]
    half_frame_width = frame_width // 2  # Integer division for pixel coordinates
    clicked = []

    def mouse_callback(event, x, y, flags, param):
        # Ignore everything but left clicks, and drop clicks that arrive after
        # the quota is reached (several events can be dispatched within one
        # waitKey call, which previously could yield extra points).
        if event != cv2.EVENT_LBUTTONDOWN or len(clicked) >= num_points:
            return
        clicked.append((x, y))
        print(f"Selected point {len(clicked)} for Object A: ({x}, {y})")

    print(f"Select {num_points} points for Object A (left side)")
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    # Display at a fixed height of 500 px while preserving the aspect ratio.
    cv2.resizeWindow(window_name, int(frame_width * (500 / frame_height)), 500)
    cv2.imshow(window_name, first_frame)
    cv2.setMouseCallback(window_name, mouse_callback)

    try:
        while len(clicked) < num_points:
            # waitKey pumps the GUI event loop (which delivers the mouse
            # callbacks); Esc (27) gives the user a way out of the loop.
            if cv2.waitKey(1) & 0xFF == 27:
                raise RuntimeError("Point selection aborted by user")
    finally:
        cv2.destroyAllWindows()

    # reshape keeps the (N, 2) layout even when no points were requested.
    points_a = np.array(clicked, dtype=np.float32).reshape(-1, 2)

    # Automatically generate points for Object B by shifting x-coordinates
    points_b = points_a.copy()
    points_b[:, 0] += half_frame_width
    # Ensure that the shifted points are within the frame boundaries
    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)

    return points_a, points_b
def add_points_to_predictor(predictor, inference_state, points, obj_id):
labels = np.array([1, 1, 1, 1, 1], np.int32) # Update labels to match 4 points
points = np.array(points, dtype=np.float32) # Ensure points have shape (4, 2)
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
'-i', '-', # Input from stdin
'-an', # No audio
'-vcodec', encoder,
'-pix_fmt', 'yuv420p',
'-pix_fmt', 'nv12',
'-preset', 'slow',
'-b:v', '50M',
output_video_path