use cuda for mask
This commit is contained in:
@@ -26,6 +26,7 @@
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
import cupy as cp
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import torch
|
||||
import logging
|
||||
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
|
||||
|
||||
return per_obj_input_mask, input_palette
|
||||
|
||||
|
||||
def apply_green_mask(frame, masks):
|
||||
"""
|
||||
Applies masks to the frame, replacing the background with green.
|
||||
|
||||
Parameters:
|
||||
|
||||
- frame: numpy array representing the image frame.
|
||||
- masks: list of numpy arrays representing the masks.
|
||||
|
||||
Returns:
|
||||
- result_frame: numpy array with the green background applied.
|
||||
"""
|
||||
# Initialize combined mask as a boolean array
|
||||
combined_mask = np.zeros(frame.shape[:2], dtype=bool)
|
||||
|
||||
# Convert frame and masks to CuPy arrays
|
||||
frame_gpu = cp.asarray(frame)
|
||||
combined_mask = cp.zeros(frame_gpu.shape[:2], dtype=cp.bool_)
|
||||
|
||||
for mask in masks:
|
||||
mask = mask.squeeze()
|
||||
|
||||
# Resize the mask if necessary
|
||||
if mask.shape != frame.shape[:2]:
|
||||
# Resize the mask using bicubic interpolation (cv2.INTER_CUBIC)
|
||||
|
||||
# and convert it to float32 for accurate interpolation
|
||||
resized_mask = cv2.resize(
|
||||
mask.astype(np.float32),
|
||||
(frame.shape[1], frame.shape[0]),
|
||||
interpolation=cv2.INTER_CUBIC
|
||||
)
|
||||
# Threshold the resized mask to obtain a boolean mask
|
||||
# Add a small Gaussian blur to the mask to smooth out the edges
|
||||
blurred_mask = cv2.GaussianBlur(resized_mask, (5, 5), 0)
|
||||
|
||||
combined_mask = np.maximum(combined_mask, blurred_mask)
|
||||
|
||||
mask = resized_mask > 0.5
|
||||
mask_gpu = cp.asarray(mask.squeeze())
|
||||
if mask_gpu.shape != frame_gpu.shape[:2]:
|
||||
resized_mask = cv2.resize(cp.asnumpy(mask_gpu).astype(cp.float32),
|
||||
(frame_gpu.shape[1], frame_gpu.shape[0]))
|
||||
mask_gpu = cp.asarray(resized_mask > 0.5) # Convert back to CuPy boolean array
|
||||
else:
|
||||
# Ensure mask is boolean
|
||||
mask = mask.astype(bool)
|
||||
mask_gpu = mask_gpu.astype(cp.bool_) # Ensure boolean type
|
||||
combined_mask |= mask_gpu # Perform the bitwise OR operation
|
||||
|
||||
# Combine masks using logical OR
|
||||
combined_mask |= mask # Now both arrays are bool
|
||||
green_background = cp.full(frame_gpu.shape, cp.array([0, 255, 0], dtype=cp.uint8), dtype=cp.uint8)
|
||||
result_frame = cp.where(combined_mask[..., None], frame_gpu, green_background)
|
||||
return cp.asnumpy(result_frame) # Convert back to NumPy
|
||||
|
||||
# Create a green background image
|
||||
green_background = np.full_like(frame, [0, 255, 0])
|
||||
# Use combined mask to overlay the original frame onto the green background
|
||||
result_frame = np.where(
|
||||
combined_mask[..., None],
|
||||
|
||||
frame,
|
||||
green_background
|
||||
)
|
||||
|
||||
return result_frame
|
||||
# def apply_green_mask(frame, masks):
|
||||
# """
|
||||
# Applies masks to the frame, replacing the background with green.
|
||||
#
|
||||
# Parameters:
|
||||
#
|
||||
# - frame: numpy array representing the image frame.
|
||||
# - masks: list of numpy arrays representing the masks.
|
||||
#
|
||||
# Returns:
|
||||
# - result_frame: numpy array with the green background applied.
|
||||
# """
|
||||
# # Initialize combined mask as a boolean array
|
||||
# combined_mask = cp.zeros(frame.shape[:2], dtype=bool)
|
||||
#
|
||||
#
|
||||
# for mask in masks:
|
||||
# mask = mask.squeeze()
|
||||
#
|
||||
# # Resize the mask if necessary
|
||||
# if mask.shape != frame.shape[:2]:
|
||||
# # Resize the mask using bicubic interpolation (cv2.INTER_CUBIC)
|
||||
#
|
||||
# # and convert it to float32 for accurate interpolation
|
||||
# resized_mask = cv2.resize(
|
||||
# mask.astype(cp.float32),
|
||||
# (frame.shape[1], frame.shape[0]),
|
||||
# interpolation=cv2.INTER_CUBIC
|
||||
# )
|
||||
# # Threshold the resized mask to obtain a boolean mask
|
||||
# # Add a small Gaussian blur to the mask to smooth out the edges
|
||||
#
|
||||
# mask = resized_mask > 0.5
|
||||
# else:
|
||||
# # Ensure mask is boolean
|
||||
# mask = mask.astype(bool)
|
||||
#
|
||||
# # Combine masks using logical OR
|
||||
# combined_mask |= mask # Now both arrays are bool
|
||||
#
|
||||
# # Create a green background image
|
||||
# green_background = cp.full_like(frame, [0, 255, 0])
|
||||
# # Use combined mask to overlay the original frame onto the green background
|
||||
# result_frame = cp.where(
|
||||
# combined_mask[..., None],
|
||||
#
|
||||
# frame,
|
||||
# green_background
|
||||
# )
|
||||
# #result_frame = frame.copy()
|
||||
# #result_frame[~combined_mask] = [0, 255, 0]
|
||||
#
|
||||
# return result_frame
|
||||
|
||||
def initialize_predictor():
|
||||
if torch.cuda.is_available():
|
||||
@@ -194,7 +215,7 @@ def load_first_frame(video_path, scale=1.0):
|
||||
|
||||
return frame
|
||||
|
||||
def select_points(first_frame):
|
||||
def select_points_old(first_frame):
|
||||
points_a = []
|
||||
points_b = []
|
||||
current_object = 'A'
|
||||
@@ -231,6 +252,43 @@ def select_points(first_frame):
|
||||
cv2.destroyAllWindows()
|
||||
return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)
|
||||
|
||||
def select_points(first_frame, num_points=5):
    """
    Collects click points for Object A and derives the points for Object B.

    Shows `first_frame` in an OpenCV window (sized to 500 px tall, keeping the
    frame's aspect ratio) and blocks until `num_points` left-clicks have been
    recorded. Object B is not clicked: its points are copies of Object A's
    points shifted right by half the frame width, then clamped to the last
    valid column.

    Parameters:

    - first_frame: image array; shape[0] is read as height, shape[1] as width.
    - num_points: number of left-clicks to collect for Object A (default 5).

    Returns:

    - (points_a, points_b): two float32 arrays of shape (num_points, 2)
      holding (x, y) coordinates.
    """
    window_name = 'Select Points'
    frame_height, frame_width = first_frame.shape[:2]
    half_frame_width = frame_width // 2  # Integer division for pixel coordinates
    points_a = []

    def mouse_callback(event, x, y, flags, param):
        # Ignore everything except left-clicks, and drop clicks that arrive
        # after the quota is met: several events can be dispatched inside a
        # single waitKey() call, which would otherwise yield extra points.
        if event != cv2.EVENT_LBUTTONDOWN or len(points_a) >= num_points:
            return
        points_a.append((x, y))
        print(f"Selected point {len(points_a)} for Object A: ({x}, {y})")

    print(f"Select {num_points} points for Object A (left side)")
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_name, int(frame_width * (500 / frame_height)), 500)
    cv2.imshow(window_name, first_frame)
    cv2.setMouseCallback(window_name, mouse_callback)

    # waitKey() pumps the GUI event loop so the mouse callback can fire.
    while len(points_a) < num_points:
        cv2.waitKey(1)

    cv2.destroyAllWindows()

    # reshape keeps the (N, 2) layout even when no points were requested.
    points_a = np.array(points_a, dtype=np.float32).reshape(-1, 2)

    # Automatically generate points for Object B by shifting x-coordinates,
    # keeping the shifted points within the frame boundaries.
    points_b = points_a.copy()
    points_b[:, 0] = np.clip(points_b[:, 0] + half_frame_width, 0, frame_width - 1)

    return points_a, points_b
|
||||
|
||||
def add_points_to_predictor(predictor, inference_state, points, obj_id):
|
||||
labels = np.array([1, 1, 1, 1, 1], np.int32) # One positive label per point (5 points)
|
||||
points = np.array(points, dtype=np.float32) # Ensure points are a float32 array of (x, y) rows, one per label
|
||||
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
|
||||
'-i', '-', # Input from stdin
|
||||
'-an', # No audio
|
||||
'-vcodec', encoder,
|
||||
'-pix_fmt', 'yuv420p',
|
||||
'-pix_fmt', 'nv12',
|
||||
'-preset', 'slow',
|
||||
'-b:v', '50M',
|
||||
output_video_path
|
||||
|
||||
Reference in New Issue
Block a user