Use CUDA for mask

2024-12-07 12:04:58 -08:00
parent ad5db644fa
commit 61d525dc83
3 changed files with 115 additions and 52 deletions

View File

@@ -23,18 +23,23 @@ for dir in $dirs; do
   segment_name=$(basename "$dir")
   segment_num=$(echo "$segment_name" | sed 's/segment_//')
+  #formatted_segment_number=$(printf "%03d" "$segment_num")
   output_file="$dir/output_$segment_num.mp4"
+  #output_file="$dir/output_${formatted_segment_num}.mp4"
   if [ -f "$output_file" ]; then
     echo "file '$output_file'" >> "$FILE_LIST"
   else
-    echo "No output_$segment_num.mp4 found in $dir"
+    echo "No $output_file found in $dir"
   fi
 done

 # Run ffmpeg to concatenate the videos
-ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined.mp4
+ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined_test.mp4

 # Remove the temporary file
 rm "$FILE_LIST"

View File

@@ -26,6 +26,7 @@
 import os
 import cv2
 import numpy as np
+import cupy as cp
 from concurrent.futures import ThreadPoolExecutor
 import torch
 import logging
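
Note: an unconditional `import cupy as cp` makes the script hard-require a working CUDA runtime; it will now crash at import time on CPU-only machines. A common guard, shown here purely as a sketch and not part of this commit, falls back to NumPy:

    try:
        import cupy as cp
        cp.cuda.runtime.getDeviceCount()  # Raises if no CUDA device is visible
    except Exception:
        import numpy as cp  # Same array API for everything this file uses...
        cp.asnumpy = lambda a: a  # ...except asnumpy, so shim it for the call sites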
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
     return per_obj_input_mask, input_palette

 def apply_green_mask(frame, masks):
-    """
-    Applies masks to the frame, replacing the background with green.
-
-    Parameters:
-    - frame: numpy array representing the image frame.
-    - masks: list of numpy arrays representing the masks.
-
-    Returns:
-    - result_frame: numpy array with the green background applied.
-    """
-    # Initialize combined mask as a boolean array
-    combined_mask = np.zeros(frame.shape[:2], dtype=bool)
+    # Convert frame and masks to CuPy arrays
+    frame_gpu = cp.asarray(frame)
+    combined_mask = cp.zeros(frame_gpu.shape[:2], dtype=cp.bool_)

     for mask in masks:
-        mask = mask.squeeze()
-
-        # Resize the mask if necessary
-        if mask.shape != frame.shape[:2]:
-            # Resize the mask using bicubic interpolation
-            # and convert it to float32 for accurate interpolation
-            resized_mask = cv2.resize(
-                mask.astype(np.float32),
-                (frame.shape[1], frame.shape[0]),
-                interpolation=cv2.INTER_CUBIC
-            )
-            # Threshold the resized mask to obtain a boolean mask
-            # add a small gaussian blur to the mask to smooth out the edges
-            blurred_mask = cv2.GaussianBlur(resized_mask, (5, 5), 0)
-            combined_mask = np.maximum(combined_mask, blurred_mask)
-            mask = resized_mask > 0.5
+        mask_gpu = cp.asarray(mask.squeeze())
+        if mask_gpu.shape != frame_gpu.shape[:2]:
+            resized_mask = cv2.resize(cp.asnumpy(mask_gpu).astype(cp.float32),
+                                      (frame_gpu.shape[1], frame_gpu.shape[0]))
+            mask_gpu = cp.asarray(resized_mask > 0.5)  # Convert back to a CuPy boolean array
         else:
-            # Ensure mask is boolean
-            mask = mask.astype(bool)
-
-        # Combine masks using logical OR
-        combined_mask |= mask  # Now both arrays are bool
-
-    # Create a green background image
-    green_background = np.full_like(frame, [0, 255, 0])
-    # Use combined mask to overlay the original frame onto the green background
-    result_frame = np.where(
-        combined_mask[..., None],
-        frame,
-        green_background
-    )
-
-    return result_frame
+            mask_gpu = mask_gpu.astype(cp.bool_)  # Ensure boolean type
+        combined_mask |= mask_gpu  # Combine masks with logical OR
+
+    green_background = cp.full(frame_gpu.shape, cp.array([0, 255, 0], dtype=cp.uint8), dtype=cp.uint8)
+    result_frame = cp.where(combined_mask[..., None], frame_gpu, green_background)
+    return cp.asnumpy(result_frame)  # Convert back to NumPy
+
+# def apply_green_mask(frame, masks):
+#     """
+#     Applies masks to the frame, replacing the background with green.
+#
+#     Parameters:
+#     - frame: numpy array representing the image frame.
+#     - masks: list of numpy arrays representing the masks.
+#
+#     Returns:
+#     - result_frame: numpy array with the green background applied.
+#     """
+#     # Initialize combined mask as a boolean array
+#     combined_mask = cp.zeros(frame.shape[:2], dtype=bool)
+#
+#     for mask in masks:
+#         mask = mask.squeeze()
+#
+#         # Resize the mask if necessary
+#         if mask.shape != frame.shape[:2]:
+#             # Resize the mask using bicubic interpolation
+#             # and convert it to float32 for accurate interpolation
+#             resized_mask = cv2.resize(
+#                 mask.astype(cp.float32),
+#                 (frame.shape[1], frame.shape[0]),
+#                 interpolation=cv2.INTER_CUBIC
+#             )
+#             # Threshold the resized mask to obtain a boolean mask
+#             # add a small gaussian blur to the mask to smooth out the edges
+#             mask = resized_mask > 0.5
+#         else:
+#             # Ensure mask is boolean
+#             mask = mask.astype(bool)
+#
+#         # Combine masks using logical OR
+#         combined_mask |= mask  # Now both arrays are bool
+#
+#     # Create a green background image
+#     green_background = cp.full_like(frame, [0, 255, 0])
+#     # Use combined mask to overlay the original frame onto the green background
+#     result_frame = cp.where(
+#         combined_mask[..., None],
+#         frame,
+#         green_background
+#     )
+#     #result_frame = frame.copy()
+#     #result_frame[~combined_mask] = [0, 255, 0]
+#
+#     return result_frame

 def initialize_predictor():
     if torch.cuda.is_available():
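
Note: the new `apply_green_mask` keeps thresholding and compositing on the GPU but still round-trips every mask through the CPU for `cv2.resize` (`cp.asnumpy` → resize → `cp.asarray`), and the old `GaussianBlur` edge smoothing has no counterpart in the CuPy version. If the copies show up in profiles, one alternative is to resize on the GPU with `cupyx.scipy.ndimage.zoom`; a minimal sketch, where the function name and order-1 interpolation are my choices, not the commit's:

    import cupy as cp
    from cupyx.scipy.ndimage import zoom

    def resize_mask_gpu(mask_gpu, target_h, target_w):
        # Per-axis zoom factors; zoom's output shape is round(shape * factor),
        # so verify it matches the frame size before relying on this
        factors = (target_h / mask_gpu.shape[0], target_w / mask_gpu.shape[1])
        resized = zoom(mask_gpu.astype(cp.float32), factors, order=1)  # order=1: linear
        return resized > 0.5  # Boolean mask, still on the GPU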
@@ -194,7 +215,7 @@ def load_first_frame(video_path, scale=1.0):
     return frame

-def select_points(first_frame):
+def select_points_old(first_frame):
     points_a = []
     points_b = []
     current_object = 'A'
@@ -231,6 +252,43 @@ def select_points(first_frame):
     cv2.destroyAllWindows()
     return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)

+def select_points(first_frame):
+    points_a = []
+    point_count = 0
+    selection_complete = False
+
+    frame_width = first_frame.shape[1]
+    half_frame_width = frame_width // 2  # Integer division for pixel coordinates
+
+    def mouse_callback(event, x, y, flags, param):
+        nonlocal points_a, point_count, selection_complete
+        if event == cv2.EVENT_LBUTTONDOWN:
+            points_a.append((x, y))
+            point_count += 1
+            print(f"Selected point {point_count} for Object A: ({x}, {y})")
+            if len(points_a) == 5:  # Collect 5 points for Object A
+                selection_complete = True
+
+    print("Select 5 points for Object A (left side)")
+    cv2.namedWindow('Select Points', cv2.WINDOW_NORMAL)
+    cv2.resizeWindow('Select Points', int(first_frame.shape[1] * (500 / first_frame.shape[0])), 500)
+    cv2.imshow('Select Points', first_frame)
+    cv2.setMouseCallback('Select Points', mouse_callback)
+
+    while not selection_complete:
+        cv2.waitKey(1)
+    cv2.destroyAllWindows()
+
+    # Automatically generate points for Object B by shifting x-coordinates
+    points_a = np.array(points_a, dtype=np.float32)
+    points_b = points_a.copy()
+    points_b[:, 0] += half_frame_width  # Shift x-coordinates by half the frame width
+    # Ensure that the shifted points are within the frame boundaries
+    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
+
+    return points_a, points_b
+
 def add_points_to_predictor(predictor, inference_state, points, obj_id):
     labels = np.array([1, 1, 1, 1, 1], np.int32)  # Positive labels for the 5 points
     points = np.array(points, dtype=np.float32)  # Ensure points have shape (5, 2)
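
Note: the new `select_points` collects five clicks for Object A and derives Object B by shifting x by half the frame width, which assumes a side-by-side layout where both objects sit at the same vertical position. A quick check of the shift-and-clip step with made-up coordinates:

    import numpy as np

    frame_width = 3840  # hypothetical side-by-side frame
    points_a = np.array([[100.0, 200.0], [1950.0, 500.0]], dtype=np.float32)
    points_b = points_a.copy()
    points_b[:, 0] += frame_width // 2   # shift into the right half
    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
    print(points_b)  # [[2020. 200.] [3839. 500.]] -- second x clipped from 3870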
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
         '-i', '-',  # Input from stdin
         '-an',  # No audio
         '-vcodec', encoder,
-        '-pix_fmt', 'yuv420p',
+        '-pix_fmt', 'nv12',
         '-preset', 'slow',
         '-b:v', '50M',
         output_video_path
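
Note: `nv12` is the semi-planar 4:2:0 layout that NVIDIA's hardware encoders consume natively, so this change can save a pixel-format conversion in front of NVENC; software encoders such as libx264 generally expect `yuv420p` instead. For context, a sketch of the kind of stdin-pipe setup this hunk sits in; the encoder name, frame size, fps, and input pixel format here are placeholders, not values from the commit:

    import subprocess

    encoder = 'h264_nvenc'               # placeholder; the script selects this elsewhere
    width, height, fps = 1920, 1080, 30  # placeholders
    cmd = [
        'ffmpeg', '-y',
        '-f', 'rawvideo', '-pix_fmt', 'bgr24',
        '-s', f'{width}x{height}', '-r', str(fps),
        '-i', '-',             # raw frames piped in on stdin
        '-an',                 # no audio
        '-vcodec', encoder,
        '-pix_fmt', 'nv12',    # NVENC-native 4:2:0 layout
        '-preset', 'slow',
        '-b:v', '50M',
        'output.mp4',
    ]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)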

View File

@@ -18,7 +18,7 @@ output_folder="${input_file%.*}_segments"
 mkdir -p "$output_folder"

 # Split the video into segments using ffmpeg
-ffmpeg -i "$input_file" -c copy -f segment -segment_time "$time" -reset_timestamps 1 "$output_folder/segment_%03d.mp4"
+ffmpeg -i "$input_file" -force_key_frames "expr:gte(t,n_forced*5)" -c copy -f segment -segment_time "$time" -reset_timestamps 1 -copyts "$output_folder/segment_%03d.mp4"

 # Change to the output folder
 cd "$output_folder"