use cuda for mask
This commit is contained in:
@@ -23,18 +23,23 @@ for dir in $dirs; do
|
|||||||
segment_name=$(basename "$dir")
|
segment_name=$(basename "$dir")
|
||||||
segment_num=$(echo "$segment_name" | sed 's/segment_//')
|
segment_num=$(echo "$segment_name" | sed 's/segment_//')
|
||||||
|
|
||||||
|
#formatted_segment_number=$(printf "%03d" "$segment_num")
|
||||||
|
|
||||||
|
|
||||||
output_file="$dir/output_$segment_num.mp4"
|
output_file="$dir/output_$segment_num.mp4"
|
||||||
|
#output_file="$dir/output_${formatted_segment_num}.mp4"
|
||||||
|
|
||||||
|
|
||||||
if [ -f "$output_file" ]; then
|
if [ -f "$output_file" ]; then
|
||||||
echo "file '$output_file'" >> "$FILE_LIST"
|
echo "file '$output_file'" >> "$FILE_LIST"
|
||||||
|
|
||||||
else
|
else
|
||||||
echo "No output_$segment_num.mp4 found in $dir"
|
echo "No $output_file found in $dir"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Run ffmpeg to concatenate the videos
|
# Run ffmpeg to concatenate the videos
|
||||||
ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined.mp4
|
ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined_test.mp4
|
||||||
|
|
||||||
# Remove the temporary file
|
# Remove the temporary file
|
||||||
rm "$FILE_LIST"
|
rm "$FILE_LIST"
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
import os
|
import os
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import cupy as cp
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import torch
|
import torch
|
||||||
import logging
|
import logging
|
||||||
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
|
|||||||
|
|
||||||
return per_obj_input_mask, input_palette
|
return per_obj_input_mask, input_palette
|
||||||
|
|
||||||
|
|
||||||
def apply_green_mask(frame, masks):
|
def apply_green_mask(frame, masks):
|
||||||
"""
|
# Convert frame and masks to CuPy arrays
|
||||||
Applies masks to the frame, replacing the background with green.
|
frame_gpu = cp.asarray(frame)
|
||||||
|
combined_mask = cp.zeros(frame_gpu.shape[:2], dtype=cp.bool_)
|
||||||
Parameters:
|
|
||||||
|
|
||||||
- frame: numpy array representing the image frame.
|
|
||||||
- masks: list of numpy arrays representing the masks.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- result_frame: numpy array with the green background applied.
|
|
||||||
"""
|
|
||||||
# Initialize combined mask as a boolean array
|
|
||||||
combined_mask = np.zeros(frame.shape[:2], dtype=bool)
|
|
||||||
|
|
||||||
|
|
||||||
for mask in masks:
|
for mask in masks:
|
||||||
mask = mask.squeeze()
|
mask_gpu = cp.asarray(mask.squeeze())
|
||||||
|
if mask_gpu.shape != frame_gpu.shape[:2]:
|
||||||
# Resize the mask if necessary
|
resized_mask = cv2.resize(cp.asnumpy(mask_gpu).astype(cp.float32),
|
||||||
if mask.shape != frame.shape[:2]:
|
(frame_gpu.shape[1], frame_gpu.shape[0]))
|
||||||
# Resize the mask using bicubic interpolation
|
mask_gpu = cp.asarray(resized_mask > 0.5) # Convert back to CuPy boolean array
|
||||||
|
|
||||||
# and convert it to float32 for accurate interpolation
|
|
||||||
resized_mask = cv2.resize(
|
|
||||||
mask.astype(np.float32),
|
|
||||||
(frame.shape[1], frame.shape[0]),
|
|
||||||
interpolation=cv2.INTER_CUBIC
|
|
||||||
)
|
|
||||||
# Threshold the resized mask to obtain a boolean mask
|
|
||||||
# Add a small Gaussian blur to the mask to smooth out the edges
|
|
||||||
blurred_mask = cv2.GaussianBlur(resized_mask, (5, 5), 0)
|
|
||||||
|
|
||||||
combined_mask = np.maximum(combined_mask, blurred_mask)
|
|
||||||
|
|
||||||
mask = resized_mask > 0.5
|
|
||||||
else:
|
else:
|
||||||
# Ensure mask is boolean
|
mask_gpu = mask_gpu.astype(cp.bool_) # Ensure boolean type
|
||||||
mask = mask.astype(bool)
|
combined_mask |= mask_gpu # Perform the bitwise OR operation
|
||||||
|
|
||||||
# Combine masks using logical OR
|
green_background = cp.full(frame_gpu.shape, cp.array([0, 255, 0], dtype=cp.uint8), dtype=cp.uint8)
|
||||||
combined_mask |= mask # Now both arrays are bool
|
result_frame = cp.where(combined_mask[..., None], frame_gpu, green_background)
|
||||||
|
return cp.asnumpy(result_frame) # Convert back to NumPy
|
||||||
|
|
||||||
# Create a green background image
|
|
||||||
green_background = np.full_like(frame, [0, 255, 0])
|
|
||||||
# Use combined mask to overlay the original frame onto the green background
|
|
||||||
result_frame = np.where(
|
|
||||||
combined_mask[..., None],
|
|
||||||
|
|
||||||
frame,
|
# def apply_green_mask(frame, masks):
|
||||||
green_background
|
# """
|
||||||
)
|
# Applies masks to the frame, replacing the background with green.
|
||||||
|
#
|
||||||
return result_frame
|
# Parameters:
|
||||||
|
#
|
||||||
|
# - frame: numpy array representing the image frame.
|
||||||
|
# - masks: list of numpy arrays representing the masks.
|
||||||
|
#
|
||||||
|
# Returns:
|
||||||
|
# - result_frame: numpy array with the green background applied.
|
||||||
|
# """
|
||||||
|
# # Initialize combined mask as a boolean array
|
||||||
|
# combined_mask = cp.zeros(frame.shape[:2], dtype=bool)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# for mask in masks:
|
||||||
|
# mask = mask.squeeze()
|
||||||
|
#
|
||||||
|
# # Resize the mask if necessary
|
||||||
|
# if mask.shape != frame.shape[:2]:
|
||||||
|
# # Resize the mask using bicubic interpolation
|
||||||
|
#
|
||||||
|
# # and convert it to float32 for accurate interpolation
|
||||||
|
# resized_mask = cv2.resize(
|
||||||
|
# mask.astype(cp.float32),
|
||||||
|
# (frame.shape[1], frame.shape[0]),
|
||||||
|
# interpolation=cv2.INTER_CUBIC
|
||||||
|
# )
|
||||||
|
# # Threshold the resized mask to obtain a boolean mask
|
||||||
|
# # Add a small Gaussian blur to the mask to smooth out the edges
|
||||||
|
#
|
||||||
|
# mask = resized_mask > 0.5
|
||||||
|
# else:
|
||||||
|
# # Ensure mask is boolean
|
||||||
|
# mask = mask.astype(bool)
|
||||||
|
#
|
||||||
|
# # Combine masks using logical OR
|
||||||
|
# combined_mask |= mask # Now both arrays are bool
|
||||||
|
#
|
||||||
|
# # Create a green background image
|
||||||
|
# green_background = cp.full_like(frame, [0, 255, 0])
|
||||||
|
# # Use combined mask to overlay the original frame onto the green background
|
||||||
|
# result_frame = cp.where(
|
||||||
|
# combined_mask[..., None],
|
||||||
|
#
|
||||||
|
# frame,
|
||||||
|
# green_background
|
||||||
|
# )
|
||||||
|
# #result_frame = frame.copy()
|
||||||
|
# #result_frame[~combined_mask] = [0, 255, 0]
|
||||||
|
#
|
||||||
|
# return result_frame
|
||||||
|
|
||||||
def initialize_predictor():
|
def initialize_predictor():
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
@@ -194,7 +215,7 @@ def load_first_frame(video_path, scale=1.0):
|
|||||||
|
|
||||||
return frame
|
return frame
|
||||||
|
|
||||||
def select_points(first_frame):
|
def select_points_old(first_frame):
|
||||||
points_a = []
|
points_a = []
|
||||||
points_b = []
|
points_b = []
|
||||||
current_object = 'A'
|
current_object = 'A'
|
||||||
@@ -231,6 +252,43 @@ def select_points(first_frame):
|
|||||||
cv2.destroyAllWindows()
|
cv2.destroyAllWindows()
|
||||||
return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)
|
return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)
|
||||||
|
|
||||||
|
def select_points(first_frame):
|
||||||
|
points_a = []
|
||||||
|
point_count = 0
|
||||||
|
selection_complete = False
|
||||||
|
frame_width = first_frame.shape[1]
|
||||||
|
half_frame_width = frame_width // 2 # Integer division for pixel coordinates
|
||||||
|
|
||||||
|
def mouse_callback(event, x, y, flags, param):
|
||||||
|
nonlocal points_a, point_count, selection_complete
|
||||||
|
if event == cv2.EVENT_LBUTTONDOWN:
|
||||||
|
points_a.append((x, y))
|
||||||
|
point_count += 1
|
||||||
|
print(f"Selected point {point_count} for Object A: ({x}, {y})")
|
||||||
|
if len(points_a) == 5: # Collect 5 points for Object A
|
||||||
|
selection_complete = True
|
||||||
|
|
||||||
|
print("Select 5 points for Object A (left side)")
|
||||||
|
cv2.namedWindow('Select Points', cv2.WINDOW_NORMAL)
|
||||||
|
cv2.resizeWindow('Select Points', int(first_frame.shape[1] * (500 / first_frame.shape[0])), 500)
|
||||||
|
cv2.imshow('Select Points', first_frame)
|
||||||
|
cv2.setMouseCallback('Select Points', mouse_callback)
|
||||||
|
|
||||||
|
while not selection_complete:
|
||||||
|
cv2.waitKey(1)
|
||||||
|
|
||||||
|
cv2.destroyAllWindows()
|
||||||
|
|
||||||
|
# Automatically generate points for Object B by shifting x-coordinates
|
||||||
|
points_a = np.array(points_a, dtype=np.float32)
|
||||||
|
points_b = points_a.copy()
|
||||||
|
points_b[:, 0] += half_frame_width # Shift x-coordinate by half the frame width
|
||||||
|
|
||||||
|
# Ensure that the shifted points are within the frame boundaries
|
||||||
|
points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
|
||||||
|
|
||||||
|
return points_a, points_b
|
||||||
|
|
||||||
def add_points_to_predictor(predictor, inference_state, points, obj_id):
|
def add_points_to_predictor(predictor, inference_state, points, obj_id):
|
||||||
labels = np.array([1, 1, 1, 1, 1], np.int32) # Update labels to match 4 points
|
labels = np.array([1, 1, 1, 1, 1], np.int32) # Update labels to match 4 points
|
||||||
points = np.array(points, dtype=np.float32) # Ensure points have shape (4, 2)
|
points = np.array(points, dtype=np.float32) # Ensure points have shape (4, 2)
|
||||||
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
|
|||||||
'-i', '-', # Input from stdin
|
'-i', '-', # Input from stdin
|
||||||
'-an', # No audio
|
'-an', # No audio
|
||||||
'-vcodec', encoder,
|
'-vcodec', encoder,
|
||||||
'-pix_fmt', 'yuv420p',
|
'-pix_fmt', 'nv12',
|
||||||
'-preset', 'slow',
|
'-preset', 'slow',
|
||||||
'-b:v', '50M',
|
'-b:v', '50M',
|
||||||
output_video_path
|
output_video_path
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ output_folder="${input_file%.*}_segments"
|
|||||||
mkdir -p "$output_folder"
|
mkdir -p "$output_folder"
|
||||||
|
|
||||||
# Split the video into segments using ffmpeg
|
# Split the video into segments using ffmpeg
|
||||||
ffmpeg -i "$input_file" -c copy -f segment -segment_time "$time" -reset_timestamps 1 "$output_folder/segment_%03d.mp4"
|
ffmpeg -i "$input_file" -force_key_frames "expr:gte(t,n_forced*5)" -c copy -f segment -segment_time "$time" -reset_timestamps 1 -copyts "$output_folder/segment_%03d.mp4"
|
||||||
|
|
||||||
# Change to the output folder
|
# Change to the output folder
|
||||||
cd "$output_folder"
|
cd "$output_folder"
|
||||||
|
|||||||
Reference in New Issue
Block a user