diff --git a/notebooks/concat_output_videos.sh b/notebooks/concat_output_videos.sh
index cba577f..41fbcb1 100755
--- a/notebooks/concat_output_videos.sh
+++ b/notebooks/concat_output_videos.sh
@@ -23,18 +23,23 @@
 for dir in $dirs; do
     segment_name=$(basename "$dir")
    segment_num=$(echo "$segment_name" | sed 's/segment_//')
+    #formatted_segment_num=$(printf "%03d" "$segment_num")
+
+    output_file="$dir/output_$segment_num.mp4"
+    #output_file="$dir/output_${formatted_segment_num}.mp4"
+
     if [ -f "$output_file" ]; then
         echo "file '$output_file'" >> "$FILE_LIST"
     else
-        echo "No output_$segment_num.mp4 found in $dir"
+        echo "No $output_file found in $dir"
     fi
 done
 
 # Run ffmpeg to concatenate the videos
-ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined.mp4
+ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined_test.mp4
 
 # Remove the temporary file
 rm "$FILE_LIST"
diff --git a/notebooks/foo_points_prev.py b/notebooks/foo_points_prev.py
index 11ddc99..2a6e808 100644
--- a/notebooks/foo_points_prev.py
+++ b/notebooks/foo_points_prev.py
@@ -26,6 +26,7 @@
 import os
 import cv2
 import numpy as np
+import cupy as cp
 from concurrent.futures import ThreadPoolExecutor
 import torch
 import logging
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
     return per_obj_input_mask, input_palette
 
+
 def apply_green_mask(frame, masks):
-    """
-    Applies masks to the frame, replacing the background with green.
-
-    Parameters:
-    - frame: numpy array representing the image frame.
-    - masks: list of numpy arrays representing the masks.
-
-    Returns:
-    - result_frame: numpy array with the green background applied.
-    """
-    # Initialize combined mask as a boolean array
-    combined_mask = np.zeros(frame.shape[:2], dtype=bool)
-
+    # Move the frame to the GPU and build the combined mask there
+    frame_gpu = cp.asarray(frame)
+    combined_mask = cp.zeros(frame_gpu.shape[:2], dtype=cp.bool_)
     for mask in masks:
-        mask = mask.squeeze()
-
-        # Resize the mask if necessary
-        if mask.shape != frame.shape[:2]:
-            # Resize the mask using bilinear interpolation
-            # and convert it to float32 for accurate interpolation
-            resized_mask = cv2.resize(
-                mask.astype(np.float32),
-                (frame.shape[1], frame.shape[0]),
-                interpolation=cv2.INTER_CUBIC
-            )
-            # Threshold the resized mask to obtain a boolean mask
-            # add a small gausian blur to the mask to smooth out the edges
-            blurred_mask = cv2.GaussianBlur(resized_mask, (5, 5), 0)
-
-            combined_mask = np.maximum(combined_mask, blurred_mask)
-
-            mask = resized_mask > 0.5
+        mask_gpu = cp.asarray(mask.squeeze())
+        if mask_gpu.shape != frame_gpu.shape[:2]:
+            # cv2.resize runs on the CPU, so round-trip through NumPy
+            resized_mask = cv2.resize(cp.asnumpy(mask_gpu).astype(np.float32),
+                                      (frame_gpu.shape[1], frame_gpu.shape[0]))
+            mask_gpu = cp.asarray(resized_mask > 0.5)  # Threshold and move back to the GPU
         else:
-            # Ensure mask is boolean
-            mask = mask.astype(bool)
+            mask_gpu = mask_gpu.astype(cp.bool_)  # Ensure boolean type
+        combined_mask |= mask_gpu  # Combine masks with logical OR
 
-        # Combine masks using logical OR
-        combined_mask |= mask  # Now both arrays are bool
+    # Build the green background on the GPU ([0, 255, 0] in BGR channel order)
+    green_background = cp.zeros_like(frame_gpu)
+    green_background[..., 1] = 255
+    result_frame = cp.where(combined_mask[..., None], frame_gpu, green_background)
+    return cp.asnumpy(result_frame)  # Convert back to NumPy
 
-    # Create a green background image
-    green_background = np.full_like(frame, [0, 255, 0])
-    # Use combined mask to overlay the original frame onto the green background
-    result_frame = np.where(
-        combined_mask[..., None],
-        frame,
-        green_background
-    )
-
-    return result_frame
+# def apply_green_mask(frame, masks):
+#     """
+#     Applies masks to the frame, replacing the background with green.
+#
+#     Parameters:
+#     - frame: numpy array representing the image frame.
+#     - masks: list of numpy arrays representing the masks.
+#
+#     Returns:
+#     - result_frame: numpy array with the green background applied.
+#     """
+#     # Initialize combined mask as a boolean array
+#     combined_mask = np.zeros(frame.shape[:2], dtype=bool)
+#
+#     for mask in masks:
+#         mask = mask.squeeze()
+#
+#         # Resize the mask if necessary
+#         if mask.shape != frame.shape[:2]:
+#             # Resize the mask using bicubic interpolation,
+#             # converting to float32 for accurate interpolation
+#             resized_mask = cv2.resize(
+#                 mask.astype(np.float32),
+#                 (frame.shape[1], frame.shape[0]),
+#                 interpolation=cv2.INTER_CUBIC
+#             )
+#             # Threshold the resized mask to obtain a boolean mask
+#             mask = resized_mask > 0.5
+#         else:
+#             # Ensure mask is boolean
+#             mask = mask.astype(bool)
+#
+#         # Combine masks using logical OR
+#         combined_mask |= mask  # Now both arrays are bool
+#
+#     # Create a green background image
+#     green_background = np.full_like(frame, [0, 255, 0])
+#     # Use the combined mask to overlay the original frame onto the green background
+#     result_frame = np.where(
+#         combined_mask[..., None],
+#         frame,
+#         green_background
+#     )
+#     #result_frame = frame.copy()
+#     #result_frame[~combined_mask] = [0, 255, 0]
+#
+#     return result_frame
 
 def initialize_predictor():
     if torch.cuda.is_available():
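Side note on the resize branch in apply_green_mask above: cv2.resize only runs on the CPU, so every mask that needs resizing pays a GPU-to-CPU copy and back. A minimal sketch of a fully-on-GPU alternative, assuming each mask is 2D after squeeze() and that cupyx ships with the installed CuPy; the helper name resize_mask_gpu is hypothetical and not part of this patch:

import cupy as cp
from cupyx.scipy import ndimage as cp_ndimage

def resize_mask_gpu(mask_gpu, target_h, target_w):
    # Hypothetical helper: bilinear (order=1) resize on the GPU
    # via cupyx's scipy-compatible zoom, then threshold to boolean.
    zoom_factors = (target_h / mask_gpu.shape[0], target_w / mask_gpu.shape[1])
    resized = cp_ndimage.zoom(mask_gpu.astype(cp.float32), zoom_factors, order=1)
    return resized > 0.5  # Boolean mask at the target resolution

With something like this, the loop body could stay entirely on the device until the final cp.asnumpy call.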
@@ -194,7 +215,7 @@
 
     return frame
 
-def select_points(first_frame):
+def select_points_old(first_frame):
     points_a = []
     points_b = []
     current_object = 'A'
@@ -231,6 +252,43 @@
     cv2.destroyAllWindows()
     return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)
 
+def select_points(first_frame):
+    points_a = []
+    point_count = 0
+    selection_complete = False
+    frame_width = first_frame.shape[1]
+    half_frame_width = frame_width // 2  # Integer division for pixel coordinates
+
+    def mouse_callback(event, x, y, flags, param):
+        nonlocal point_count, selection_complete
+        if event == cv2.EVENT_LBUTTONDOWN:
+            points_a.append((x, y))
+            point_count += 1
+            print(f"Selected point {point_count} for Object A: ({x}, {y})")
+            if len(points_a) == 5:  # Collect five points for Object A
+                selection_complete = True
+
+    print("Select 5 points for Object A (left side)")
+    cv2.namedWindow('Select Points', cv2.WINDOW_NORMAL)
+    cv2.resizeWindow('Select Points', int(first_frame.shape[1] * (500 / first_frame.shape[0])), 500)
+    cv2.imshow('Select Points', first_frame)
+    cv2.setMouseCallback('Select Points', mouse_callback)
+
+    while not selection_complete:
+        cv2.waitKey(1)
+
+    cv2.destroyAllWindows()
+
+    # Automatically generate points for Object B by shifting the x-coordinates
+    points_a = np.array(points_a, dtype=np.float32)
+    points_b = points_a.copy()
+    points_b[:, 0] += half_frame_width  # Shift x-coordinates by half the frame width
+
+    # Clamp the shifted points to the frame boundaries
+    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
+
+    return points_a, points_b
+
 def add_points_to_predictor(predictor, inference_state, points, obj_id):
     labels = np.array([1, 1, 1, 1, 1], np.int32)  # One positive label per point (5 points)
     points = np.array(points, dtype=np.float32)  # Ensure points have shape (5, 2)
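For reference, the new select_points assumes a side-by-side frame layout: five clicks on the left half define Object A, and Object B is derived by shifting those points right by half the frame width. A minimal usage sketch under that assumption; predictor, inference_state, and video_path are assumed to come from the surrounding script, and the obj_id values are illustrative:

first_frame = load_first_frame(video_path)
points_a, points_b = select_points(first_frame)

# One tracked object per half; the ids are arbitrary but must be distinct
add_points_to_predictor(predictor, inference_state, points_a, obj_id=1)
add_points_to_predictor(predictor, inference_state, points_b, obj_id=2)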
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
         '-i', '-',  # Input from stdin
         '-an',  # No audio
         '-vcodec', encoder,
-        '-pix_fmt', 'yuv420p',
+        '-pix_fmt', 'nv12',
         '-preset', 'slow',
         '-b:v', '50M',
         output_video_path
diff --git a/notebooks/rvm_split_seconds.sh b/notebooks/rvm_split_seconds.sh
index b1f8e94..c577f43 100755
--- a/notebooks/rvm_split_seconds.sh
+++ b/notebooks/rvm_split_seconds.sh
@@ -18,7 +18,7 @@ output_folder="${input_file%.*}_segments"
 mkdir -p "$output_folder"
 
 # Split the video into segments using ffmpeg
-ffmpeg -i "$input_file" -c copy -f segment -segment_time "$time" -reset_timestamps 1 "$output_folder/segment_%03d.mp4"
+ffmpeg -i "$input_file" -force_key_frames "expr:gte(t,n_forced*5)" -c copy -f segment -segment_time "$time" -reset_timestamps 1 -copyts "$output_folder/segment_%03d.mp4"
 
 # Change to the output folder
 cd "$output_folder"
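One caveat on the split command above: -force_key_frames steers an encoder, so combined with -c copy (stream copy, no encoder) it has no effect, and the segment muxer can still only cut on keyframes already present in the input; -copyts may also work against -reset_timestamps 1, which expects each segment's timestamps to restart near zero. A hedged alternative, assuming re-encoding the video track is acceptable and libx264 is available; note the keyframe expression is tied to "$time" rather than a hardcoded 5:

ffmpeg -i "$input_file" -c:v libx264 -force_key_frames "expr:gte(t,n_forced*$time)" \
    -c:a copy -f segment -segment_time "$time" -reset_timestamps 1 \
    "$output_folder/segment_%03d.mp4"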