Use CUDA for mask

2024-12-07 12:04:58 -08:00
parent ad5db644fa
commit 61d525dc83
3 changed files with 115 additions and 52 deletions

View File

@@ -23,18 +23,23 @@ for dir in $dirs; do
   segment_name=$(basename "$dir")
   segment_num=$(echo "$segment_name" | sed 's/segment_//')
+  #formatted_segment_number=$(printf "%03d" "$segment_num")
   output_file="$dir/output_$segment_num.mp4"
+  #output_file="$dir/output_${formatted_segment_num}.mp4"
   if [ -f "$output_file" ]; then
     echo "file '$output_file'" >> "$FILE_LIST"
   else
-    echo "No output_$segment_num.mp4 found in $dir"
+    echo "No $output_file found in $dir"
   fi
 done

 # Run ffmpeg to concatenate the videos
-ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined.mp4
+ffmpeg -f concat -safe 0 -i "$FILE_LIST" -c copy output_combined_test.mp4

 # Remove the temporary file
 rm "$FILE_LIST"

View File

@@ -26,6 +26,7 @@
 import os
 import cv2
 import numpy as np
+import cupy as cp
 from concurrent.futures import ThreadPoolExecutor
 import torch
 import logging
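
Note: an unconditional `import cupy as cp` makes the script hard-require a working CUDA runtime; it will now crash at import time on CPU-only machines. A common guard, shown here purely as a sketch and not part of this commit, falls back to NumPy:

    try:
        import cupy as cp
        cp.cuda.runtime.getDeviceCount()  # Raises if no CUDA device is visible
    except Exception:
        import numpy as cp  # Same array API for everything this file uses...
        cp.asnumpy = lambda a: a  # ...except asnumpy, so shim it for the call sites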
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
     return per_obj_input_mask, input_palette

 def apply_green_mask(frame, masks):
-    """
-    Applies masks to the frame, replacing the background with green.
-
-    Parameters:
-    - frame: numpy array representing the image frame.
-    - masks: list of numpy arrays representing the masks.
-
-    Returns:
-    - result_frame: numpy array with the green background applied.
-    """
-    # Initialize combined mask as a boolean array
-    combined_mask = np.zeros(frame.shape[:2], dtype=bool)
+    # Convert frame and masks to CuPy arrays
+    frame_gpu = cp.asarray(frame)
+    combined_mask = cp.zeros(frame_gpu.shape[:2], dtype=cp.bool_)

     for mask in masks:
-        mask = mask.squeeze()
-
-        # Resize the mask if necessary
-        if mask.shape != frame.shape[:2]:
-            # Resize the mask using bicubic interpolation
-            # and convert it to float32 for accurate interpolation
-            resized_mask = cv2.resize(
-                mask.astype(np.float32),
-                (frame.shape[1], frame.shape[0]),
-                interpolation=cv2.INTER_CUBIC
-            )
-            # Threshold the resized mask to obtain a boolean mask
-            # add a small gaussian blur to the mask to smooth out the edges
-            blurred_mask = cv2.GaussianBlur(resized_mask, (5, 5), 0)
-            combined_mask = np.maximum(combined_mask, blurred_mask)
-            mask = resized_mask > 0.5
+        mask_gpu = cp.asarray(mask.squeeze())
+        if mask_gpu.shape != frame_gpu.shape[:2]:
+            resized_mask = cv2.resize(cp.asnumpy(mask_gpu).astype(cp.float32),
+                                      (frame_gpu.shape[1], frame_gpu.shape[0]))
+            mask_gpu = cp.asarray(resized_mask > 0.5)  # Convert back to a CuPy boolean array
         else:
-            # Ensure mask is boolean
-            mask = mask.astype(bool)
-
-        # Combine masks using logical OR
-        combined_mask |= mask  # Now both arrays are bool
-
-    # Create a green background image
-    green_background = np.full_like(frame, [0, 255, 0])
-    # Use combined mask to overlay the original frame onto the green background
-    result_frame = np.where(
-        combined_mask[..., None],
-        frame,
-        green_background
-    )
-
-    return result_frame
+            mask_gpu = mask_gpu.astype(cp.bool_)  # Ensure boolean type
+        combined_mask |= mask_gpu  # Combine masks with logical OR
+
+    green_background = cp.full(frame_gpu.shape, cp.array([0, 255, 0], dtype=cp.uint8), dtype=cp.uint8)
+    result_frame = cp.where(combined_mask[..., None], frame_gpu, green_background)
+    return cp.asnumpy(result_frame)  # Convert back to NumPy
+
+# def apply_green_mask(frame, masks):
+#     """
+#     Applies masks to the frame, replacing the background with green.
+#
+#     Parameters:
+#     - frame: numpy array representing the image frame.
+#     - masks: list of numpy arrays representing the masks.
+#
+#     Returns:
+#     - result_frame: numpy array with the green background applied.
+#     """
+#     # Initialize combined mask as a boolean array
+#     combined_mask = cp.zeros(frame.shape[:2], dtype=bool)
+#
+#     for mask in masks:
+#         mask = mask.squeeze()
+#
+#         # Resize the mask if necessary
+#         if mask.shape != frame.shape[:2]:
+#             # Resize the mask using bicubic interpolation
+#             # and convert it to float32 for accurate interpolation
+#             resized_mask = cv2.resize(
+#                 mask.astype(cp.float32),
+#                 (frame.shape[1], frame.shape[0]),
+#                 interpolation=cv2.INTER_CUBIC
+#             )
+#             # Threshold the resized mask to obtain a boolean mask
+#             # add a small gaussian blur to the mask to smooth out the edges
+#             mask = resized_mask > 0.5
+#         else:
+#             # Ensure mask is boolean
+#             mask = mask.astype(bool)
+#
+#         # Combine masks using logical OR
+#         combined_mask |= mask  # Now both arrays are bool
+#
+#     # Create a green background image
+#     green_background = cp.full_like(frame, [0, 255, 0])
+#     # Use combined mask to overlay the original frame onto the green background
+#     result_frame = cp.where(
+#         combined_mask[..., None],
+#         frame,
+#         green_background
+#     )
+#     #result_frame = frame.copy()
+#     #result_frame[~combined_mask] = [0, 255, 0]
+#
+#     return result_frame

 def initialize_predictor():
     if torch.cuda.is_available():
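
Note: the new `apply_green_mask` keeps thresholding and compositing on the GPU but still round-trips every mask through the CPU for `cv2.resize` (`cp.asnumpy` → resize → `cp.asarray`), and the old `GaussianBlur` edge smoothing has no counterpart in the CuPy version. If the copies show up in profiles, one alternative is to resize on the GPU with `cupyx.scipy.ndimage.zoom`; a minimal sketch, where the function name and order-1 interpolation are my choices, not the commit's:

    import cupy as cp
    from cupyx.scipy.ndimage import zoom

    def resize_mask_gpu(mask_gpu, target_h, target_w):
        # Per-axis zoom factors; zoom's output shape is round(shape * factor),
        # so verify it matches the frame size before relying on this
        factors = (target_h / mask_gpu.shape[0], target_w / mask_gpu.shape[1])
        resized = zoom(mask_gpu.astype(cp.float32), factors, order=1)  # order=1: linear
        return resized > 0.5  # Boolean mask, still on the GPU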
@@ -194,7 +215,7 @@ def load_first_frame(video_path, scale=1.0):
     return frame

-def select_points(first_frame):
+def select_points_old(first_frame):
     points_a = []
     points_b = []
     current_object = 'A'
@@ -231,6 +252,43 @@ def select_points(first_frame):
     cv2.destroyAllWindows()
     return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)

+def select_points(first_frame):
+    points_a = []
+    point_count = 0
+    selection_complete = False
+
+    frame_width = first_frame.shape[1]
+    half_frame_width = frame_width // 2  # Integer division for pixel coordinates
+
+    def mouse_callback(event, x, y, flags, param):
+        nonlocal points_a, point_count, selection_complete
+        if event == cv2.EVENT_LBUTTONDOWN:
+            points_a.append((x, y))
+            point_count += 1
+            print(f"Selected point {point_count} for Object A: ({x}, {y})")
+            if len(points_a) == 5:  # Collect 5 points for Object A
+                selection_complete = True
+
+    print("Select 5 points for Object A (left side)")
+    cv2.namedWindow('Select Points', cv2.WINDOW_NORMAL)
+    cv2.resizeWindow('Select Points', int(first_frame.shape[1] * (500 / first_frame.shape[0])), 500)
+    cv2.imshow('Select Points', first_frame)
+    cv2.setMouseCallback('Select Points', mouse_callback)
+
+    while not selection_complete:
+        cv2.waitKey(1)
+    cv2.destroyAllWindows()
+
+    # Automatically generate points for Object B by shifting x-coordinates
+    points_a = np.array(points_a, dtype=np.float32)
+    points_b = points_a.copy()
+    points_b[:, 0] += half_frame_width  # Shift x-coordinates by half the frame width
+    # Ensure that the shifted points are within the frame boundaries
+    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
+
+    return points_a, points_b
+
 def add_points_to_predictor(predictor, inference_state, points, obj_id):
     labels = np.array([1, 1, 1, 1, 1], np.int32)  # Positive labels for the 5 points
     points = np.array(points, dtype=np.float32)  # Ensure points have shape (5, 2)
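
Note: the new `select_points` collects five clicks for Object A and derives Object B by shifting x by half the frame width, which assumes a side-by-side layout where both objects sit at the same vertical position. A quick check of the shift-and-clip step with made-up coordinates:

    import numpy as np

    frame_width = 3840  # hypothetical side-by-side frame
    points_a = np.array([[100.0, 200.0], [1950.0, 500.0]], dtype=np.float32)
    points_b = points_a.copy()
    points_b[:, 0] += frame_width // 2   # shift into the right half
    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
    print(points_b)  # [[2020. 200.] [3839. 500.]] -- second x clipped from 3870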
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
         '-i', '-',  # Input from stdin
         '-an',  # No audio
         '-vcodec', encoder,
-        '-pix_fmt', 'yuv420p',
+        '-pix_fmt', 'nv12',
         '-preset', 'slow',
         '-b:v', '50M',
         output_video_path
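
Note: `nv12` is the semi-planar 4:2:0 layout that NVIDIA's hardware encoders consume natively, so this change can save a pixel-format conversion in front of NVENC; software encoders such as libx264 generally expect `yuv420p` instead. For context, a sketch of the kind of stdin-pipe setup this hunk sits in; the encoder name, frame size, fps, and input pixel format here are placeholders, not values from the commit:

    import subprocess

    encoder = 'h264_nvenc'               # placeholder; the script selects this elsewhere
    width, height, fps = 1920, 1080, 30  # placeholders
    cmd = [
        'ffmpeg', '-y',
        '-f', 'rawvideo', '-pix_fmt', 'bgr24',
        '-s', f'{width}x{height}', '-r', str(fps),
        '-i', '-',             # raw frames piped in on stdin
        '-an',                 # no audio
        '-vcodec', encoder,
        '-pix_fmt', 'nv12',    # NVENC-native 4:2:0 layout
        '-preset', 'slow',
        '-b:v', '50M',
        'output.mp4',
    ]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)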

View File

@@ -18,7 +18,7 @@ output_folder="${input_file%.*}_segments"
 mkdir -p "$output_folder"

 # Split the video into segments using ffmpeg
-ffmpeg -i "$input_file" -c copy -f segment -segment_time "$time" -reset_timestamps 1 "$output_folder/segment_%03d.mp4"
+ffmpeg -i "$input_file" -force_key_frames "expr:gte(t,n_forced*5)" -c copy -f segment -segment_time "$time" -reset_timestamps 1 -copyts "$output_folder/segment_%03d.mp4"

 # Change to the output folder
 cd "$output_folder"