use cuda for mask

2024-12-07 12:04:58 -08:00
parent ad5db644fa
commit 61d525dc83
3 changed files with 115 additions and 52 deletions
--- a/notebooks/foo_points_prev.py
+++ b/notebooks/foo_points_prev.py
@@ -26,6 +26,7 @@
 import os
 import cv2
 import numpy as np
+import cupy as cp
 from concurrent.futures import ThreadPoolExecutor
 import torch
 import logging
@@ -91,60 +92,80 @@ def load_previous_segment_mask(prev_segment_dir):
    
    return per_obj_input_mask, input_palette

+
 def apply_green_mask(frame, masks):
-    """
-    Applies masks to the frame, replacing the background with green.
-
-    Parameters:
-
-    - frame: numpy array representing the image frame.
-    - masks: list of numpy arrays representing the masks.
-
-    Returns:
-    - result_frame: numpy array with the green background applied.
-    """
-    # Initialize combined mask as a boolean array
-    combined_mask = np.zeros(frame.shape[:2], dtype=bool)
-
+    # Move the frame to the GPU as a CuPy array (each mask is converted inside the loop below)
+    frame_gpu = cp.asarray(frame)
+    combined_mask = cp.zeros(frame_gpu.shape[:2], dtype=cp.bool_)

    for mask in masks:
-        mask = mask.squeeze()
-
-        # Resize the mask if necessary
-        if mask.shape != frame.shape[:2]:
-            # Resize the mask using bilinear interpolation
-
-            # and convert it to float32 for accurate interpolation
-            resized_mask = cv2.resize(
-                mask.astype(np.float32),
-                (frame.shape[1], frame.shape[0]),
-                interpolation=cv2.INTER_CUBIC
-            )
-            # Threshold the resized mask to obtain a boolean mask
-            # add a small gausian blur to the mask to smooth out the edges
-            blurred_mask = cv2.GaussianBlur(resized_mask, (5, 5), 0)
-
-            combined_mask = np.maximum(combined_mask, blurred_mask)
-
-            mask = resized_mask > 0.5
+        mask_gpu = cp.asarray(mask.squeeze())
+        if mask_gpu.shape != frame_gpu.shape[:2]:
+            resized_mask = cv2.resize(cp.asnumpy(mask_gpu).astype(cp.float32), 
+                                      (frame_gpu.shape[1], frame_gpu.shape[0]))
+            mask_gpu = cp.asarray(resized_mask > 0.5)  # Convert back to CuPy boolean array
        else:
-            # Ensure mask is boolean
-            mask = mask.astype(bool)
+            mask_gpu = mask_gpu.astype(cp.bool_)  # Ensure boolean type
+        combined_mask |= mask_gpu  # Perform the bitwise OR operation

-        # Combine masks using logical OR
-        combined_mask |= mask  # Now both arrays are bool
+    green_background = cp.full(frame_gpu.shape, cp.array([0, 255, 0], dtype=cp.uint8), dtype=cp.uint8)
+    result_frame = cp.where(combined_mask[..., None], frame_gpu, green_background)
+    return cp.asnumpy(result_frame)  # Convert back to NumPy

-    # Create a green background image
-    green_background = np.full_like(frame, [0, 255, 0])
-    # Use combined mask to overlay the original frame onto the green background
-    result_frame = np.where(
-        combined_mask[..., None],

-        frame,
-        green_background
-    )
-
-    return result_frame
+# def apply_green_mask(frame, masks):
+#     """
+#     Applies masks to the frame, replacing the background with green.
+#
+#     Parameters:
+#
+#     - frame: numpy array representing the image frame.
+#     - masks: list of numpy arrays representing the masks.
+#
+#     Returns:
+#     - result_frame: numpy array with the green background applied.
+#     """
+#     # Initialize combined mask as a boolean array
+#     combined_mask = cp.zeros(frame.shape[:2], dtype=bool)
+#
+#
+#     for mask in masks:
+#         mask = mask.squeeze()
+#
+#         # Resize the mask if necessary
+#         if mask.shape != frame.shape[:2]:
+#             # Resize the mask using bicubic interpolation
+#
+#             # and convert it to float32 for accurate interpolation
+#             resized_mask = cv2.resize(
+#                 mask.astype(cp.float32),
+#                 (frame.shape[1], frame.shape[0]),
+#                 interpolation=cv2.INTER_CUBIC
+#             )
+#             # Threshold the resized mask to obtain a boolean mask
+#             # (a small Gaussian blur was previously applied here to smooth out the mask edges)
+#
+#             mask = resized_mask > 0.5
+#         else:
+#             # Ensure mask is boolean
+#             mask = mask.astype(bool)
+#
+#         # Combine masks using logical OR
+#         combined_mask |= mask  # Now both arrays are bool
+#
+#     # Create a green background image
+#     green_background = cp.full_like(frame, [0, 255, 0])
+#     # Use combined mask to overlay the original frame onto the green background
+#     result_frame = cp.where(
+#         combined_mask[..., None],
+#
+#         frame,
+#         green_background
+#     )
+#     #result_frame = frame.copy()
+#     #result_frame[~combined_mask] = [0, 255, 0]
+#
+#     return result_frame

 def initialize_predictor():
    if torch.cuda.is_available():
@@ -194,7 +215,7 @@ def load_first_frame(video_path, scale=1.0):

    return frame

-def select_points(first_frame):
+def select_points_old(first_frame):
    points_a = []
    points_b = []
    current_object = 'A'
@@ -231,6 +252,43 @@ def select_points(first_frame):
    cv2.destroyAllWindows()
    return np.array(points_a, dtype=np.float32), np.array(points_b, dtype=np.float32)

+def select_points(first_frame):
+    points_a = []
+    point_count = 0
+    selection_complete = False
+    frame_width = first_frame.shape[1]
+    half_frame_width = frame_width // 2  # Integer division for pixel coordinates
+
+    def mouse_callback(event, x, y, flags, param):
+        nonlocal points_a, point_count, selection_complete
+        if event == cv2.EVENT_LBUTTONDOWN:
+            points_a.append((x, y))
+            point_count += 1
+            print(f"Selected point {point_count} for Object A: ({x}, {y})")
+            if len(points_a) == 5:  # Collect 5 points for Object A
+                selection_complete = True
+
+    print("Select 5 points for Object A (left side)")
+    cv2.namedWindow('Select Points', cv2.WINDOW_NORMAL)
+    cv2.resizeWindow('Select Points', int(first_frame.shape[1] * (500 / first_frame.shape[0])), 500)
+    cv2.imshow('Select Points', first_frame)
+    cv2.setMouseCallback('Select Points', mouse_callback)
+
+    while not selection_complete:
+        cv2.waitKey(1)
+
+    cv2.destroyAllWindows()
+
+    # Automatically generate points for Object B by shifting x-coordinates
+    points_a = np.array(points_a, dtype=np.float32)
+    points_b = points_a.copy()
+    points_b[:, 0] += half_frame_width  # Shift x-coordinate by half the frame width
+
+    # Ensure that the shifted points are within the frame boundaries
+    points_b[:, 0] = np.clip(points_b[:, 0], 0, frame_width - 1)
+
+    return points_a, points_b
+
 def add_points_to_predictor(predictor, inference_state, points, obj_id):
    labels = np.array([1, 1, 1, 1, 1], np.int32)  # Update labels to match 4 points
    points = np.array(points, dtype=np.float32)  # Ensure points have shape (4, 2)
@@ -309,7 +367,7 @@ def process_and_save_output_video(video_path, output_video_path, video_segments,
            '-i', '-',  # Input from stdin
            '-an',  # No audio
            '-vcodec', encoder,
-            '-pix_fmt', 'yuv420p',
+            '-pix_fmt', 'nv12',
            '-preset', 'slow',
            '-b:v', '50M',
            output_video_path