sbs working phase 1
main.py (325 changed lines)
@@ -188,6 +188,293 @@ def resolve_detect_segments(detect_segments, total_segments: int) -> List[int]:
        logger.warning(f"Invalid detect_segments format: {detect_segments}. Using all segments.")
        return list(range(total_segments))


def process_segment_with_separate_eyes(segment_info, detector, sam2_processor, mask_processor, config,
                                       previous_left_masks=None, previous_right_masks=None):
"""
|
||||
Process a single segment using separate eye processing mode.
|
||||
Split video first, then run YOLO independently on each eye.
|
||||
|
||||
Args:
|
||||
segment_info: Segment information dictionary
|
||||
detector: YOLO detector instance
|
||||
sam2_processor: SAM2 processor with eye processing enabled
|
||||
mask_processor: Mask processor instance
|
||||
config: Configuration loader instance
|
||||
previous_left_masks: Previous masks for left eye
|
||||
previous_right_masks: Previous masks for right eye
|
||||
|
||||
Returns:
|
||||
Tuple of (success, left_masks, right_masks)
|
||||
"""
|
||||
    segment_idx = segment_info['index']
    logger.info(f"VR180 Separate Eyes: Processing segment {segment_idx} (video-split approach)")

    # Get video properties
    cap = cv2.VideoCapture(segment_info['video_file'])
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()

    full_frame_shape = (frame_height, frame_width)
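    # full_frame_shape is the full side-by-side frame; each eye spans half of
    # frame_width, which is why frame_width // 2 is passed when converting
    # per-eye detections to SAM2 prompts below.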

    # Step 1: Split the segment video into left- and right-eye videos
    left_eye_video = os.path.join(segment_info['directory'], "left_eye.mp4")
    right_eye_video = os.path.join(segment_info['directory'], "right_eye.mp4")

    logger.info("VR180 Separate Eyes: Splitting segment video into eye videos")
    success = sam2_processor.eye_processor.split_video_into_eyes(
        segment_info['video_file'],
        left_eye_video,
        right_eye_video,
        scale=config.get_inference_scale()
    )
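
    # The inference scale is applied once at split time, so every later
    # per-eye step (YOLO, SAM2) runs on the eye videos at scale 1.0.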

    if not success:
        logger.error(f"VR180 Separate Eyes: Failed to split video for segment {segment_idx}")
        return False, None, None

    # Check that both eye videos were created
    if not os.path.exists(left_eye_video) or not os.path.exists(right_eye_video):
        logger.error(f"VR180 Separate Eyes: Eye video files not created for segment {segment_idx}")
        return False, None, None

    logger.info(f"VR180 Separate Eyes: Created eye videos - left: {left_eye_video}, right: {right_eye_video}")

    # Step 2: Run YOLO independently on each eye video
    left_detections = detector.detect_humans_in_video_first_frame(
        left_eye_video, scale=1.0  # Already scaled during video splitting
    )

    right_detections = detector.detect_humans_in_video_first_frame(
        right_eye_video, scale=1.0  # Already scaled during video splitting
    )

    logger.info(f"VR180 Separate Eyes: YOLO detections - left: {len(left_detections)}, right: {len(right_detections)}")

    # Check whether we have YOLO segmentation masks
    has_yolo_masks = False
    if detector.supports_segmentation:
        has_yolo_masks = any(d.get('has_mask', False) for d in (left_detections + right_detections))

    if has_yolo_masks:
        logger.info("VR180 Separate Eyes: YOLO segmentation mode - using direct masks instead of bounding boxes")
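
    # Two ways to seed SAM2: a segmentation-capable YOLO model supplies pixel
    # masks that are used directly as initial masks, while a detection-only
    # model supplies bounding boxes that are converted to SAM2 prompts below.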

    # Save eye-specific debug frames if enabled
    if config.get('advanced.save_yolo_debug_frames', False) and (left_detections or right_detections):
        try:
            # Load the first frame from each eye video
            left_cap = cv2.VideoCapture(left_eye_video)
            ret_left, left_frame = left_cap.read()
            left_cap.release()

            right_cap = cv2.VideoCapture(right_eye_video)
            ret_right, right_frame = right_cap.read()
            right_cap.release()

            if ret_left and ret_right:
                left_debug_path = os.path.join(segment_info['directory'], "left_eye_debug.jpg")
                right_debug_path = os.path.join(segment_info['directory'], "right_eye_debug.jpg")

                detector.save_eye_debug_frames(
                    left_frame, right_frame,
                    left_detections, right_detections,
                    left_debug_path, right_debug_path
                )

                logger.info(f"VR180 Separate Eyes: Saved eye-specific debug frames for segment {segment_idx}")
            else:
                logger.warning("VR180 Separate Eyes: Could not load eye frames for debug visualization")

        except Exception as e:
            logger.warning(f"VR180 Separate Eyes: Failed to create eye debug frames: {e}")

    # Step 3: Process the left eye if detections exist or we have previous masks
    left_masks = None
    if left_detections or previous_left_masks:
        try:
            left_prompts = None
            left_initial_masks = None

            if left_detections:
                if has_yolo_masks:
                    # YOLO segmentation mode: convert masks to initial masks for SAM2
                    left_initial_masks = {}
                    for detection in left_detections:
                        if detection.get('has_mask', False):
                            mask = detection['mask']
                            left_initial_masks[1] = mask.astype(bool)  # Always use obj_id=1 for a single eye
                            logger.info(f"VR180 Separate Eyes: Left eye YOLO mask - shape: {mask.shape}, pixels: {np.sum(mask)}")
                            break  # Only take the first/best mask for single-eye processing

                if left_initial_masks:
                    logger.info("VR180 Separate Eyes: Left eye - using YOLO segmentation masks as initial masks")
                else:
                    # YOLO detection mode: convert bounding boxes to prompts
                    left_prompts = detector.convert_detections_to_sam2_prompts(left_detections, frame_width // 2)
                    logger.info(f"VR180 Separate Eyes: Left eye - {len(left_prompts)} SAM2 prompts")

            # Create temporary segment info pointing at the left-eye video
            left_segment_info = segment_info.copy()
            left_segment_info['video_file'] = left_eye_video
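
            # Fresh YOLO masks (left_initial_masks) take precedence below;
            # previous_left_masks only seeds SAM2 when this segment produced
            # no new detections, keeping tracking alive across segments.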
            left_masks = sam2_processor.process_single_eye_segment(
                left_segment_info, 'left', left_prompts,
                left_initial_masks or previous_left_masks,
                1.0  # Scale already applied during video splitting
            )

            if left_masks:
                logger.info(f"VR180 Separate Eyes: Left eye processed - {len(left_masks)} frame masks")
            else:
                logger.warning("VR180 Separate Eyes: Left eye processing failed")

        except Exception as e:
            logger.error(f"VR180 Separate Eyes: Error processing left eye for segment {segment_idx}: {e}")
            left_masks = None
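
    # The right eye is processed independently rather than reusing left-eye
    # results, since stereo disparity can shift or even hide a subject in one
    # eye; the logic below mirrors Step 3.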
    # Step 4: Process the right eye if detections exist or we have previous masks
    right_masks = None
    if right_detections or previous_right_masks:
        try:
            right_prompts = None
            right_initial_masks = None

            if right_detections:
                if has_yolo_masks:
                    # YOLO segmentation mode: convert masks to initial masks for SAM2
                    right_initial_masks = {}
                    for detection in right_detections:
                        if detection.get('has_mask', False):
                            mask = detection['mask']
                            right_initial_masks[1] = mask.astype(bool)  # Always use obj_id=1 for a single eye
                            logger.info(f"VR180 Separate Eyes: Right eye YOLO mask - shape: {mask.shape}, pixels: {np.sum(mask)}")
                            break  # Only take the first/best mask for single-eye processing

                if right_initial_masks:
                    logger.info("VR180 Separate Eyes: Right eye - using YOLO segmentation masks as initial masks")
                else:
                    # YOLO detection mode: convert bounding boxes to prompts
                    right_prompts = detector.convert_detections_to_sam2_prompts(right_detections, frame_width // 2)
                    logger.info(f"VR180 Separate Eyes: Right eye - {len(right_prompts)} SAM2 prompts")

            # Create temporary segment info pointing at the right-eye video
            right_segment_info = segment_info.copy()
            right_segment_info['video_file'] = right_eye_video

            right_masks = sam2_processor.process_single_eye_segment(
                right_segment_info, 'right', right_prompts,
                right_initial_masks or previous_right_masks,
                1.0  # Scale already applied during video splitting
            )

            if right_masks:
                logger.info(f"VR180 Separate Eyes: Right eye processed - {len(right_masks)} frame masks")
            else:
                logger.warning("VR180 Separate Eyes: Right eye processing failed")

        except Exception as e:
            logger.error(f"VR180 Separate Eyes: Error processing right eye for segment {segment_idx}: {e}")
            right_masks = None

    # Step 5: Check whether we got any valid masks
    if not left_masks and not right_masks:
        logger.warning(f"VR180 Separate Eyes: Neither eye produced valid masks for segment {segment_idx}")

        if config.get('processing.enable_greenscreen_fallback', True):
            logger.info(f"VR180 Separate Eyes: Using greenscreen fallback for segment {segment_idx}")
            success = mask_processor.process_greenscreen_only_segment(
                segment_info,
                green_color=config.get_green_color(),
                use_nvenc=config.get_use_nvenc(),
                bitrate=config.get_output_bitrate()
            )
            return success, None, None
        else:
            logger.error("VR180 Separate Eyes: No masks generated and greenscreen fallback disabled")
            return False, None, None
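
    # (Assumption from the name: process_greenscreen_only_segment writes a
    # segment that is just the green background, so the overall output
    # timeline stays intact even when nothing was tracked.)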

    # Step 6: Combine the eye masks back into full-frame format
    try:
        logger.info(f"VR180 Separate Eyes: Combining eye masks for segment {segment_idx}")
        combined_masks = sam2_processor.eye_processor.combine_eye_masks(
            left_masks, right_masks, full_frame_shape
        )

        if not combined_masks:
            logger.error(f"VR180 Separate Eyes: Failed to combine eye masks for segment {segment_idx}")
            return False, left_masks, right_masks
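
        # combined_masks maps frame_idx -> {obj_id: mask}. Summing the mask
        # pixels below guards against a silent tracking failure in both eyes.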
        # Validate that the combined masks have reasonable content
        total_mask_pixels = 0
        for frame_idx, frame_masks in combined_masks.items():
            for obj_id, mask in frame_masks.items():
                if mask is not None:
                    total_mask_pixels += np.sum(mask)

        if total_mask_pixels == 0:
            logger.warning(f"VR180 Separate Eyes: Combined masks are empty for segment {segment_idx}")
            if config.get('processing.enable_greenscreen_fallback', True):
                logger.info("VR180 Separate Eyes: Using greenscreen fallback due to empty masks")
                success = mask_processor.process_greenscreen_only_segment(
                    segment_info,
                    green_color=config.get_green_color(),
                    use_nvenc=config.get_use_nvenc(),
                    bitrate=config.get_output_bitrate()
                )
                return success, left_masks, right_masks

        logger.info(f"VR180 Separate Eyes: Combined masks contain {total_mask_pixels} total pixels")

    except Exception as e:
        logger.error(f"VR180 Separate Eyes: Error combining eye masks for segment {segment_idx}: {e}")
        # Try the greenscreen fallback if mask combination fails
        if config.get('processing.enable_greenscreen_fallback', True):
            logger.info("VR180 Separate Eyes: Using greenscreen fallback due to mask combination error")
            success = mask_processor.process_greenscreen_only_segment(
                segment_info,
                green_color=config.get_green_color(),
                use_nvenc=config.get_use_nvenc(),
                bitrate=config.get_output_bitrate()
            )
            return success, left_masks, right_masks
        else:
            return False, left_masks, right_masks

    # Step 7: Save the combined masks
    mask_path = os.path.join(segment_info['directory'], "mask.png")
    sam2_processor.save_final_masks(
        combined_masks,
        mask_path,
        green_color=config.get_green_color(),
        blue_color=config.get_blue_color()
    )

    # Step 8: Apply the green screen and save the output video
    success = mask_processor.process_segment(
        segment_info,
        combined_masks,
        use_nvenc=config.get_use_nvenc(),
        bitrate=config.get_output_bitrate()
    )

    if success:
        logger.info(f"VR180 Separate Eyes: Successfully processed segment {segment_idx}")
    else:
        logger.error(f"VR180 Separate Eyes: Failed to create output video for segment {segment_idx}")

    # Clean up the temporary eye video files
    try:
        if os.path.exists(left_eye_video):
            os.remove(left_eye_video)
        if os.path.exists(right_eye_video):
            os.remove(right_eye_video)
        logger.debug(f"VR180 Separate Eyes: Cleaned up temporary eye videos for segment {segment_idx}")
    except Exception as e:
        logger.warning(f"VR180 Separate Eyes: Failed to clean up temporary eye videos: {e}")

    return success, left_masks, right_masks


def main():
    """Main processing pipeline."""
    args = parse_arguments()
@@ -275,10 +562,23 @@ def main():
    )
logger.info("Step 3: Initializing SAM2 processor")
|
||||
|
||||
# Check if separate eye processing is enabled
|
||||
separate_eye_processing = config.get('processing.separate_eye_processing', False)
|
||||
eye_overlap_pixels = config.get('processing.eye_overlap_pixels', 0)
|
||||
enable_greenscreen_fallback = config.get('processing.enable_greenscreen_fallback', True)
|
||||
|
||||
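
    # The matching config block (presumably in the project's YAML config file)
    # would look something like:
    #
    #   processing:
    #     separate_eye_processing: true
    #     eye_overlap_pixels: 0
    #     enable_greenscreen_fallback: true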

    if separate_eye_processing:
        logger.info("VR180 Separate Eye Processing: ENABLED")
        logger.info(f"Eye overlap pixels: {eye_overlap_pixels}")
        logger.info(f"Greenscreen fallback: {enable_greenscreen_fallback}")

    sam2_processor = SAM2Processor(
        checkpoint_path=config.get_sam2_checkpoint(),
        config_path=config.get_sam2_config(),
        vos_optimized=config.get('models.sam2_vos_optimized', False),
        separate_eye_processing=separate_eye_processing,
        eye_overlap_pixels=eye_overlap_pixels
    )
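    # The two added keyword arguments route SAM2Processor into its per-eye
    # pipeline; eye_overlap_pixels presumably controls how much the eye crops
    # overlap (or blend) at the center seam when the masks are recombined.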

    # Initialize mask processor with quality enhancements
@@ -293,6 +593,10 @@ def main():
logger.info("Step 4: Processing segments sequentially")
|
||||
total_humans_detected = 0
|
||||
|
||||
# Initialize previous masks for separate eye processing
|
||||
previous_left_masks = None
|
||||
previous_right_masks = None
|
||||
|
||||

    for i, segment_info in enumerate(segments_info):
        segment_idx = segment_info['index']

@@ -307,6 +611,25 @@ def main():
logger.info(f"Segment {segment_idx} already processed, skipping")
|
||||
continue
|
||||
|
||||

        # Branch based on the processing mode
        if separate_eye_processing:
            # Use separate-eye processing mode
            success, left_masks, right_masks = process_segment_with_separate_eyes(
                segment_info, detector, sam2_processor, mask_processor, config,
                previous_left_masks, previous_right_masks
            )

            # Update the previous masks for the next segment
            previous_left_masks = left_masks
            previous_right_masks = right_masks
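            # Carrying the masks forward lets the next segment fall back to
            # previous_left/right_masks when YOLO finds no detections there.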

            if success:
                logger.info(f"Successfully processed segment {segment_idx} with separate eye processing")
            else:
                logger.error(f"Failed to process segment {segment_idx} with separate eye processing")

            continue  # Skip the original processing logic

        # Determine whether to use YOLO detections or previous masks
        use_detections = segment_idx in detect_segments