from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
import torch
from ultralytics import YOLO


class YOLODetector:
    """YOLOv8-based person detector for automatic SAM2 prompting.

    Wraps an Ultralytics ``YOLO`` model and exposes helpers that turn its
    person detections into plain dictionaries, SAM2 box prompts and debug
    visualisations.
    """

    # Class index this detector keeps; the original code documents index 0
    # as "person" in COCO.
    PERSON_CLASS_ID = 0

    def __init__(self, model_name: str = "yolov8n", confidence_threshold: float = 0.7, device: str = "cuda"):
        """
        Store the configuration and load the model immediately.

        Args:
            model_name: Model name or weights path, with or without ".pt"
            confidence_threshold: Minimum confidence for a kept detection
            device: Requested device string (e.g. "cuda", "cuda:0", "cpu")

        Raises:
            RuntimeError: If the model cannot be loaded
        """
        self.model_name = model_name
        self.confidence_threshold = confidence_threshold
        self.device = device
        self.model = None
        self._load_model()

    def _load_model(self):
        """
        Load the YOLOv8 weights and move the model to the requested device.

        Raises:
            RuntimeError: If loading the weights or moving the model fails
        """
        # Do not produce "name.pt.pt" when the caller already gave the suffix.
        name = str(self.model_name)
        weights = name if name.endswith(".pt") else f"{name}.pt"

        # Any CUDA request ("cuda", "cuda:1", ...) falls back to CPU when CUDA
        # is unavailable; every other device string is honoured as given.
        target = str(self.device)
        if target.startswith("cuda") and not torch.cuda.is_available():
            target = "cpu"

        try:
            self.model = YOLO(weights)
            self.model.to(target)
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Failed to load YOLO model {self.model_name}: {e}") from e

    def detect_persons(self, frame: np.ndarray) -> List[Dict[str, Any]]:
        """
        Detect persons in frame and return bounding boxes

        Args:
            frame: Input frame (H, W, 3)

        Returns:
            List of detection dictionaries sorted by confidence (highest
            first). Each has 'bbox' ([x1, y1, x2, y2] as ints), 'confidence'
            (float), 'class' ('person') and 'area' (float, computed from the
            un-truncated box coordinates).

        Raises:
            RuntimeError: If the model has not been loaded
        """
        if self.model is None:
            raise RuntimeError("YOLO model not loaded")

        threshold = float(self.confidence_threshold)
        # Hand the threshold and class filter to the model so thresholds below
        # the library's own default are respected. The value is clamped to
        # [0, 1] for the call only; the exact check is repeated below.
        results = self.model(
            frame,
            conf=min(max(threshold, 0.0), 1.0),
            classes=[self.PERSON_CLASS_ID],
            verbose=False,
        )

        detections = []
        for result in results:
            boxes = result.boxes
            if boxes is None:
                continue

            # One device-to-host transfer per attribute instead of per box.
            coords = boxes.xyxy.cpu().numpy()
            scores = boxes.conf.cpu().numpy()
            class_ids = boxes.cls.cpu().numpy()

            for (x1, y1, x2, y2), score, class_id in zip(coords, scores, class_ids):
                confidence = float(score)
                if int(class_id) != self.PERSON_CLASS_ID or confidence < threshold:
                    continue

                detections.append({
                    'bbox': [int(x1), int(y1), int(x2), int(y2)],
                    'confidence': confidence,
                    'class': 'person',
                    # Plain Python float rather than a NumPy scalar.
                    'area': float((x2 - x1) * (y2 - y1)),
                })

        # Sort by confidence (highest first)
        detections.sort(key=lambda d: d['confidence'], reverse=True)
        return detections

    def convert_to_sam_prompts(self, detections: List[Dict[str, Any]]) -> Tuple[np.ndarray, np.ndarray]:
        """
        Convert YOLO detections to SAM2 box prompts

        Args:
            detections: List of detection dictionaries

        Returns:
            Tuple of (box_prompts, labels): box_prompts has shape (N, 4) and
            labels has shape (N,), every label being 1 (positive prompt).
            For empty input the shapes are (0, 4) and (0,).
        """
        if not detections:
            # Keep the (N, 4) / (N,) layout even when there is nothing to prompt.
            return np.zeros((0, 4), dtype=int), np.zeros((0,), dtype=int)

        box_prompts = np.array([detection['bbox'] for detection in detections])
        labels = np.ones(len(detections), dtype=int)  # Positive prompts
        return box_prompts, labels

    def visualize_detections(self, frame: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarray:
        """
        Draw detection boxes on frame for debugging

        Args:
            frame: Input frame
            detections: List of detections

        Returns:
            Copy of the frame with bounding boxes and confidence labels drawn;
            the input frame is not modified
        """
        vis_frame = frame.copy()
        box_color = (0, 255, 0)
        text_color = (0, 0, 0)
        font = cv2.FONT_HERSHEY_SIMPLEX
        padding = 10

        for detection in detections:
            x1, y1, x2, y2 = detection['bbox']
            confidence = detection['confidence']

            # Draw bounding box
            cv2.rectangle(vis_frame, (x1, y1), (x2, y2), box_color, 2)

            # Draw confidence score
            label = f"Person: {confidence:.2f}"
            text_w, text_h = cv2.getTextSize(label, font, 0.5, 2)[0]

            # The label normally sits just above the box; when that would run
            # off the top of the image, place it just inside the box instead.
            label_top = y1 - text_h - padding
            if label_top < 0:
                label_top = y1
            label_bottom = label_top + text_h + padding

            cv2.rectangle(vis_frame, (x1, label_top),
                          (x1 + text_w, label_bottom), box_color, -1)
            cv2.putText(vis_frame, label, (x1, label_bottom - 5),
                        font, 0.5, text_color, 2)

        return vis_frame

    def get_largest_person(self, detections: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """
        Get the largest detected person (by bounding box area)

        Args:
            detections: List of detection dictionaries with an 'area' key

        Returns:
            The detection with the greatest 'area', or None for empty input
        """
        if not detections:
            return None

        return max(detections, key=lambda d: d['area'])

    def filter_by_size(self, detections: List[Dict[str, Any]], min_area: int = 1000) -> List[Dict[str, Any]]:
        """
        Filter detections by minimum bounding box area

        Args:
            detections: List of detection dictionaries with an 'area' key
            min_area: Smallest area (inclusive) a detection may have

        Returns:
            New list with the detections whose 'area' is at least min_area,
            in their original order
        """
        return [d for d in detections if d['area'] >= min_area]