
    |h                     f    d dl Zd dlZd dlmZ d dlmZ d dlmZ  G d de      Z	 G d de	e      Z
y)	    N)LoadVisualPrompt)DetectionPredictor)SegmentationPredictorc                   V     e Zd ZdZd	def fdZd Z fdZd
 fd	Z fdZ	d Z
 xZS )YOLOEVPDetectPredictoraN  
    A mixin class for YOLO-EVP (Enhanced Visual Prompting) predictors.

    This mixin provides common functionality for YOLO models that use visual prompting, including
    model setup, prompt handling, and preprocessing transformations.

    Attributes:
        model (torch.nn.Module): The YOLO model for inference.
        device (torch.device): Device to run the model on (CPU or CUDA).
        prompts (dict | torch.Tensor): Visual prompts containing class indices and bounding boxes or masks.

    Methods:
        setup_model: Initialize the YOLO model and set it to evaluation mode.
        set_prompts: Set the visual prompts for the model.
        pre_transform: Preprocess images and prompts before inference.
        inference: Run inference with visual prompts.
        get_vpe: Process source to get visual prompt embeddings.
    verbosec                 6    t         |   ||       d| _        y)z
        Set up the model for prediction.

        Args:
            model (torch.nn.Module): Model to load or use.
            verbose (bool, optional): If True, provides detailed logging.
        )r   TN)supersetup_modeldone_warmup)selfmodelr   	__class__s      d/var/www/html/test/engine/venv/lib/python3.12/site-packages/ultralytics/models/yolo/yoloe/predict.pyr   z"YOLOEVPDetectPredictor.setup_model   s     	E73    c                     || _         y)z
        Set the visual prompts for the model.

        Args:
            prompts (dict): Dictionary containing class indices and bounding boxes or masks.
                Must include a 'cls' key with class indices.
        N)prompts)r   r   s     r   set_promptsz"YOLOEVPDetectPredictor.set_prompts*   s     r   c           
      t   t         |   |      }| j                  j                  dd      }| j                  j                  dd      }| j                  d   }t	        |      dk(  rf| j                  |d   j                  dd |d   j                  dd |||      }|j                  d      j                  | j                        | _        |S |J d| d	       t        |t              rt        d
 |D              sJ d| d	       t        |t              rt        d |D              sJ d| d	       t	        |      t	        |      cxk(  rt	        |      k(  s.n J dt	        |       dt	        |       dt	        |       d	       t        t	        |            D cg c]<  }| j                  ||   j                  dd ||   j                  dd ||   ||         > }}t        j                  j                   j"                  j%                  |d      j                  | j                        | _        |S c c}w )a  
        Preprocess images and prompts before inference.

        This method applies letterboxing to the input image and transforms the visual prompts
        (bounding boxes or masks) accordingly.

        Args:
            im (list): List containing a single input image.

        Returns:
            (list): Preprocessed image ready for model inference.

        Raises:
            ValueError: If neither valid bounding boxes nor masks are provided in the prompts.
        bboxesNmaskscls   r      zExpected bboxes, but got !c              3   P   K   | ]  }t        |t        j                           y wN
isinstancenpndarray.0bs     r   	<genexpr>z7YOLOEVPDetectPredictor.pre_transform.<locals>.<genexpr>O   s     3^RSJq"**4M3^   $&z#Expected List[np.ndarray], but got c              3   P   K   | ]  }t        |t        j                           y wr   r   r"   s     r   r%   z7YOLOEVPDetectPredictor.pre_transform.<locals>.<genexpr>R   s     5bTUjBJJ6O5br&   z-Expected same length for all inputs, but got vsT)batch_first)r
   pre_transformr   poplen_process_single_imageshape	unsqueezetodevicer   listallrangetorchnnutilsrnnpad_sequence)	r   imimgr   r   categoryvisualsir   s	           r   r*   z$YOLOEVPDetectPredictor.pre_transform4   s2     g#B'!!(D1  $/<<&s8q=00Qbq1A2a5;;rPQ?T\^dfklG",,Q/224;;?DL( 
# %L)B6(!'LL%fd+3^W]3^0^ 5fXQ?^ h-#5bYa5b2b 5hZqAb r7c(m:s6{: ?By3x=/Y[\_`f\g[hhij:
 s3x **3q6<<+;RU[[!_hWXk[abc[deG  !88>>--::7PT:UXXY]YdYdeDL
s   AH5c                 @   |t        |      rt        j                  |t        j                        }|j                  dk(  r	|dddf   }t        |d   |d   z  |d   |d   z        }||z  }|ddddfxx   t        |d   |d   |z  z
  dz  dz
        z  cc<   |ddddfxx   t        |d   |d   |z  z
  dz  dz
        z  cc<   n:|-t        | !  |      }t        j                  |      }d||dk(  <   nt        d	      t               j                  ||||      S )
a  
        Process a single image by resizing bounding boxes or masks and generating visuals.

        Args:
            dst_shape (tuple): The target shape (height, width) of the image.
            src_shape (tuple): The original shape (height, width) of the image.
            category (str): The category of the image for visual prompts.
            bboxes (list | np.ndarray, optional): A list of bounding boxes in the format [x1, y1, x2, y2].
            masks (np.ndarray, optional): A list of masks corresponding to the image.

        Returns:
            (torch.Tensor): The processed visuals for the image.

        Raises:
            ValueError: If neither `bboxes` nor `masks` are provided.
        N)dtyper   r   .r   g?r   z$Please provide valid bboxes or masks)r,   r    arrayfloat32ndimminroundr
   r*   stack
ValueErrorr   get_visuals)	r   	dst_shape	src_shaper<   r   r   gainresized_masksr   s	           r   r-   z,YOLOEVPDetectPredictor._process_single_image`   s>   " #f+XXfBJJ7F{{aay|il2IaL9Q<4OPDdNF319	!y|d7J(Ja'ORU'U!VV319	!y|d7J(Ja'ORU'U!VV!G1%8MHH]+E"#E%3,CDD  !--h	65QQr   c                 B    t        |   |g|d| j                  i|S )a/  
        Run inference with visual prompts.

        Args:
            im (torch.Tensor): Input image tensor.
            *args (Any): Variable length argument list.
            **kwargs (Any): Arbitrary keyword arguments.

        Returns:
            (torch.Tensor): Model prediction results.
        vpe)r
   	inferencer   )r   r:   argskwargsr   s       r   rP   z YOLOEVPDetectPredictor.inference   s(     w GGGGGr   c                     | j                  |       t        | j                        dk(  sJ d       | j                  D ]6  \  }}}| j                  |      }| j	                  || j
                  d      c S  y)a  
        Process the source to get the visual prompt embeddings (VPE).

        Args:
            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | List | Tuple): The source
                of the image to make predictions on. Accepts various types including file paths, URLs, PIL
                images, numpy arrays, and torch tensors.

        Returns:
            (torch.Tensor): The visual prompt embeddings (VPE) from the model.
        r   z get_vpe only supports one image!T)rO   
return_vpeN)setup_sourcer,   dataset
preprocessr   r   )r   source_im0sr:   s        r   get_vpezYOLOEVPDetectPredictor.get_vpe   sq     	&!4<< A%I'II%,, 	EJAtQ&B::bdllt:DD	Er   )T)NN)__name__
__module____qualname____doc__boolr   r   r*   r-   rP   r[   __classcell__)r   s   @r   r   r      s2    &	 $ 	 *X#RJHEr   r   c                       e Zd ZdZy)YOLOEVPSegPredictorz\Predictor for YOLO-EVP segmentation tasks combining detection and segmentation capabilities.N)r\   r]   r^   r_    r   r   rc   rc      s    fr   rc   )numpyr    r5   ultralytics.data.augmentr   ultralytics.models.yolo.detectr   ultralytics.models.yolo.segmentr   r   rc   rd   r   r   <module>ri      s8      5 = AXE/ XEv	02G 	r   