
    |h)&                         d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dl m!Z!m"Z"  G d de      Z# G d de#e      Z$y)    )deepcopy)Path)AnyDictOptionalUnionN)
functional)YOLOConcatDatasetbuild_dataloaderbuild_yolo_dataset)LoadVisualPrompt)check_det_dataset)DetectionValidator)SegmentationValidator)YOLOEDetect)
YOLOEModel)LOGGERTQDM)select_devicesmart_inference_modec                   r    e Zd ZdZ e       dej                  j                  j                  de	dej                  fd       Zdeeef   deeef   f fdZdeeef   dej                  j                  j                  fd	Z e       	 	 	 	 dd
ee   deee	ef      dee   dedeeef   f
 fd       Z xZS )YOLOEDetectValidatora  
    A validator class for YOLOE detection models that handles both text and visual prompt embeddings.

    This class extends DetectionValidator to provide specialized validation functionality for YOLOE models.
    It supports validation using either text prompts or visual prompt embeddings extracted from training samples,
    enabling flexible evaluation strategies for prompt-based object detection.

    Attributes:
        device (torch.device): The device on which validation is performed.
        args (namespace): Configuration arguments for validation.
        dataloader (DataLoader): DataLoader for validation data.

    Methods:
        get_visual_pe: Extract visual prompt embeddings from training samples.
        preprocess: Preprocess batch data ensuring visuals are on the same device as images.
        get_vpe_dataloader: Create a dataloader for LVIS training visual prompt samples.
        __call__: Run validation using either text or visual prompt embeddings.

    Examples:
        Validate with text prompts
        >>> validator = YOLOEDetectValidator()
        >>> stats = validator(model=model, load_vp=False)

        Validate with visual prompts
        >>> stats = validator(model=model, refer_data="path/to/data.yaml", load_vp=True)
    
dataloadermodelreturnc           	      b   t        |t              sJ t        |j                  j                  d   j                               D cg c]  }|j                  dd      d    }}t        j                  t        |      |j                  d   j                  | j                        }t        j                  t        |            }d}|D ]f  }|d   j                  d      j                  t        j                        j!                         }	t        j"                  |	t        |      	      }
||
z  }h |j                  | j                        }t%        |t        |      |
      }|D ]  }| j'                  |      }|j)                  |d   |d         }|d   }t+        |j,                  d         D ]  }|d   ||k(     j                  d      j                  t        j                        j!                  d      }	t        j.                  |j,                  d   | j                        dz  }|	|dt        |	       |	D ]-  }||xx   ||   ||k(     j1                  d      ||   z  z  cc<   /  
 t3        j4                  ||dk7     dd      ||dk7  <   d||dk(  <   |j7                  d      S c c}w )a  
        Extract visual prompt embeddings from training samples.

        This method processes a dataloader to compute visual prompt embeddings for each class using a YOLOE model.
        It normalizes the embeddings and handles cases where no samples exist for a class by setting their
        embeddings to zero.

        Args:
            dataloader (torch.utils.data.DataLoader): The dataloader providing training samples.
            model (YOLOEModel): The YOLOE model from which to extract visual prompt embeddings.

        Returns:
            (torch.Tensor): Visual prompt embeddings with shape (1, num_classes, embed_dim).
        names/   r   )devicez)Get visual prompt embeddings from samplescls)	minlength)totaldescimgvisuals)visual	batch_idxT)sortedN   )dimp)
isinstancer   listdatasetdatavaluessplittorchzeroslenr   embedr!   squeezetointuniquebincountr   
preprocessget_visual_perangeshapeonessumF	normalize	unsqueeze)selfr   r   namer   	visual_pecls_visual_numr%   batchr"   countpbarpredsr)   ipad_clscs                    `/var/www/html/test/engine/venv/lib/python3.12/site-packages/ultralytics/models/yolo/yoloe/val.pyr>   z"YOLOEDetectValidator.get_visual_pe1   s     %,,,37
8J8J8O8OPW8X8_8_8a3bc4C#A&ccKKE
EKKO,A,A$++V	SZ0:   	$E,&&r*--eii8??ACNN3#e*=Ee#N	$
 (**4;;7 Jc*oDA 
	VEOOE*E''eU9=M'NEk*I5;;q>* VEl9>2::2>AA%))LSS[_S`**U[[^DKKH2M&)
#c(# VAaLE!HW\$:$>$>q$ANSTDU$UULV	V
	V *+Y~QR?R5SY[_`)a	.A%&)*	.A%&""1%%? ds   J,rJ   c                 v    t         |   |      }d|v r$|d   j                  |d   j                        |d<   |S )zIPreprocess batch data, ensuring visuals are on the same device as images.r'   r&   )superr=   r9   r!   )rF   rJ   	__class__s     rQ   r=   zYOLOEDetectValidator.preprocessc   sC    "5)$Y/225<3F3FGE)    r1   c           	         t        | j                  |j                  | j                  j                  |j                  d            | j                  j                  |dd      }t        |t              r5|j                  D ]%  }|j                  j                  t                      ' n#|j                  j                  t                      t        || j                  j                  | j                  j                  dd      S )a  
        Create a dataloader for LVIS training visual prompt samples.

        This method prepares a dataloader for visual prompt embeddings (VPE) using the specified dataset.
        It applies necessary transformations including LoadVisualPrompt and configurations to the dataset
        for validation purposes.

        Args:
            data (dict): Dataset configuration dictionary containing paths and settings.

        Returns:
            (torch.utils.data.DataLoader): The dataloader for visual prompt samples.
        valF)moderectr    )shufflerank)r   argsgetr3   rJ   r.   r
   datasets
transformsappendr   r   workers)rF   r1   r0   ds       rQ   get_vpe_dataloaderz'YOLOEDetectValidator.get_vpe_dataloaderj   s     %IIHHTYY__dhhuo6IIOO
 g01%% 8##$4$678 %%&6&89IIOOII
 	
rU   trainer
refer_dataload_vpc                 T   |"|j                   | _         |j                  j                  }t        | j                  j                  j
                  d   j                               D cg c]  }|j                  dd      d    }}|rUt        j                  d       d| j                  _        | j                  | j                  |      }|j                  ||       n8t        j                  d       |j                  |      }|j                  ||       t        | A  ||      }	|	S |	|sJ d       t#        | j                  j                         | _         t%        |t&        t(        f      rdd	lm}
  |
|| j                   d
      }|j/                         j1                  | j                          t3        |xs | j                  j
                        }t        |d   j                               D cg c]  }|j                  dd      d    }}|rvt        j                  d       d| j                  _        | j5                  |      }| j                  ||      }|j                  ||       t        | A  t7        |            }	|	S t%        |j8                  d   t:              r)t=        |j8                  d   d      rt        | A  ||      S t        j                  d       |j                  |      }|j                  ||       t        | A  t7        |            }	|	S c c}w c c}w )a  
        Run validation on the model using either text or visual prompt embeddings.

        This method validates the model using either text prompts or visual prompts, depending on the load_vp flag.
        It supports validation during training (using a trainer object) or standalone validation with a provided
        model. For visual prompts, reference data can be specified to extract embeddings from a different dataset.

        Args:
            trainer (object, optional): Trainer object containing the model and device.
            model (YOLOEModel | str, optional): Model to validate. Required if trainer is not provided.
            refer_data (str, optional): Path to reference data for visual prompts.
            load_vp (bool): Whether to load visual prompts. If False, text prompts are used.

        Returns:
            (dict): Validation statistics containing metrics computed during validation.
        r   r   r   r   z!Validate using the visual prompt.FzValidate using the text prompt.z5Refer data is only used for visual prompt validation.)attempt_load_weightsT)r!   inplace)r   r    lrpc)r!   emar/   r   r0   r1   r2   r3   r   infor\   halfr>   set_classesget_text_perS   __call__r   r.   strr   ultralytics.nn.tasksrh   evalr9   r   rc   r   r   r   hasattr)rF   rd   r   re   rf   rG   r   vpetpestatsrh   r1   r   rT   s                rQ   rp   zYOLOEDetectValidator.__call__   s   0 !..DKKKOOE7;DOO<S<S<X<XY`<a<h<h<j7kltTZZQ'*lEl?@!&		((%@!!%-=>''.!!%-G$We4E< 9 %W WWw'		(8(89DK%#t-E,U4;;PTUJJLOODKK($Z%A499>>BD7;DM<P<P<R7STtTZZQ'*TET?@!&		 "44T:
((U;!!%-(x(?  EKKO[9gekkRToW]>^w'77=>''.!!%-(x(?U m0 Us   (L 1L%)NNNF)__name__
__module____qualname____doc__r   r4   utilsr1   
DataLoaderr   Tensorr>   r   rq   r   r=   rc   r   r   boolrp   __classcell__)rT   s   @rQ   r   r      s   6 /&(8(8(C(C /&J /&[`[g[g /& /&bS#X 4S> !
tCH~ !
%++:J:J:U:U !
F  "&26$(D#D j#o./D SM	D
 D 
c3hD DrU   r   c                       e Zd ZdZy)YOLOESegValidatorzRYOLOE segmentation validator that supports both text and visual prompt embeddings.N)rx   ry   rz   r{    rU   rQ   r   r      s    \rU   r   )%copyr   pathlibr   typingr   r   r   r   r4   torch.nnr	   rC   ultralytics.datar
   r   r   ultralytics.data.augmentr   ultralytics.data.utilsr   ultralytics.models.yolo.detectr   ultralytics.models.yolo.segmentr   ultralytics.nn.modules.headr   rr   r   ultralytics.utilsr   r   ultralytics.utils.torch_utilsr   r   r   r   r   rU   rQ   <module>r      sW      - -  $ T T 5 4 = A 3 + * M}- }@	,.C 	rU   