# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from ultralytics.data.build import load_inference_source
from ultralytics.engine.model import Model
from ultralytics.models import yolo
from ultralytics.nn.tasks import (
    ClassificationModel,
    DetectionModel,
    OBBModel,
    PoseModel,
    SegmentationModel,
    WorldModel,
    YOLOEModel,
    YOLOESegModel,
)
from ultralytics.utils import ROOT, YAML


class YOLO(Model):
    """
    YOLO (You Only Look Once) object detection model.

    This class provides a unified interface for YOLO models, automatically switching to specialized model types
    (YOLOWorld or YOLOE) based on the model filename. It supports various computer vision tasks including object
    detection, segmentation, classification, pose estimation, and oriented bounding box detection.

    Attributes:
        model: The loaded YOLO model instance.
        task: The task type (detect, segment, classify, pose, obb).
        overrides: Configuration overrides for the model.

    Methods:
        __init__: Initialize a YOLO model with automatic type detection.
        task_map: Map tasks to their corresponding model, trainer, validator, and predictor classes.

    Examples:
        Load a pretrained YOLO11n detection model
        >>> model = YOLO("yolo11n.pt")

        Load a pretrained YOLO11n segmentation model
        >>> model = YOLO("yolo11n-seg.pt")

        Initialize from a YAML configuration
        >>> model = YOLO("yolo11n.yaml")
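
        Run inference after loading (a usage sketch; "bus.jpg" is a placeholder image path)
        >>> results = model("bus.jpg")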
    """

    def __init__(self, model: Union[str, Path] = "yolo11n.pt", task: Optional[str] = None, verbose: bool = False):
        """
        Initialize a YOLO model.

        This constructor initializes a YOLO model, automatically switching to specialized model types
        (YOLOWorld or YOLOE) based on the model filename.

        Args:
            model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', 'yolo11n.yaml'.
            task (str, optional): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'.
                Defaults to auto-detection based on model.
            verbose (bool): Display model info on load.

        Examples:
            >>> from ultralytics import YOLO
            >>> model = YOLO("yolo11n.pt")  # load a pretrained YOLOv11n detection model
            >>> model = YOLO("yolo11n-seg.pt")  # load a pretrained YOLO11n segmentation model
         z-world>   .pt.yml.yaml)r   yoloe)r   r   r   r   r   r   RTDETRr   )r"   N)r   
isinstancestrstemsuffix	YOLOWorldtype	__class____dict__YOLOEsuper__init__hasattrr   	_get_nameultralyticsr"   )selfr   r   r   pathnew_instancer"   r*   s          \/var/www/html/test/engine/venv/lib/python3.12/site-packages/ultralytics/models/yolo/model.pyr.   zYOLO.__init__2   s   $ ZT{;EDtyy T[[4L%L$T7;L!,/DN(11DM		!dkk5M&M D'BL!,/DN(11DM G5tWEtzz7+DJJ<L<LR<P<Z<Z<\0\.%d|!%l!3 , 5 5 1]+    returnc           	      <   t         t        j                  j                  t        j                  j                  t        j                  j
                  dt        t        j                  j                  t        j                  j                  t        j                  j                  dt        t        j                  j                  t        j                  j                  t        j                  j                  dt         t        j"                  j$                  t        j"                  j&                  t        j"                  j(                  dt*        t        j,                  j.                  t        j,                  j0                  t        j,                  j2                  ddS )z=Map head to model, trainer, validator, and predictor classes.)r   trainer	validator	predictor)classifydetectsegmentposeobb)r   r   r<   ClassificationTrainerClassificationValidatorClassificationPredictorr   r=   DetectionTrainerDetectionValidatorDetectionPredictorr   r>   SegmentationTrainerSegmentationValidatorSegmentationPredictorr   r?   PoseTrainerPoseValidatorPosePredictorr   r@   
OBBTrainerOBBValidatorOBBPredictorr2   s    r5   task_mapzYOLO.task_mapW   s   
 -==>>!]]BB!]]BB	 (;;77![[;;![[;;	 +<<;;!\\??!\\??	 #9900!YY44!YY44	 "88..!XX22!XX22	3
 	
r6   )z
yolo11n.ptNF)__name__
__module____qualname____doc__r   r%   r   r   boolr.   propertyr   r   rQ   __classcell__r*   s   @r5   r   r      s^    6#6eCI. #6Xc] #6dh #6J !
$sDcN23 !
 !
r6   r   c                   |     e Zd ZdZd
deeef   deddf fdZe	de
ee
eef   f   fd       Zdee   ddfd	Z xZS )r(   a  
    YOLO-World object detection model.

    YOLO-World is an open-vocabulary object detection model that can detect objects based on text descriptions
    without requiring training on specific classes. It extends the YOLO architecture to support real-time
    open-vocabulary detection.

    Attributes:
        model: The loaded YOLO-World model instance.
        task: Always set to 'detect' for object detection.
        overrides: Configuration overrides for the model.

    Methods:
        __init__: Initialize YOLOv8-World model with a pre-trained model file.
        task_map: Map tasks to their corresponding model, trainer, validator, and predictor classes.
        set_classes: Set the model's class names for detection.

    Examples:
        Load a YOLOv8-World model
        >>> model = YOLOWorld("yolov8s-world.pt")
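
        Run open-vocabulary inference with the default class names (a sketch; "bus.jpg" is a placeholder path)
        >>> results = model.predict("bus.jpg")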

        Set custom classes for detection
        >>> model.set_classes(["person", "car", "bicycle"])
    """

    def __init__(self, model: Union[str, Path] = "yolov8s-world.pt", verbose: bool = False) -> None:
        """
        Initialize YOLOv8-World model with a pre-trained model file.

        Loads a YOLOv8-World model for object detection. If no custom class names are provided, it assigns default
        COCO class names.

        Args:
            model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
            verbose (bool): If True, prints additional information during initialization.
        """
        super().__init__(model=model, task="detect", verbose=verbose)

        # Assign default COCO class names when there are no custom names
        if not hasattr(self.model, "names"):
            self.model.names = YAML.load(ROOT / "cfg/datasets/coco8.yaml").get("names")

    @property
    def task_map(self) -> Dict[str, Dict[str, Any]]:
        """Map head to model, validator, and predictor classes."""
        return {
            "detect": {
                "model": WorldModel,
                "validator": yolo.detect.DetectionValidator,
                "predictor": yolo.detect.DetectionPredictor,
                "trainer": yolo.world.WorldTrainer,
            }
        }

    def set_classes(self, classes: List[str]) -> None:
        """
        Set the model's class names for detection.

        Args:
            classes (List[str]): A list of categories i.e. ["person"].
        """
        self.model.set_classes(classes)
        # Remove the background class if it was given
        background = " "
        if background in classes:
            classes.remove(background)
        self.model.names = classes

        # Reset method class names
        if self.predictor:
            self.predictor.model.names = classes


class YOLOE(Model):
    """
    YOLOE object detection and segmentation model.

    YOLOE is an enhanced YOLO model that supports both object detection and instance segmentation tasks with
    improved performance and additional features like visual and text positional embeddings.

    Attributes:
        model: The loaded YOLOE model instance.
        task: The task type (detect or segment).
        overrides: Configuration overrides for the model.

    Methods:
        __init__: Initialize YOLOE model with a pre-trained model file.
        task_map: Map tasks to their corresponding model, trainer, validator, and predictor classes.
        get_text_pe: Get text positional embeddings for the given texts.
        get_visual_pe: Get visual positional embeddings for the given image and visual features.
        set_vocab: Set vocabulary and class names for the YOLOE model.
        get_vocab: Get vocabulary for the given class names.
        set_classes: Set the model's class names and embeddings for detection.
        val: Validate the model using text or visual prompts.
        predict: Run prediction on images, videos, directories, streams, etc.

    Examples:
        Load a YOLOE detection model
        >>> model = YOLOE("yoloe-11s-seg.pt")

        Set vocabulary and class names
        >>> model.set_vocab(["person", "car", "dog"], ["person", "car", "dog"])
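
        Set classes from text prompts (a sketch of the documented text-embedding workflow)
        >>> names = ["person", "bus"]
        >>> model.set_classes(names, model.get_text_pe(names))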

        Predict with visual prompts
        >>> prompts = {"bboxes": [[10, 20, 100, 200]], "cls": ["person"]}
        >>> results = model.predict("image.jpg", visual_prompts=prompts)
    NFr   r   r   r7   c                     t         |   |||       t        | j                  d      s;t	        j
                  t        dz        j                  d      | j                  _        yy)ah  
        Initialize YOLOE model with a pre-trained model file.

        Args:
            model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
            task (str, optional): Task type for the model. Auto-detected if None.
            verbose (bool): If True, prints additional information during initialization.
        """
        super().__init__(model=model, task=task, verbose=verbose)

        # Assign default COCO class names when there are no custom names
        if not hasattr(self.model, "names"):
            self.model.names = YAML.load(ROOT / "cfg/datasets/coco8.yaml").get("names")

    @property
    def task_map(self) -> Dict[str, Dict[str, Any]]:
        """Map head to model, validator, and predictor classes."""
        return {
            "detect": {
                "model": YOLOEModel,
                "validator": yolo.yoloe.YOLOEDetectValidator,
                "predictor": yolo.detect.DetectionPredictor,
                "trainer": yolo.yoloe.YOLOETrainer,
            },
            "segment": {
                "model": YOLOESegModel,
                "validator": yolo.yoloe.YOLOESegValidator,
                "predictor": yolo.segment.SegmentationPredictor,
                "trainer": yolo.yoloe.YOLOESegTrainer,
            },
        }

    def get_text_pe(self, texts):
        """Get text positional embeddings for the given texts."""
        assert isinstance(self.model, YOLOEModel)
        return self.model.get_text_pe(texts)

    def get_visual_pe(self, img, visual):
        """
        Get visual positional embeddings for the given image and visual features.

        This method extracts positional embeddings from visual features based on the input image. It requires
        that the model is an instance of YOLOEModel.

        Args:
            img (torch.Tensor): Input image tensor.
            visual (torch.Tensor): Visual features extracted from the image.

        Returns:
            (torch.Tensor): Visual positional embeddings.

        Examples:
            >>> model = YOLOE("yoloe-11s-seg.pt")
            >>> img = torch.rand(1, 3, 640, 640)
            >>> visual_features = model.model.backbone(img)
            >>> pe = model.get_visual_pe(img, visual_features)
        """
        assert isinstance(self.model, YOLOEModel)
        return self.model.get_visual_pe(img, visual)

    def set_vocab(self, vocab: List[str], names: List[str]) -> None:
        """
        Set vocabulary and class names for the YOLOE model.

        This method configures the vocabulary and class names used by the model for text processing and
        classification tasks. The model must be an instance of YOLOEModel.

        Args:
            vocab (List[str]): Vocabulary list containing tokens or words used by the model for text processing.
            names (List[str]): List of class names that the model can detect or classify.

        Raises:
            AssertionError: If the model is not an instance of YOLOEModel.

        Examples:
            >>> model = YOLOE("yoloe-11s-seg.pt")
            >>> model.set_vocab(["person", "car", "dog"], ["person", "car", "dog"])
        """
        assert isinstance(self.model, YOLOEModel)
        self.model.set_vocab(vocab, names=names)

    def get_vocab(self, names):
        """Get vocabulary for the given class names."""
        assert isinstance(self.model, YOLOEModel)
        return self.model.get_vocab(names)

    def set_classes(self, classes: List[str], embeddings) -> None:
        """
        Set the model's class names and embeddings for detection.

        Args:
            classes (List[str]): A list of categories i.e. ["person"].
            embeddings (torch.Tensor): Embeddings corresponding to the classes.
        """
        assert isinstance(self.model, YOLOEModel)
        self.model.set_classes(classes, embeddings)
        assert " " not in classes  # no background class should be present
        self.model.names = classes

        # Reset method class names
        if self.predictor:
            self.predictor.model.names = classes

    def val(
        self,
        validator=None,
        load_vp: bool = False,
        refer_data: Optional[str] = None,
        **kwargs: Any,
    ):
        """
        Validate the model using text or visual prompts.

        Args:
            validator (callable, optional): A callable validator function. If None, a default validator is loaded.
            load_vp (bool): Whether to load visual prompts. If False, text prompts are used.
            refer_data (str, optional): Path to the reference data for visual prompts.
            **kwargs (Any): Additional keyword arguments to override default settings.

        Returns:
            (dict): Validation statistics containing metrics computed during validation.
        """
        custom = {"rect": not load_vp}  # method defaults
        args = {**self.overrides, **custom, **kwargs, "mode": "val"}  # highest priority args on the right

        validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
        validator(model=self.model, load_vp=load_vp, refer_data=refer_data)
        self.metrics = validator.metrics
        return validator.metrics

    def predict(
        self,
        source=None,
        stream: bool = False,
        visual_prompts: Dict[str, List] = {},
        refer_image=None,
        predictor=None,
        **kwargs: Any,
    ):
        """
        Run prediction on images, videos, directories, streams, etc.

        Args:
            source (str | int | PIL.Image | np.ndarray, optional): Source for prediction. Accepts image paths,
                directory paths, URL/YouTube streams, PIL images, numpy arrays, or webcam indices.
            stream (bool): Whether to stream the prediction results. If True, results are yielded as a
                generator as they are computed.
            visual_prompts (Dict[str, List]): Dictionary containing visual prompts for the model. Must include
                'bboxes' and 'cls' keys when non-empty.
            refer_image (str | PIL.Image | np.ndarray, optional): Reference image for visual prompts.
            predictor (callable, optional): Custom predictor function. If None, a predictor is automatically
                loaded based on the task.
            **kwargs (Any): Additional keyword arguments passed to the predictor.

        Returns:
            (List | generator): List of Results objects or generator of Results objects if stream=True.

        Examples:
            >>> model = YOLOE("yoloe-11s-seg.pt")
            >>> results = model.predict("path/to/image.jpg")
            >>> # With visual prompts
            >>> prompts = {"bboxes": [[10, 20, 100, 200]], "cls": ["person"]}
            >>> results = model.predict("path/to/image.jpg", visual_prompts=prompts)
        bboxesclsz7Expected 'bboxes' and 'cls' in visual prompts, but got z=Expected equal number of bounding boxes and classes, but got z and z respectivelypredictFN   )r   r   saver   batch)r   r   c              3   D   K   | ]  }t        t        |              y w)N)lenset).0cs     r5   	<genexpr>z YOLOE.predict.<locals>.<genexpr>  s     ?ACAK?s    r#   object)r   >   videor   r   r>   r=   )r   keysr$   r;   r   r    YOLOEVPDetectPredictorr   r   r   listmaxr   ncranger\   set_promptscopysetup_modelr	   r   nextiterget_vperi   r>   rI   r-   r   )r2   sourcer   r   refer_imager;   r   num_clsidatasetvper*   s              r5   r   zYOLOE.predicto  sy   D ~~-%>2I I.J]J]J_I`aI ~h/0Cu8M4NN OPSTbckTlPmOnns~e,-.m=N dnndjj.O.OP"P)"Ptzz/P/P $

 ) %#.$#6!"  $~~	" fd+0C ?)>??^E234 
 '.DJJR #6;GnE&EDJJNN&&~':':'<=NN&&TZZ&8"v'9/7<<#66"&tG}"5a"8";K&nn,,[9

&&tzz'7'7=)3DNNDLLDfDf)gImu	!%

(I(IJ!DNwvv888#  Fs   K)zyoloe-11s-seg.ptNF)NFN)rR   rS   rT   rU   r   r%   r   r   rV   r.   rW   r   r   rQ   rt   rw   r   r|   r~   ri   r   r   rX   rY   s   @r5   r,   r,      s    F inX39%XBJ3-XaeX	X" 
$sDcN23 
 
"-
5.1tCy 1c 1t 1*+
149 1T 1( $(	! ! SM	!: *,M9 M9 S$Y	M9 M9r6   r,   N)pathlibr   typingr   r   r   r   r   ultralytics.data.buildr	   ultralytics.engine.modelr
   ultralytics.modelsr   ultralytics.nn.tasksr   r   r   r   r   r   r   r   ultralytics.utilsr   r   r   r(   r,    r6   r5   <module>r      sY     3 3 8 * #	 	 	 )c
5 c
LG1 G1Tv9E v9r6   