
    |h#                     n    d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ ddlmZ  G d	 d
e      Zy)    N)Image)SegmentationPredictor)DEFAULT_CFGchecks)box_iou)scale_masks   )adjust_bboxes_to_image_borderc                   H     e Zd ZdZeddf fd	Z fdZddZd Zd Z	 xZ
S )	FastSAMPredictora  
    FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks.

    This class extends the SegmentationPredictor, customizing the prediction pipeline specifically for fast SAM. It
    adjusts post-processing steps to incorporate mask prediction and non-maximum suppression while optimizing for
    single-class segmentation.

    Attributes:
        prompts (dict): Dictionary containing prompt information for segmentation (bboxes, points, labels, texts).
        device (torch.device): Device on which model and tensors are processed.
        clip_model (Any, optional): CLIP model for text-based prompting, loaded on demand.
        clip_preprocess (Any, optional): CLIP preprocessing function for images, loaded on demand.

    Methods:
        postprocess: Apply postprocessing to FastSAM predictions and handle prompts.
        prompt: Perform image segmentation inference based on various prompt types.
        set_prompts: Set prompts to be used during inference.
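
    Examples:
        A minimal usage sketch, not an authoritative walkthrough (the "FastSAM-s.pt" weights path and the image
        path are assumptions):

        >>> from ultralytics.models.fastsam import FastSAMPredictor
        >>> predictor = FastSAMPredictor(overrides={"model": "FastSAM-s.pt"})
        >>> predictor.set_prompts({"bboxes": [[30, 40, 200, 260]]})
        >>> results = predictor("path/to/image.jpg")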
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initialize the FastSAMPredictor with configuration and callbacks.

        This initializes a predictor specialized for Fast SAM (Segment Anything Model) segmentation tasks. The predictor
        extends SegmentationPredictor with custom post-processing for mask prediction and non-maximum suppression
        optimized for single-class segmentation.

        Args:
            cfg (dict): Configuration for the predictor.
            overrides (dict, optional): Configuration overrides.
            _callbacks (list, optional): List of callback functions.
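
        Examples:
            A small illustrative sketch (the override keys shown are standard predictor settings, not requirements):

            >>> predictor = FastSAMPredictor(overrides={"imgsz": 640, "conf": 0.4})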
        N)super__init__prompts)selfcfg	overrides
_callbacks	__class__s       a/var/www/html/test/engine/venv/lib/python3.12/site-packages/ultralytics/models/fastsam/predict.pyr   zFastSAMPredictor.__init__"   s     	i4    c                    | j                   j                  dd      }| j                   j                  dd      }| j                   j                  dd      }| j                   j                  dd      }t        |   |||      }|D ]  }	t	        j
                  dd|	j                  d   |	j                  d   g|d   j                  t        j                        }
t        |	j                  j                  |	j                        }t	        j                  t        |
d   |      d	kD        j                         }|j                         dk7  s|
|	j                  j                  |<    | j!                  |||||
      S )a  
        Apply postprocessing to FastSAM predictions and handle prompts.

        Args:
            preds (List[torch.Tensor]): Raw predictions from the model.
            img (torch.Tensor): Input image tensor that was fed to the model.
            orig_imgs (List[numpy.ndarray]): Original images before preprocessing.

        Returns:
            (List[Results]): Processed results with prompts applied.
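
        Notes:
            Boxes that overlap the full image with IoU > 0.9 are snapped to the exact image extent before prompts
            are applied, so a whole-image detection aligns exactly with the image border.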
        """
        bboxes = self.prompts.pop("bboxes", None)
        points = self.prompts.pop("points", None)
        labels = self.prompts.pop("labels", None)
        texts = self.prompts.pop("texts", None)
        results = super().postprocess(preds, img, orig_imgs)
        for result in results:
            # Snap near-full-frame boxes (IoU with the full-image box > 0.9) to the exact image extent
            full_box = torch.tensor(
                [0, 0, result.orig_shape[1], result.orig_shape[0]], device=preds[0].device, dtype=torch.float32
            )
            boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
            idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
            if idx.numel() != 0:
                result.boxes.xyxy[idx] = full_box

        return self.prompt(results, bboxes=bboxes, points=points, labels=labels, texts=texts)

    def prompt(self, results, bboxes=None, points=None, labels=None, texts=None):
        """
        Perform image segmentation inference based on cues like bounding boxes, points, and text prompts.

        Args:
            results (Results | List[Results]): Original inference results from FastSAM models without any prompts.
            bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
            texts (str | List[str], optional): Textual prompts, a list containing string objects.

        Returns:
            (List[Results]): Output results filtered and determined by the provided prompts.
        """
        if bboxes is None and points is None and texts is None:
            return results
        prompt_results = []
        if not isinstance(results, list):
            results = [results]
        for result in results:
            if len(result) == 0:
                prompt_results.append(result)
                continue
            masks = result.masks.data
            if masks.shape[1:] != result.orig_shape:
                masks = scale_masks(masks[None], result.orig_shape)[0]
            idx = torch.zeros(len(result), dtype=torch.bool, device=self.device)
            # Box prompts: keep the mask with the highest IoU against each given box
            if bboxes is not None:
                bboxes = torch.as_tensor(bboxes, dtype=torch.int32, device=self.device)
                bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
                bbox_areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
                mask_areas = torch.stack([masks[:, b[1] : b[3], b[0] : b[2]].sum(dim=(1, 2)) for b in bboxes])
                full_mask_areas = torch.sum(masks, dim=(1, 2))

                union = bbox_areas[:, None] + full_mask_areas - mask_areas
                idx[torch.argmax(mask_areas / union, dim=1)] = True
            # Point prompts: keep masks containing foreground points, drop masks containing background points
            if points is not None:
                points = torch.as_tensor(points, dtype=torch.int32, device=self.device)
                points = points[None] if points.ndim == 1 else points
                if labels is None:
                    labels = torch.ones(points.shape[0])
                labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
                assert len(labels) == len(points), (
                    f"Expected `labels` with same size as `point`, but got {len(labels)} and {len(points)}"
                )
                point_idx = (
                    torch.ones(len(result), dtype=torch.bool, device=self.device)
                    if labels.sum() == 0  # all negative points
                    else torch.zeros(len(result), dtype=torch.bool, device=self.device)
                )
                for point, label in zip(points, labels):
                    point_idx[torch.nonzero(masks[:, point[1], point[0]], as_tuple=True)[0]] = bool(label)
                idx |= point_idx
            # Text prompts: crop each detection and rank the crops against the texts with CLIP
            if texts is not None:
                if isinstance(texts, str):
                    texts = [texts]
                crop_ims, filter_idx = [], []
                for i, b in enumerate(result.boxes.xyxy.tolist()):
                    x1, y1, x2, y2 = (int(x) for x in b)
                    if masks[i].sum() <= 100:  # skip tiny masks
                        filter_idx.append(i)
                        continue
                    crop_ims.append(Image.fromarray(result.orig_img[y1:y2, x1:x2, ::-1]))
                similarity = self._clip_inference(crop_ims, texts)
                text_idx = torch.argmax(similarity, dim=-1)  # (M, )
                if len(filter_idx):
                    text_idx += (torch.tensor(filter_idx, device=self.device)[None] <= int(text_idx)).sum(0)
                idx[text_idx] = True

            prompt_results.append(result[idx])

        return prompt_results

    def _clip_inference(self, images, texts):
        """
        Perform CLIP inference to calculate similarity between images and text prompts.

        Args:
            images (List[PIL.Image]): List of source images, each should be PIL.Image with RGB channel order.
            texts (List[str]): List of prompt texts, each should be a string object.

        Returns:
            (torch.Tensor): Similarity matrix between given images and texts with shape (M, N).
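
        Examples:
            A hedged sketch (assumes an initialized predictor and a local crop image; the CLIP dependency is
            installed on first use):

            >>> from PIL import Image
            >>> sim = predictor._clip_inference([Image.open("crop.jpg")], ["a photo of a dog"])
            >>> sim.shape  # (M, N) = (len(texts), len(images))
            torch.Size([1, 1])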
        """
        try:
            import clip
        except ImportError:
            checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
            import clip
        if (not hasattr(self, "clip_model")) or (not hasattr(self, "clip_preprocess")):
            # Lazily load CLIP the first time a text prompt is used
            self.clip_model, self.clip_preprocess = clip.load("ViT-B/32", device=self.device)
        images = torch.stack([self.clip_preprocess(image).to(self.device) for image in images])
        tokenized_text = clip.tokenize(texts).to(self.device)
        image_features = self.clip_model.encode_image(images)
        text_features = self.clip_model.encode_text(tokenized_text)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        return (image_features * text_features[:, None]).sum(-1)  # (M, N)

    def set_prompts(self, prompts):
        """Set prompts to be used during inference."""
        self.prompts = prompts
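

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the library API. Assumptions: a local
    # "FastSAM-s.pt" checkpoint and "bus.jpg" image exist, and FastSAM's predict()
    # forwards bboxes/points/labels/texts prompt kwargs to this predictor.
    from ultralytics import FastSAM

    model = FastSAM("FastSAM-s.pt")
    results = model("bus.jpg", texts="a photo of a bus")  # text prompt handled by FastSAMPredictor.prompt()
    print(results[0].boxes)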