
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import json
from collections import defaultdict
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import ConcatDataset

from ultralytics.utils import LOCAL_RANK, LOGGER, NUM_THREADS, TQDM, colorstr
from ultralytics.utils.instance import Instances
from ultralytics.utils.ops import resample_segments, segments2boxes
from ultralytics.utils.torch_utils import TORCHVISION_0_18

from .augment import (
    Compose,
    Format,
    LetterBox,
    RandomLoadText,
    classify_augmentations,
    classify_transforms,
    v8_transforms,
)
from .base import BaseDataset
from .converter import merge_multi_segment
from .utils import (
    HELP_URL,
    check_file_speeds,
    get_hash,
    img2label_paths,
    load_dataset_cache_file,
    save_dataset_cache_file,
    verify_image,
    verify_image_label,
)

# Ultralytics dataset *.cache version, >= 1.0.0 required
DATASET_CACHE_VERSION = "1.0.3"


class YOLODataset(BaseDataset):
    """
    Dataset class for loading object detection and/or segmentation labels in YOLO format.

    This class supports loading data for object detection, segmentation, pose estimation, and oriented bounding box
    (OBB) tasks using the YOLO format.

    Attributes:
        use_segments (bool): Indicates if segmentation masks should be used.
        use_keypoints (bool): Indicates if keypoints should be used for pose estimation.
        use_obb (bool): Indicates if oriented bounding boxes should be used.
        data (dict): Dataset configuration dictionary.

    Methods:
        cache_labels: Cache dataset labels, check images and read shapes.
        get_labels: Return dictionary of labels for YOLO training.
        build_transforms: Build and append transforms to the list.
        close_mosaic: Set mosaic, copy_paste and mixup options to 0.0 and build transformations.
        update_labels_info: Update label format for different tasks.
        collate_fn: Collate data samples into batches.

    Examples:
        >>> dataset = YOLODataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
        >>> dataset.get_labels()
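        >>> # Pose datasets are built the same way; this kpt_shape is the COCO-style 17-keypoint layout
        >>> pose_data = {"names": {0: "person"}, "kpt_shape": [17, 3], "channels": 3}
        >>> pose_set = YOLODataset(img_path="path/to/images", data=pose_data, task="pose")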
    Ndetectdatataskr-   r.   c                    |dk(  | _         |dk(  | _        |dk(  | _        || _        | j                   r| j                  rJ d       t	        |   |d| j                  d   i| y)ap  
        Initialize the YOLODataset.

        Args:
            data (dict, optional): Dataset configuration dictionary.
            task (str): Task type, one of 'detect', 'segment', 'pose', or 'obb'.
            *args (Any): Additional positional arguments for the parent class.
            **kwargs (Any): Additional keyword arguments for the parent class.
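
        Examples:
            >>> # Minimal sketch; a real data dict is parsed from a dataset YAML and carries more keys
            >>> data = {"names": {0: "person"}, "channels": 3}  # "channels" is required by the base class
            >>> dataset = YOLODataset(img_path="path/to/images", data=data, task="segment")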
        segmentposeobbz(Can not use both segments and keypoints.channelsN)use_segmentsuse_keypointsuse_obbr-   super__init__selfr-   r.   argskwargs	__class__s        W/var/www/html/test/engine/venv/lib/python3.12/site-packages/ultralytics/data/dataset.pyr8   zYOLODataset.__init__I   sm     !I-!V^u}	%%$*<*<i?ii=$I:)>I&I    ./labels.cachepathreturnc                 b   dg i}ddddg f\  }}}}}| j                    d|j                  |j                  z   d}t        | j                        }	| j
                  j                  dd      \  }
}| j                  r|
dk  s|dvrt        d      t        t              5 }|j                  t        t        | j                  | j                  t        | j                         t        | j                        t        t        | j
                  d	               t        |
      t        |      t        | j                               
      }t#        |||	      }|D ]{  \
  }}}}}}}}}}||z  }||z  }||z  }||z  }|r/|d   j%                  |||ddddf   |ddddf   ||ddd       |r|j%                  |       | d| d||z    d| d|_        } |j)                          ddd       |r$t+        j,                  dj/                  |             |dk(  r+t+        j0                  | j                    d| dt2                t5        | j                  | j                  z         |d<   ||||t        | j                        f|d<   ||d<   t7        | j                   ||t8               |S # 1 sw Y   xY w)z
        Cache dataset labels, check images and read shapes.

        Args:
            path (Path): Path where to save the cache file.

        Returns:
            (dict): Dictionary containing cached labels and related information.
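
        Examples:
            >>> # Illustrative manual re-cache; paths are placeholders
            >>> cache = dataset.cache_labels(Path("path/to/labels.cache"))
            >>> nf, nm, ne, nc, total = cache["results"]  # found, missing, empty, corrupt, total images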
        """
        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
        total = len(self.im_files)
        nkpt, ndim = self.data.get("kpt_shape", (0, 0))
        if self.use_keypoints and (nkpt <= 0 or ndim not in {2, 3}):
            raise ValueError(
                "'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of keypoints, "
                "number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'"
            )
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(
                func=verify_image_label,
                iterable=zip(
                    self.im_files,
                    self.label_files,
                    repeat(self.prefix),
                    repeat(self.use_keypoints),
                    repeat(len(self.data["names"])),
                    repeat(nkpt),
                    repeat(ndim),
                    repeat(self.single_cls),
                ),
            )
            pbar = TQDM(results, desc=desc, total=total)
            for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x["labels"].append(
                        {
                            "im_file": im_file,
                            "shape": shape,
                            "cls": lb[:, 0:1],  # n, 1
                            "bboxes": lb[:, 1:],  # n, 4
                            "segments": segments,
                            "keypoints": keypoint,
                            "normalized": True,
                            "bbox_format": "xywh",
                        }
                    )
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            pbar.close()

        if msgs:
            LOGGER.info("\n".join(msgs))
        if nf == 0:
            LOGGER.warning(f"{self.prefix}No labels found in {path}. {HELP_URL}")
        x["hash"] = get_hash(self.label_files + self.im_files)
        x["results"] = nf, nm, ne, nc, len(self.im_files)
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
        return x

    def get_labels(self) -> List[Dict]:
        """
        Return dictionary of labels for YOLO training.

        This method loads labels from disk or cache, verifies their integrity, and prepares them for training.

        Returns:
            (List[dict]): List of label dictionaries, each containing information about an image and its annotations.
        """
        self.label_files = img2label_paths(self.im_files)
        cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
        try:
            cache, exists = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash(self.label_files + self.im_files)  # identical hash
        except (FileNotFoundError, AssertionError, AttributeError):
            cache, exists = self.cache_labels(cache_path), False  # run cache ops

        # Display cache
        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupt, total
        if exists and LOCAL_RANK in {-1, 0}:
            d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            TQDM(None, desc=self.prefix + d, total=n, initial=n)  # display results
            if cache["msgs"]:
                LOGGER.info("\n".join(cache["msgs"]))  # display warnings

        # Read cache
        [cache.pop(k) for k in ("hash", "version", "msgs")]  # remove items
        labels = cache["labels"]
        if not labels:
            raise RuntimeError(
                f"No valid images found in {cache_path}. Images with incorrectly formatted labels are ignored. {HELP_URL}"
            )
        self.im_files = [lb["im_file"] for lb in labels]  # update im_files

        # Check if the dataset is all boxes or all segments
        lengths = ((len(lb["cls"]), len(lb["bboxes"]), len(lb["segments"])) for lb in labels)
        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
        if len_segments and len_boxes != len_segments:
            LOGGER.warning(
                f"Box and segment counts should be equal, but got len(segments) = {len_segments}, "
                f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
                "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset."
            )
            for lb in labels:
                lb["segments"] = []
        if len_cls == 0:
            LOGGER.warning(f"Labels are missing or empty in {cache_path}, training may not work correctly. {HELP_URL}")
        return labels

    def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
        """
        Build and append transforms to the list.

        Args:
            hyp (dict, optional): Hyperparameters for transforms.

        Returns:
            (Compose): Composed transforms.
        """
        if self.augment:
            hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
            hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
            hyp.cutmix = hyp.cutmix if self.augment and not self.rect else 0.0
            transforms = v8_transforms(self, self.imgsz, hyp)
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
        transforms.append(
            Format(
                bbox_format="xywh",
                normalize=True,
                return_mask=self.use_segments,
                return_keypoint=self.use_keypoints,
                return_obb=self.use_obb,
                batch_idx=True,
                mask_ratio=hyp.mask_ratio,
                mask_overlap=hyp.overlap_mask,
                bgr=hyp.bgr if self.augment else 0.0,  # only affect training
            )
        )
        return transforms

    def close_mosaic(self, hyp: Dict) -> None:
        """
        Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.

        Args:
            hyp (dict): Hyperparameters for transforms.
        """
        hyp.mosaic = 0.0
        hyp.copy_paste = 0.0
        hyp.mixup = 0.0
        hyp.cutmix = 0.0
        self.transforms = self.build_transforms(hyp)

    def update_labels_info(self, label: Dict) -> Dict:
        """
        Update label format for different tasks.

        Args:
            label (dict): Label dictionary containing bboxes, segments, keypoints, etc.

        Returns:
            (dict): Updated label dictionary with instances.

        Note:
            cls is not with bboxes now, classification and semantic segmentation need an independent cls label
            Can also support classification and semantic segmentation by adding or removing dict keys there.
        """
        bboxes = label.pop("bboxes")
        segments = label.pop("segments", [])
        keypoints = label.pop("keypoints", None)
        bbox_format = label.pop("bbox_format")
        normalized = label.pop("normalized")

        # NOTE: do NOT resample oriented boxes
        segment_resamples = 100 if self.use_obb else 1000
        if len(segments) > 0:
            # Make sure segments interpolate correctly if original length is greater than segment_resamples
            max_len = max(len(s) for s in segments)
            segment_resamples = (max_len + 1) if segment_resamples < max_len else segment_resamples
            # List[np.array(segment_resamples, 2)] * num_samples
            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
        else:
            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
        label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label

    @staticmethod
    def collate_fn(batch: List[Dict]) -> Dict:
        """
        Collate data samples into batches.

        Args:
            batch (List[dict]): List of dictionaries containing sample data.

        Returns:
            (dict): Collated batch with stacked tensors.
        """
        new_batch = {}
        batch = [dict(sorted(b.items())) for b in batch]  # make sure the keys are in the same order
        keys = batch[0].keys()
        values = list(zip(*[list(b.values()) for b in batch]))
        for i, k in enumerate(keys):
            value = values[i]
            if k in {"img", "text_feats"}:
                value = torch.stack(value, 0)
            elif k == "visuals":
                value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
            if k in {"masks", "keypoints", "bboxes", "cls", "segments", "obb"}:
                value = torch.cat(value, 0)
            new_batch[k] = value
        new_batch["batch_idx"] = list(new_batch["batch_idx"])
        for i in range(len(new_batch["batch_idx"])):
            new_batch["batch_idx"][i] += i  # add target image index for build_targets()
        new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
        return new_batch


class YOLOMultiModalDataset(YOLODataset):
    """
    Dataset class for loading object detection and/or segmentation labels in YOLO format with multi-modal support.

    This class extends YOLODataset to add text information for multi-modal model training, enabling models to
    process both image and text data.

    Methods:
        update_labels_info: Add text information for multi-modal model training.
        build_transforms: Enhance data transformations with text augmentation.

    Examples:
        >>> dataset = YOLOMultiModalDataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
        >>> batch = next(iter(dataset))
        >>> print(batch.keys())  # Should include 'texts'
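        >>> # Class names may pack "/"-separated synonyms; update_labels_info() splits them into labels["texts"]
        >>> mm_data = {"names": {0: "person/human"}, "nc": 1, "channels": 3}
        >>> dataset = YOLOMultiModalDataset(img_path="path/to/images", data=mm_data, task="detect")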
    Nr+   r,   r-   r.   c                *    t        |   |||d| y)ax  
        Initialize a YOLOMultiModalDataset.

        Args:
            data (dict, optional): Dataset configuration dictionary.
            task (str): Task type, one of 'detect', 'segment', 'pose', or 'obb'.
            *args (Any): Additional positional arguments for the parent class.
            **kwargs (Any): Additional keyword arguments for the parent class.
        """
        super().__init__(*args, data=data, task=task, **kwargs)

    def update_labels_info(self, label: Dict) -> Dict:
        """
        Add text information for multi-modal model training.

        Args:
            label (dict): Label dictionary containing bboxes, segments, keypoints, etc.

        Returns:
            (dict): Updated label dictionary with instances and texts.
        """
        labels = super().update_labels_info(label)
        # NOTE: some categories are concatenated with their synonyms by "/"
        labels["texts"] = [v.split("/") for _, v in self.data["names"].items()]
        return labels

    def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
        """
        Enhance data transformations with optional text augmentation for multi-modal training.

        Args:
            hyp (dict, optional): Hyperparameters for transforms.

        Returns:
            (Compose): Composed transforms including text augmentation if applicable.
        """
        transforms = super().build_transforms(hyp)
        if self.augment:
            # NOTE: hard-coded the args for now but could be changed by setting hyperparameters
            transform = RandomLoadText(
                max_samples=min(self.data["nc"], 80),
                padding=True,
                padding_value=self._get_neg_texts(self.category_freq),
            )
            transforms.insert(-1, transform)
        return transforms

    @property
    def category_names(self):
        """
        Return category names for the dataset.

        Returns:
            (Set[str]): List of class names.
        """
        names = self.data["names"].values()
        return {n.strip() for name in names for n in name.split("/")}

    @property
    def category_freq(self):
        """Return frequency of each category in the dataset."""
        texts = [v.split("/") for v in self.data["names"].values()]
        category_freq = defaultdict(int)
        for label in self.labels:
            for c in label["cls"].squeeze(-1):
                text = texts[int(c)]
                for t in text:
                    t = t.strip()
                    category_freq[t] += 1
        return category_freq

    @staticmethod
    def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
        """Get negative text samples based on frequency threshold."""
        return [k for k, v in category_freq.items() if v >= threshold]


class GroundingDataset(YOLODataset):
    """
    Dataset class for object detection tasks using annotations from a JSON file in grounding format.

    This dataset is designed for grounding tasks where annotations are provided in a JSON file rather than
    the standard YOLO format text files.

    Attributes:
        json_file (str): Path to the JSON file containing annotations.

    Methods:
        get_img_files: Return empty list as image files are read in get_labels.
        get_labels: Load annotations from a JSON file and prepare them for training.
        build_transforms: Configure augmentations for training with optional text loading.

    Examples:
        >>> dataset = GroundingDataset(img_path="path/to/images", json_file="annotations.json", task="detect")
        >>> len(dataset)  # Number of valid images with annotations
    """

    def __init__(self, *args, task: str = "detect", json_file: str = "", **kwargs):
        """
        Initialize a GroundingDataset for object detection.

        Args:
            json_file (str): Path to the JSON file containing annotations.
            task (str): Must be 'detect' or 'segment' for GroundingDataset.
            *args (Any): Additional positional arguments for the parent class.
            **kwargs (Any): Additional keyword arguments for the parent class.
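
        Examples:
            >>> # Paths are placeholders; the JSON must follow the grounding annotation schema
            >>> dataset = GroundingDataset(img_path="path/to/images", json_file="path/to/annotations.json", task="segment")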
        """
        assert task in {"detect", "segment"}, "GroundingDataset currently only supports `detect` and `segment` tasks"
        self.json_file = json_file
        super().__init__(*args, task=task, data={"channels": 3}, **kwargs)

    def get_img_files(self, img_path: str) -> List:
        """
        The image files would be read in `get_labels` function, return empty list here.

        Args:
            img_path (str): Path to the directory containing images.

        Returns:
            (list): Empty list as image files are read in get_labels.
        """
        return []

    def verify_labels(self, labels: List[Dict[str, Any]]) -> None:
        """
        Verify the number of instances in the dataset matches expected counts.

        This method checks if the total number of bounding box instances in the provided
        labels matches the expected count for known datasets. It performs validation
        against a predefined set of datasets with known instance counts.

        Args:
            labels (List[Dict[str, Any]]): List of label dictionaries, where each dictionary
                contains dataset annotations. Each label dict must have a 'bboxes' key with
                a numpy array or tensor containing bounding box coordinates.

        Raises:
            AssertionError: If the actual instance count doesn't match the expected count
                for a recognized dataset.

        Note:
            For unrecognized datasets (those not in the predefined expected_counts),
            a warning is logged and verification is skipped.
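
        Examples:
            >>> # Illustrative; get_labels() already runs this check on the loaded labels
            >>> dataset.verify_labels(dataset.get_labels())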
        """
        expected_counts = {
            "final_mixed_train_no_coco_segm": 3662412,
            "final_mixed_train_no_coco": 3681235,
            "final_flickr_separateGT_train_segm": 638214,
            "final_flickr_separateGT_train": 640704,
        }
        instance_count = sum(label["bboxes"].shape[0] for label in labels)
        for data_name, count in expected_counts.items():
            if data_name in self.json_file:
                assert instance_count == count, f"'{self.json_file}' has {instance_count} instances, expected {count}."
                return
        LOGGER.warning(f"Skipping instance count verification for unrecognized dataset '{self.json_file}'")

    def cache_labels(self, path: Path = Path("./labels.cache")) -> Dict[str, Any]:
        """
        Load annotations from a JSON file, filter, and normalize bounding boxes for each image.

        Args:
            path (Path): Path where to save the cache file.

        Returns:
            (Dict[str, Any]): Dictionary containing cached labels and related information.
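
        Examples:
            >>> # Hypothetical rebuild of the cache; one "labels" entry is produced per annotated image found on disk
            >>> cache = dataset.cache_labels(Path("path/to/annotations.cache"))
            >>> print(len(cache["labels"]))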
        """
        x = {"labels": []}
        LOGGER.info("Loading annotation file...")
        with open(self.json_file) as f:
            annotations = json.load(f)
        images = {f"{x['id']:d}": x for x in annotations["images"]}
        img_to_anns = defaultdict(list)
        for ann in annotations["annotations"]:
            img_to_anns[ann["image_id"]].append(ann)
        for img_id, anns in TQDM(img_to_anns.items(), desc=f"Reading annotations {self.json_file}"):
            img = images[f"{img_id:d}"]
            h, w, f = img["height"], img["width"], img["file_name"]
            im_file = Path(self.img_path) / f
            if not im_file.exists():
                continue
            self.im_files.append(str(im_file))
            bboxes, segments = [], []
            cat2id = {}
            texts = []
            for ann in anns:
                if ann["iscrowd"]:
                    continue
                box = np.array(ann["bbox"], dtype=np.float32)  # xywh
                box[:2] += box[2:] / 2  # top-left xy to center xy
                box[[0, 2]] /= float(w)  # normalize x
                box[[1, 3]] /= float(h)  # normalize y
                if box[2] <= 0 or box[3] <= 0:
                    continue

                caption = img["caption"]
                cat_name = " ".join([caption[t[0] : t[1]] for t in ann["tokens_positive"]]).lower().strip()
                if not cat_name:
                    continue

                if cat_name not in cat2id:
                    cat2id[cat_name] = len(cat2id)
                    texts.append([cat_name])
                cls = cat2id[cat_name]  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                    if ann.get("segmentation") is not None:
                        if len(ann["segmentation"]) == 0:
                            segments.append(box)
                            continue
                        elif len(ann["segmentation"]) > 1:
                            s = merge_multi_segment(ann["segmentation"])
                            s = (np.concatenate(s, axis=0) / np.array([w, h], dtype=np.float32)).reshape(-1).tolist()
                        else:
                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
                            s = (
                                (np.array(s, dtype=np.float32).reshape(-1, 2) / np.array([w, h], dtype=np.float32))
                                .reshape(-1)
                                .tolist()
                            )
                        s = [cls] + s
                        segments.append(s)
            lb = np.array(bboxes, dtype=np.float32) if len(bboxes) else np.zeros((0, 5), dtype=np.float32)

            if segments:
                classes = np.array([x[0] for x in segments], dtype=np.float32)
                segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in segments]  # (cls, xy1...)
                lb = np.concatenate([classes.reshape(-1, 1), segments2boxes(segments)], axis=1)  # (cls, xywh)
            lb = np.array(lb, dtype=np.float32)

            x["labels"].append(
                {
                    "im_file": im_file,
                    "shape": (h, w),
                    "cls": lb[:, 0:1],  # n, 1
                    "bboxes": lb[:, 1:],  # n, 4
                    "segments": segments,
                    "normalized": True,
                    "bbox_format": "xywh",
                    "texts": texts,
                }
            )
        x["hash"] = get_hash(self.json_file)
        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
        return x

    def get_labels(self) -> List[Dict]:
        """
        Load labels from cache or generate them from JSON file.

        Returns:
            (List[dict]): List of label dictionaries, each containing information about an image and its annotations.
        """
        cache_path = Path(self.json_file).with_suffix(".cache")
        try:
            cache, _ = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash(self.json_file)  # identical hash
        except (FileNotFoundError, AssertionError, AttributeError, ModuleNotFoundError):
            cache, _ = self.cache_labels(cache_path), False  # run cache ops
        [cache.pop(k) for k in ("hash", "version")]  # remove items
        labels = cache["labels"]
        self.verify_labels(labels)
        self.im_files = [str(label["im_file"]) for label in labels]  # update im_files
        if LOCAL_RANK in {-1, 0}:
            LOGGER.info(f"Load {self.json_file} from cache file {cache_path}")
        return labels

    def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
        """
        Configure augmentations for training with optional text loading.

        Args:
            hyp (dict, optional): Hyperparameters for transforms.

        Returns:
            (Compose): Composed transforms including text augmentation if applicable.
        """
        transforms = super().build_transforms(hyp)
        if self.augment:
            # NOTE: hard-coded the args for now but could be changed by setting hyperparameters
            transform = RandomLoadText(
                max_samples=80,
                padding=True,
                padding_value=self._get_neg_texts(self.category_freq),
            )
            transforms.insert(-1, transform)
        return transforms

    @property
    def category_names(self):
        """Return unique category names from the dataset."""
        return {t.strip() for label in self.labels for text in label["texts"] for t in text}

    @property
    def category_freq(self):
        """Return frequency of each category in the dataset."""
        category_freq = defaultdict(int)
        for label in self.labels:
            for text in label["texts"]:
                for t in text:
                    t = t.strip()
                    category_freq[t] += 1
        return category_freq

    @staticmethod
    def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
        """Get negative text samples based on frequency threshold."""
        return [k for k, v in category_freq.items() if v >= threshold]


class YOLOConcatDataset(ConcatDataset):
    """
    Dataset as a concatenation of multiple datasets.

    This class is useful to assemble different existing datasets for YOLO training, ensuring they use the same
    collation function.

    Methods:
        collate_fn: Static method that collates data samples into batches using YOLODataset's collation function.

    Examples:
        >>> dataset1 = YOLODataset(...)
        >>> dataset2 = YOLODataset(...)
        >>> combined_dataset = YOLOConcatDataset([dataset1, dataset2])
    """

    @staticmethod
    def collate_fn(batch: List[Dict]) -> Dict:
        """Collate data samples into batches."""
        return YOLODataset.collate_fn(batch)

    def close_mosaic(self, hyp: Dict) -> None:
        """
        Set mosaic, copy_paste and mixup options to 0.0 and build transformations.

        Args:
            hyp (dict): Hyperparameters for transforms.
        """
        for dataset in self.datasets:
            if not hasattr(dataset, "close_mosaic"):
                continue
            dataset.close_mosaic(hyp)


class SemanticDataset(BaseDataset):
    """Semantic Segmentation Dataset."""

    def __init__(self):
        """Initialize a SemanticDataset object."""
        super().__init__()


class ClassificationDataset:
    """
    Dataset class for image classification tasks extending torchvision ImageFolder functionality.

    This class offers functionalities like image augmentation, caching, and verification. It's designed to efficiently
    handle large datasets for training deep learning models, with optional image transformations and caching mechanisms
    to speed up training.

    Attributes:
        cache_ram (bool): Indicates if caching in RAM is enabled.
        cache_disk (bool): Indicates if caching on disk is enabled.
        samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
                        file (if caching on disk), and optionally the loaded image array (if caching in RAM).
        torch_transforms (callable): PyTorch transforms to be applied to the images.
        root (str): Root directory of the dataset.
        prefix (str): Prefix for logging and cache filenames.

    Methods:
        __getitem__: Return subset of data and targets corresponding to given indices.
        __len__: Return the total number of samples in the dataset.
        verify_images: Verify all images in dataset.
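
    Examples:
        >>> # Sketch only; args is typically the parsed Ultralytics training cfg (imgsz, fraction, cache, ...)
        >>> dataset = ClassificationDataset(root="path/to/train", args=cfg, augment=True, prefix="train")
        >>> sample = dataset[0]  # {"img": transformed tensor, "cls": class index}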
    """

    def __init__(self, root: str, args, augment: bool = False, prefix: str = ""):
        """
        Initialize YOLO classification dataset with root directory, arguments, augmentations, and cache settings.

        Args:
            root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
            args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
                parameters, and cache settings.
            augment (bool, optional): Whether to apply augmentations to the dataset.
            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification.
        """
        import torchvision  # scope for faster 'import ultralytics'

        # Base class assigned as attribute rather than used as base class to allow for scoping slow torchvision import
        if TORCHVISION_0_18:  # 'allow_empty' argument first introduced in torchvision 0.18
            self.base = torchvision.datasets.ImageFolder(root=root, allow_empty=True)
        else:
            self.base = torchvision.datasets.ImageFolder(root=root)
        self.samples = self.base.samples
        self.root = self.base.root

        # Initialize attributes
        if augment and args.fraction < 1.0:  # reduce training fraction
            self.samples = self.samples[: round(len(self.samples) * args.fraction)]
        self.prefix = colorstr(f"{prefix}: ") if prefix else ""
        self.cache_ram = args.cache is True or str(args.cache).lower() == "ram"  # cache images into RAM
        if self.cache_ram:
            LOGGER.warning(
                "Classification `cache_ram` training has known memory leak in "
                "https://github.com/ultralytics/ultralytics/issues/9824, setting `cache_ram=False`."
            )
            self.cache_ram = False
        self.cache_disk = str(args.cache).lower() == "disk"  # cache images on hard drive as uncompressed *.npy files
        self.samples = self.verify_images()  # filter out bad images
        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
        scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
        self.torch_transforms = (
            classify_augmentations(
                size=args.imgsz,
                scale=scale,
                hflip=args.fliplr,
                vflip=args.flipud,
                erasing=args.erasing,
                auto_augment=args.auto_augment,
                hsv_h=args.hsv_h,
                hsv_s=args.hsv_s,
                hsv_v=args.hsv_v,
            )
            if augment
            else classify_transforms(size=args.imgsz)
        )

    def __getitem__(self, i: int) -> Dict:
        """
        Return subset of data and targets corresponding to given indices.

        Args:
            i (int): Index of the sample to retrieve.

        Returns:
            (dict): Dictionary containing the image and its class index.
        """
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram:
            if im is None:  # Warning: two separate if statements required here, do not combine this with previous line
                im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # load npy
                np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        # Convert NumPy array to PIL image
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        sample = self.torch_transforms(im)
        return {"img": sample, "cls": j}

    def __len__(self) -> int:
        """Return the total number of samples in the dataset."""
        return len(self.samples)

    def verify_images(self) -> List[Tuple]:
        """
        Verify all images in dataset.

        Returns:
            (list): List of valid samples after verification.
        """
        desc = f"{self.prefix}Scanning {self.root}..."
        path = Path(self.root).with_suffix(".cache")  # *.cache file path

        try:
            check_file_speeds([file for (file, _) in self.samples[:5]], prefix=self.prefix)  # check image read speeds
            cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
            nf, nc, n, samples = cache.pop("results")  # found, missing, empty, corrupt, total
            if LOCAL_RANK in {-1, 0}:
                d = f"{desc} {nf} images, {nc} corrupt"
                TQDM(None, desc=d, total=n, initial=n)
                if cache["msgs"]:
                    LOGGER.info("\n".join(cache["msgs"]))  # display warnings
            return samples

        except (FileNotFoundError, AssertionError, AttributeError):
            # Run scan if *.cache retrieval failed
            nf, nc, msgs, samples, x = 0, 0, [], [], {}
            with ThreadPool(NUM_THREADS) as pool:
                results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
                pbar = TQDM(results, desc=desc, total=len(self.samples))
                for sample, nf_f, nc_f, msg in pbar:
                    if nf_f:
                        samples.append(sample)
                    if msg:
                        msgs.append(msg)
                    nf += nf_f
                    nc += nc_f
                    pbar.desc = f"{desc} {nf} images, {nc} corrupt"
                pbar.close()
            if msgs:
                LOGGER.info("\n".join(msgs))
            x["hash"] = get_hash([x[0] for x in self.samples])
            x["results"] = nf, nc, len(samples), samples
            x["msgs"] = msgs  # warnings
            save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
            return samples