
    |h              	           d dl Z d dlZd dlmZ d dlmZmZ d dlmZm	Z	 d dl
mZmZmZ ddeeef   dedefd	Zed
z  ddfdedeeeef   deddfdZedk(  r	 ed       yy)    N)Path)TupleUnion)IMG_FORMATSimg2label_paths)DATASETS_DIRLOGGERTQDM
source_dirtrain_ratioreturnc                    t        |       }t        | d      }|dz  |dz  }}|j                  d       |j                  d       |j                  d       |j                         D cg c]  }|j                         s| }}t	        d |D              }t        |       d| d}	t        j                  d	| d
|	 d|ddd|z
  dd	       |D ]  }
||
j                  z  j                  d       ||
j                  z  j                  d       t        |
j                  d            }t        j                  |       t        t        |      |z        }|d| D ]2  }t        j                  |||
j                  z  |j                  z         4 ||d D ]2  }t        j                  |||
j                  z  |j                  z         4  t        j                  d| d       |S c c}w )u.  
    Split classification dataset into train and val directories in a new directory.

    Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
    structure with an 80/20 split by default.

    Directory structure:
        Before:
            caltech/
            ├── class1/
            │   ├── img1.jpg
            │   ├── img2.jpg
            │   └── ...
            ├── class2/
            │   ├── img1.jpg
            │   └── ...
            └── ...

        After:
            caltech_split/
            ├── train/
            │   ├── class1/
            │   │   ├── img1.jpg
            │   │   └── ...
            │   ├── class2/
            │   │   ├── img1.jpg
            │   │   └── ...
            │   └── ...
            └── val/
                ├── class1/
                │   ├── img2.jpg
                │   └── ...
                ├── class2/
                │   └── ...
                └── ...

    Args:
        source_dir (str | Path): Path to classification dataset root directory.
        train_ratio (float): Ratio for train split, between 0 and 1.

    Returns:
        (Path): Path to the created split directory.

    Examples:
        Split dataset with default 80/20 ratio
        >>> split_classify_dataset("path/to/caltech")

        Split with custom ratio
        >>> split_classify_dataset("path/to/caltech", 0.75)
    _splittrainvalT)exist_okc              3   b   K   | ]'  }t        t        |j                  d                    ) yw)*.*N)lenlistglob).0ds     U/var/www/html/test/engine/venv/lib/python3.12/site-packages/ultralytics/data/split.py	<genexpr>z)split_classify_dataset.<locals>.<genexpr>J   s"     DAs4u./Ds   -/z
 classes, z imagesz
Splitting z (z) into z.0%z train,    z val...r   NzSplit complete in u    ✅)r   mkdiriterdiris_dirsumr   r	   infonamer   r   randomshuffleintshutilcopy2)r   r   source_path
split_path
train_pathval_pathr   
class_dirstotal_imagesstats	class_dirimage_files	split_idximgs                 r   split_classify_datasetr3      s   f z"KV,-J%/e1CJ d#d#NNDN! )002Aahhj!AJADDDL:z,w?E
KK*[ME7'+c9J(STWbSbcfRggnop D		inn	$++T+:	INN	"))4)8 9>>%01{#K(;67	z	* 	FCLLj9>>9CHHDE	F yz* 	DCLLh7#((BC	DD  KK$ZL56- Bs   ,G,G,zcoco8/images)g?g?g        Fpathweightsannotated_onlyc                    t        |       } t        d | j                  d      D              }t        |      }t	        j
                  d       t	        j                  g d||      }g d}|D ]=  }| j                  |z  j                         s!| j                  |z  j                          ? t        j                  d|  d|z  z          t        t        ||      |	      D ]  \  }}	|r0t        t        t        |	      g      d         j                         s8t!        | j                  ||   z  d
d      5 }
|
j#                  d|	j%                  | j                        j'                          dz          ddd        y# 1 sw Y   xY w)az  
    Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

    Args:
        path (Path): Path to images directory.
        weights (tuple): Train, validation, and test split fractions.
        annotated_only (bool): If True, only images with an associated txt file are used.

    Examples:
        Split images with default weights
        >>> from ultralytics.data.split import autosplit
        >>> autosplit()

        Split with custom weights and annotated images only
        >>> autosplit(path="path/to/images", weights=(0.8, 0.15, 0.05), annotated_only=True)
    c              3   h   K   | ]*  }|j                   d d j                         t        v s'| , yw)r   N)suffixlowerr   )r   xs     r   r   zautosplit.<locals>.<genexpr>x   s*     W188AB<3E3E3G;3V1Ws   (22r   r   )r   r      )r5   k)zautosplit_train.txtzautosplit_val.txtzautosplit_test.txtzAutosplitting images from z!, using *.txt labeled images only)totalazutf-8)encodingz./
N)r   sortedrglobr   r#   seedchoicesparentexistsunlinkr	   r!   r
   zipr   stropenwriterelative_toas_posix)r4   r5   r6   filesnindicestxtr;   ir2   fs              r   	autosplitrU   b   sY   * :DWdjj/WWEE
A
KKNnnY1=G
LC 'KK!O##%[[1_$$&' KK,TF36Y\j6jjks7E*!4 O3os3xj&A!&D!E!L!L!NdkkCF*C'B Oa"S__T[[9BBDEFMNO OOO Os   8?FF	__main__
caltech101)g?)r#   r&   pathlibr   typingr   r   ultralytics.data.utilsr   r   ultralytics.utilsr   r	   r
   rJ   floatr3   boolrU   __name__     r   <module>ra      s        ? 8 8SuS$Y'7 Se SVZ Sn .*9 $O
$O5%&'$O $O 
	$ON z<( r`   