
    ujhg                         d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ dZdZd	Zg d
Z G d de
      ZdededefdZy)    N)Path)OptionalTupleUnion)Dataset)download_url_to_file)_extract_tar_load_waveformz6https://speech.fit.vutbr.cz/files/quesst14Database.tgzi@  @4f869e06bc066bbe9c5dde31dbd3909a0870d70291110ebbb38878dcbc2fc5e4)albanianbasqueczech	nnenglishromanianslovakc                       e Zd ZdZ	 	 ddeeef   dedee   deddf
dZ	d	e
deee
ef   fd
Zd	e
deej                  e
ef   fdZde
fdZy)QUESST14a  *QUESST14* :cite:`Mir2015QUESST2014EQ` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found
        subset (str): Subset of the dataset to use. Options: [``"docs"``, ``"dev"``, ``"eval"``].
        language (str or None, optional): Language to get dataset for.
            Options: [``None``, ``albanian``, ``basque``, ``czech``, ``nnenglish``, ``romanian``, ``slovak``].
            If ``None``, dataset consists of all languages. (default: ``"nnenglish"``)
        download (bool, optional): Whether to download the dataset if it is not found at root path.
            (default: ``False``)
    rootsubsetlanguagedownloadreturnNc                 J   |dvrt        d      |#|t        vrt        dt        t                     t        j                  |      }t        j
                  j                  t              }t        j
                  j                  ||      }|j                  dd      d   }t        j
                  j                  ||      | _
        t        j
                  j                  | j                        sNt        j
                  j                  |      s#|st        d      t        t        |t               t!        ||       |d	k(  rt#        | j                  |d
      | _        y |dk(  rt#        | j                  |d      | _        y |dk(  rt#        | j                  |d      | _        y y )N)docsdevevalz/`subset` must be one of ['docs', 'dev', 'eval']z"`language` must be None or one of .   r   z9Dataset not found. Please use `download=True` to download)hash_prefixr   zlanguage_key_utterances.lstr   zlanguage_key_dev.lstr   zlanguage_key_eval.lst)
ValueError
_LANGUAGESstrosfspathpathbasenameURLjoinrsplit_pathisdirisfileRuntimeErrorr   	_CHECKSUMr	   filter_audio_pathsdata)selfr   r   r   r   r&   archives          Z/var/www/html/dev/engine/venv/lib/python3.12/site-packages/torchaudio/datasets/quesst14.py__init__zQUESST14.__init__&   sD    00NOOHJ$>A#j/ARSTT yy77##C('',,tX.??3*1-WW\\$1
ww}}TZZ(77>>'*&'bcc$S'yI$'V*4::xA^_DIu_*4::xAWXDIv*4::xAXYDI     nc                     | j                   |   }t        j                  j                  || j                        }|t
        |j                  d      j                  fS )a  Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
         )r0   r#   r%   relpathr*   SAMPLE_RATEwith_suffixname)r1   r6   
audio_pathr9   s       r3   get_metadatazQUESST14.get_metadataJ   sG    " YYq\
''//*djj9Z%;%;B%?%D%DDDr5   c                 p    | j                  |      }t        | j                  |d   |d         }|f|dd z   S )a:  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
        r      N)r>   r
   r*   )r1   r6   metadatawaveforms       r3   __getitem__zQUESST14.__getitem___   sA      $$Q'!$**hqk8A;G{Xab\))r5   c                 ,    t        | j                        S )N)lenr0   )r1   s    r3   __len__zQUESST14.__len__s   s    499~r5   )r   F)__name__
__module____qualname____doc__r   r"   r   r   boolr4   intr   r>   torchTensorrC   rF    r5   r3   r   r      s    
  #."ZCI"Z "Z 3-	"Z
 "Z 
"ZHEc EeCcM&: E**S *U5<<c+A%B *( r5   r   r%   r   lst_namec                 &   g }t        |       } t        | dz  |z        5 }|D ]V  }|j                         j                         \  }}|||k7  r,t	        j
                  dd|      }|j                  | |z         X 	 ddd       |S # 1 sw Y   |S xY w)z+Extract audio paths for the given language.scoringNz^.*?\/r8   )r   openstripsplitresubappend)r%   r   rP   audio_pathsfliner=   langs           r3   r/   r/   w   s     K:D	dY)	* 2a 	2D#zz|113J#(8	2z:Jtj01	22 2 s   ABB)r#   rV   pathlibr   typingr   r   r   rM   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr	   r
   r'   r:   r.   r!   r   r"   r/   rO   r5   r3   <module>rb      sf    	 	  ) )  $ 5 B ?N	
[w [|
 r5   