
    ujhC                         d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ dZddiZeeeeeef   Z G d	 d
e      Zy)    N)Tuple)Tensor)Dataset)download_url_to_file)_extract_zipzNhttps://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip@f96258be9fdc2cbff6559541aae7ea4f59df3fcaf5cf963aae5ca647357e359cc            	           e Zd ZdZddedfdedededefd	Zd
efdZd
e	e
ef   fdZdededed
efdZded
efdZd
efdZy)VCTK_092a:  *VCTK 0.92* :cite:`yamagishi2019vctk` dataset

    Args:
        root (str): Root directory where the dataset's top level directory is found.
        mic_id (str, optional): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional): The URL to download the dataset from.
            (default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
        audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.

    Note:
        * All the speeches from speaker ``p315`` will be skipped due to the lack of the corresponding text files.
        * All the speeches from ``p280`` will be skipped for ``mic_id="mic2"`` due to the lack of the audio files.
        * Some of the speeches from speaker ``p362`` will be skipped due to the lack of  the audio files.
        * See Also: https://datashare.is.ed.ac.uk/handle/10283/3443
    mic2Fz.flacrootmic_iddownloadurlc           
         |dvrt        d|       t        j                  j                  |d      }t        j                  j                  |d      | _        t        j                  j                  | j                  d      | _        t        j                  j                  | j                  d      | _        || _        || _        |rt        j                  j                  | j                        sYt        j                  j                  |      s$t        j                  |d       }t        |||       t        || j                         t        j                  j                  | j                        st        d      t        t        j                   | j
                              | _        g | _        	 | j"                  D ]  }|d	k(  r|d
k(  rt        j                  j                  | j
                  |      }	t        d t        j                   |	      D              D ]  }
t        j                  j'                  |
      d   }t        j                  j                  | j                  || d| | j                         }|dk(  r t        j                  j                  |      s| j$                  j)                  |j+                  d               y )N)mic1r   z3`mic_id` has to be either "mic1" or "mic2". Found: zVCTK-Corpus-0.92.zipzVCTK-Corpus-0.92txtwav48_silence_trimmed)hash_prefixz=Dataset not found. Please use `download=True` to download it.p280r   c              3   D   K   | ]  }|j                  d       s|  yw).txtN)endswith).0fs     V/var/www/html/dev/engine/venv/lib/python3.12/site-packages/torchaudio/datasets/vctk.py	<genexpr>z$VCTK_092.__init__.<locals>.<genexpr>U   s     (dqQRQ[Q[\bQc(ds     r   _p362)RuntimeErrorospathjoin_path_txt_dir
_audio_dir_mic_id
_audio_extisdirisfile
_CHECKSUMSgetr   r   sortedlistdir_speaker_ids_sample_idssplitextappendsplit)selfr   r   r   r   	audio_extarchivechecksum
speaker_idutterance_dirutterance_fileutterance_idaudio_path_mics                r   __init__zVCTK_092.__init__&   s    ))!TU[T\]^^'',,t%;<WW\\$(:;
TZZ7'',,tzz3JK#77==,ww~~g.)~~c48H(g8LWdjj1ww}}TZZ(^__ #2::dmm#<=		 ++ 	AJV#&(8GGLL
CM"((dBJJ}4M(d"d 	A!ww//?B!#OO#nAfXdoo->?"
 '~0N  ''(:(:3(?@	A		A    returnc                 j    t        |      5 }|j                         d   cd d d        S # 1 sw Y   y xY w)Nr   )open	readlinesr3   	file_paths     r   
_load_textzVCTK_092._load_text`   s1    )_ 	,	&&(+	, 	, 	,s   )2c                 ,    t        j                  |      S N)
torchaudioloadrB   s     r   _load_audiozVCTK_092._load_audiod   s    y))r=   r7   r:   c           
      :   t         j                  j                  | j                  || d| d      }t         j                  j                  | j                  || d| d| | j
                         }| j                  |      }| j                  |      \  }}|||||fS )Nr   r   )r    r!   r"   r$   r%   r'   rD   rI   )	r3   r7   r:   r   transcript_path
audio_path
transcriptwaveformsample_rates	            r   _load_samplezVCTK_092._load_sampleg   s    '',,t}}jZLPQR^Q__cBdeWW\\OOl!L>6(4??2CD

 ___5
 !% 0 0 <++z:|LLr=   nc                 `    | j                   |   \  }}| j                  ||| j                        S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            str:
                Speaker ID
            std:
                Utterance ID
        )r/   rP   r&   )r3   rQ   r7   r:   s       r   __getitem__zVCTK_092.__getitem__w   s2    ( $(#3#3A#6 
L  \4<<HHr=   c                 ,    t        | j                        S rF   )lenr/   )r3   s    r   __len__zVCTK_092.__len__   s    4##$$r=   N)__name__
__module____qualname____doc__URLstrboolr<   rD   r   r   intrI   
SampleTyperP   rS   rV    r=   r   r
   r
      s    * 8A8A 8A 	8A
 8At,s ,*fck(: *Ms M# Ms Mz M IS IZ I.% %r=   r
   )r    typingr   rG   torchr   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr   r[   r*   r^   r\   r_   r
   r`   r=   r   <module>rf      sU    	    $ 5 2VT  WY

 63S#-.
|%w |%r=   