
    ujhf                         d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dddZh d	Zd
ee   dedee   fdZ G d de
      Zy)    N)Path)IterableListTupleUnion)Dataset)download_url_to_file@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols>8   .DOT+PLUS-DASH"QUOTE'QUOTE(PAREN)PAREN,COMMA--DASH.POINT/SLASH:COLON{BRACE(PARENS)PARENS-HYPHEN.PERIOD"UNQUOTE%PERCENT.DECIMAL
"END-QUOTE
"IN-QUOTES
#HASH-MARK
&AMPERSAND
'END-QUOTE
)END-PAREN
.FULL-STOP#POUND-SIGN#SHARP-SIGN(LEFT-PAREN)END-PARENS...ELLIPSIS;SEMI-COLON{LEFT-BRACE{OPEN-BRACE"CLOSE-QUOTE'INNER-QUOTE(PARENTHESES)CLOSE-PAREN)RIGHT-PAREN}CLOSE-BRACE}RIGHT-BRACE"DOUBLE-QUOTE"END-OF-QUOTE'SINGLE-QUOTE(BEGIN-PARENS)END-THE-PAREN;SEMI-COLON(1)?QUESTION-MARK(IN-PARENTHESES)UN-PARENTHESES'END-INNER-QUOTE)END-PARENTHESES(OPEN-PARENTHESES!EXCLAMATION-POINT)CLOSE-PARENTHESESlinesexclude_punctuationsreturnc                    t        j                  d      }g }| D ]  }|r|j                  d      r|j                         j	                  d      \  }}|t
        v r0|rD|j                  d      rd}n|j                  d      rd}n|d   }t        j                  |d|      }|j	                  d      }|j                  ||f        |S )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)rF   rG   _alt_recmudictlinewordphoness          Y/var/www/html/dev/engine/venv/lib/python3.12/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryrZ   J   s    jj'G+-G 'tu-zz|))$/f= # u%&Aw
 vvgr4(c"f~&/'2 N    c                       e Zd ZdZ	 ddddddeeef   deded	ed
eddfdZde	de
eee   f   fdZde	fdZedee   fd       Zy)CMUDictaZ  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        exclude_punctuations (bool, optional):
            When enabled, exclude the pronounciation of punctuations, such as
            `!EXCLAMATION-POINT` and `#HASH-MARK`.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional):
            The URL to download the dictionary from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
        url_symbols (str, optional):
            The URL to download the list of symbols from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
    Fr   r   )downloadurlurl_symbolsrootrG   r^   r_   r`   rH   Nc                   || _         t        |      | _        t        j                  j                  | j                        st        d|       | j                  t        j                  j                  |      z  }| j                  t        j                  j                  |      z  }t        j                  j                  |      s3|st        d|       t        j                  |d       }t        |||       t        j                  j                  |      s3|st        d|       t        j                  |d       }t        |||       t        |d      5 }	|	j                         D 
cg c]  }
|
j                          c}
| _        d d d        t        |dd      5 }	t!        |	j                         | j                         | _        d d d        y c c}
w # 1 sw Y   PxY w# 1 sw Y   y xY w)Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rzlatin-1)encoding)rG   )rG   r   
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr	   open	readlinesrO   _symbolsrZ   _dictionary)selfra   rG   r^   r_   r`   	dict_filesymbol_filechecksumtextrV   s              rY   __init__zCMUDict.__init__{   s    %9!t*ww}}T__-!DTFKLLOObgg&6&6s&;;	oo(8(8(EEww~~i(";;D+G  "~~c40H i:ww~~k*";;F-I  "~~k48H k8D+s# 	Ht6:nn6FGdTZZ\GDM	H )S95 	s01AX\XqXqrD	s 	s H	H 	H	s 	ss*   G*GG+G$GG!$G-nc                      | j                   |   S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of a word and its phonemes

            str:
                Word
            List[str]:
                Phonemes
        )rq   )rr   rx   s     rY   __getitem__zCMUDict.__getitem__   s     ""r[   c                 ,    t        | j                        S )N)lenrq   rr   s    rY   __len__zCMUDict.__len__   s    4##$$r[   c                 6    | j                   j                         S )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)rp   copyr}   s    rY   symbolszCMUDict.symbols   s     }}!!##r[   )T)__name__
__module____qualname____doc__r   strr   boolrw   intr   r   rz   r~   propertyr    r[   rY   r]   r]   i   s    ( &*'s
 Wg'sCI's #'s
 's 's 's 
'sR#S #U3S	>%: # % % $c $ $r[   r]   )rf   rL   pathlibr   typingr   r   r   r   torch.utils.datar   torchaudio._internalr	   rl   rQ   r   r   rZ   r]   r   r[   rY   <module>r      sh    	 	  / / $ 5 KM SU
9xXc] $ 4PS9 >Q$g Q$r[   