
    ujhW                         d dl mZ d dlmZmZmZ d dlZd dlmZ d dlm	Z	 g Z
e		 	 	 ddededee   d	ee   d
edeeef   fd       Ze G d d             Zddeded
edee   fdZy)    )	dataclass)ListOptionalTupleN)Tensor)fail_if_no_align	log_probstargetsinput_lengthstarget_lengthsblankreturnc                 X   ||v rt        d| d      t        j                  |      | j                  d   k\  rt        d      |T| j	                  d      | j	                  d      }}t        j
                  |f|t        j                  | j                        }|T|j	                  d      |j	                  d      }}t        j
                  |f|t        j                  |j                        }|J |J t        j                  j                  j                  | ||||      \  }}||fS )a  Align a CTC label sequence to an emission.

    .. devices:: CPU CUDA

    .. properties:: TorchScript

    Args:
        log_probs (Tensor): log probability of CTC emission output.
            Tensor of shape `(B, T, C)`. where `B` is the batch size, `T` is the input length,
            `C` is the number of characters in alphabet including blank.
        targets (Tensor): Target sequence. Tensor of shape `(B, L)`,
            where `L` is the target length.
        input_lengths (Tensor or None, optional):
            Lengths of the inputs (max value must each be <= `T`). 1-D Tensor of shape `(B,)`.
        target_lengths (Tensor or None, optional):
            Lengths of the targets. 1-D Tensor of shape `(B,)`.
        blank_id (int, optional): The index of blank symbol in CTC emission. (Default: 0)

    Returns:
        Tuple(Tensor, Tensor):
            Tensor: Label for each time step in the alignment path computed using forced alignment.

            Tensor: Log probability scores of the labels for each time step.

    Note:
        The sequence length of `log_probs` must satisfy:


        .. math::
            L_{\text{log\_probs}} \ge L_{\text{label}} + N_{\text{repeat}}

        where :math:`N_{\text{repeat}}` is the number of consecutively repeated tokens.
        For example, in str `"aabbc"`, the number of repeats are `2`.

    Note:
        The current version only supports ``batch_size==1``.
    z4targets Tensor shouldn't contain blank index. Found .z2targets values must be less than the CTC dimensionr      )dtypedevice)
ValueErrortorchmaxshapesizefullint64r   ops
torchaudioforced_align)	r	   r
   r   r   r   
batch_sizelengthpathsscoress	            ^/var/www/html/dev/engine/venv/lib/python3.12/site-packages/torchaudio/functional/_alignment.pyr   r      s   Z OPWyXYZ[[yyY__R00MNN&^^A.	q0AF


J=&T]TdTde$\\!_gll1oF
ZM6U\UcUcd $$$%%%II((55i-YginoME6&=    c                   N    e Zd ZU dZeed<   	 eed<   	 eed<   	 eed<   	 defdZy)		TokenSpanz[TokenSpan()
    Token with time stamps and score. Returned by :py:func:`merge_tokens`.
    tokenstartendscorer   c                 4    | j                   | j                  z
  S )zReturns the time span)r)   r(   )selfs    r#   __len__zTokenSpan.__len__[   s    xx$**$$r$   N)__name__
__module____qualname____doc__int__annotations__floatr-    r$   r#   r&   r&   L   s5     JJ;	H9L&% %r$   r&   tokensr"   c                    | j                   dk7  s|j                   dk7  rt        d      t        |       t        |      k7  rt        d      t        j                  | t        j
                  dg| j                        t        j
                  dg| j                              }t        j                  |dk7        j                         j                         }| j                         } t        |dd |dd       D cg c]=  \  }}| |   x}|k7  r.t        |||||| j                         j                         	      ? }}}|S c c}}w )
a  Removes repeated tokens and blank tokens from the given CTC token sequence.

    Args:
        tokens (Tensor): Alignment tokens (unbatched) returned from :py:func:`forced_align`.
            Shape: `(time, )`.
        scores (Tensor): Alignment scores (unbatched) returned from :py:func:`forced_align`.
            Shape: `(time, )`. When computing the token-size score, the given score is averaged
            across the corresponding time span.

    Returns:
        list of TokenSpan

    Example:
        >>> aligned_tokens, scores = forced_align(emission, targets, input_lengths, target_lengths)
        >>> token_spans = merge_tokens(aligned_tokens[0], scores[0])
    r   z(`tokens` and `scores` must be 1D Tensor.z.`tokens` and `scores` must be the same length.r   )r   )prependappendr   N)r'   r(   r)   r*   )ndimr   lenr   difftensorr   nonzerosqueezetolistzipr&   meanitem)	r6   r"   r   r<   changes_wo_blankr(   r)   r'   spanss	            r#   merge_tokensrF   `   s,   " {{a6;;!+CDD
6{c&k!IJJ::bT&--@WYVZcicpcpIqD }}dai199;BBD]]_F .s35Eab5IJE3E]"Eu, 	U6%;L;Q;Q;S;X;X;Z[E 
 Ls   =AE)NNr   )r   )dataclassesr   typingr   r   r   r   r   torchaudio._extensionr   __all__r2   r   r&   rF   r5   r$   r#   <module>rK      s    ! ( (   2
  '+'+=== F#= V$	=
 = 66>= =@ % % %&       DO  r$   