
    |hi                         d dl mZ d dl d dlmZ d dlmZmZ d dlZ	d dl
Z
d dlZd dlZdZddeeee	j                  f   dee   fd	Zd
 Zd ZddZy)    )binary_dilation)*)Path)OptionalUnionNi  fpath_or_wav	source_src                    t        | t              st        | t              r$t        j                  t        |       d      \  }}n| }|t        j
                  ||t              }t        |t        d      }t        |      }|S )a  
    Applies preprocessing operations to a waveform either on disk or in memory such that  
    The waveform will be resampled to match the data hyperparameters.

    :param fpath_or_wav: either a filepath to an audio file (many extensions are supported, not 
    just .wav), either the waveform as a numpy array of floats.
    :param source_sr: if passing an audio waveform, the sampling rate of the waveform before 
    preprocessing. After preprocessing, the waveform'speaker sampling rate will match the data 
    hyperparameters. If passing a filepath, the sampling rate will be automatically detected and 
    this argument will be ignored.
    N)sr)orig_sr	target_srT)increase_only)

isinstancestrr   librosaloadresamplesampling_ratenormalize_volumeaudio_norm_target_dBFStrim_long_silences)r   r	   wavs      P/var/www/html/test/engine/venv/lib/python3.12/site-packages/resemblyzer/audio.pypreprocess_wavr      sq     ,$
<(F c,&7DAY sIO 3 6d
KC
S
!CJ    c           	         t         j                  j                  | t        t	        t        t
        z  dz        t	        t        t        z  dz        t              }|j                  t        j                        j                  S )z
    Derives a mel spectrogram ready to be used by the encoder from a preprocessed audio waveform.
    Note: this not a log-mel spectrogram.
      )yr   n_fft
hop_lengthn_mels)r   featuremelspectrogramr   intmel_window_lengthmel_window_stepmel_n_channelsastypenpfloat32T)r   framess     r   wav_to_mel_spectrogramr-   *   sf    
 __++
-"33d:;}6=> , F ==$&&&r   c           	         t         t        z  dz  }| dt        |       t        |       |z  z
   } t        j                  dt        |       z  gt        j                  | t        z        j                  t
        j                         }g }t        j                  d      }t        dt        |       |      D ]6  }||z   }|j                  |j                  ||dz  |dz   t                     8 t        j                  |      }d	 } ||t               }t        j                  |      j                  t"              }t%        |t        j&                  t(        d
z               }t        j*                  ||      }| |dk(     S )a+  
    Ensures that segments without voice in the waveform remain no longer than a 
    threshold determined by the VAD parameters in params.py.

    :param wav: the raw waveform as a numpy array of floats 
    :return: the same waveform with silences trimmed away (length <= original wav length)
    r   Nz%dh   )moder      )sample_ratec                     t        j                  t        j                  |dz
  dz        | t        j                  |dz        f      }t        j                  |t              }||d  |d |  z
  ||d  ||dz
  d  |z  S )N   r1   )dtype)r)   concatenatezeroscumsumfloat)arraywidtharray_paddedrets       r   moving_averagez*trim_long_silences.<locals>.moving_averageT   s|    ~~rxxq0@'A5"((SX\]S]J^&_`iiE2%&kC%L0EF519:&&r   r4   T)vad_window_lengthr   lenstructpackr)   round	int16_maxr(   int16	webrtcvadVadrangeappend	is_speechr:   vad_moving_average_widthboolr   onesvad_max_silence_lengthrepeat)	r   samples_per_windowpcm_wavevoice_flagsvadwindow_start
window_endr>   
audio_masks	            r   r   r   9   sc    ,m;D 9s3x3s8&889
:C {{53s8+[rxxi/H.P.PQSQYQY.Z[H K
--Q
CaS+=> E!$66
3==,2B:PQ>)R5B ) D 	EE ((;'K'  -EFJ*%,,T2J !RWW5Ka5O-PQJ:'9:JzT!""r   c                    |r|rt        d      t        j                  t        j                  | t        z  dz              }dt        j
                  |t        z        z  }||z
  }|dk  r|s|dkD  r|r| S | d|dz  z  z  S )Nz,Both increase only and decrease only are setr1      r   
   )
ValueErrorr)   sqrtmeanrD   log10)r   target_dBFSr   decrease_onlyrms	wave_dBFSdBFS_changes          r   r   r   d   s    GHH
''"''3?q01
2CRXXcIo..I	)KQ=K!O
"r)*++r   )N)FF)scipy.ndimage.morphologyr   resemblyzer.hparamspathlibr   typingr   r   numpyr)   rF   r   rA   rD   r   ndarrayr$   r   r-   r   r    r   r   <module>rj      sZ    4 !  "    	sD"**'<!= (SV- :'(#V,r   