
    9|h J                     ~   d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ defdZ ej>                  e       Z! ejD                  d      de#fd       Z$defdZ% G d de&e      Z' G d de      Z( G d de(e      Z) G d de(e      Z*y)z@A chain for comparing the output of two models using embeddings.    N)Enum)util)AnyDictListOptional)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  J    	 dd l } | S # t        $ r}t        d      |d }~ww xY w)Nr   z@Could not import numpy, please install with `pip install numpy`.)numpyImportError)npes     k/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain/evaluation/embedding_distance/base.py_import_numpyr      s2    	 N
	s    	""   )maxsizec                  l    t        t        j                  d            ryt        j	                  d       y)Nr   Ta  NumPy not found in the current Python environment. langchain will use a pure Python implementation for embedding distance operations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyF)boolr   	find_specloggerwarning     r   _check_numpyr&   %   s,    DNN7#$
NN	 r%   c                      	 ddl m}   |        S # t        $ r) 	 ddlm}  n# t        $ r t        d      w xY wY  |        S w xY w)zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.)langchain_openair)   r   %langchain_community.embeddings.openair(   s    r   _embedding_factoryr-   3   sd    5   		  	Q 	 	s!    	A A5A Ac                   $    e Zd ZdZdZdZdZdZdZy)EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    cosine	euclidean	manhattan	chebyshevhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr$   r%   r   r/   r/   J   s"     FIIIGr%   r/   c                      e Zd ZU dZ ee      Zeed<    ee	j                        Ze	ed<   edeeef   deeef   fd       Z ed	
      Zedee   fd       ZdedefdZde	defdZedededefd       Zedededefd       Zedededefd       Zedededefd       Zedededefd       ZdedefdZ y)_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c                 J   |j                  d      }g }	 ddlm} |j                  |       	 ddlm} |j                  |       |st	        d      t        |t        |            r	 ddl}|S |S # t        $ r Y Nw xY w# t        $ r Y Ew xY w# t        $ r t	        d      w xY w)zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        rA   r   r(   r*   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr+   r)   appendr   r,   
isinstancetupletiktoken)clsrD   rA   types_r)   rJ   s         r   _validate_tiktoken_installedz9_EmbeddingDistanceChainMixin._validate_tiktoken_installedh   s     ZZ-
	9MM*+	 MM*+ Q 
 j%-0 v9  		  		  !I s.   A/ A> 'B /	A;:A;>	B
	B
B"T)arbitrary_types_allowedc                     dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer$   selfs    r   output_keysz(_EmbeddingDistanceChainMixin.output_keys   s     yr%   resultc                 D    d|d   i}t         |v r|t            |t         <   |S )NrP   r   )rR   rT   parseds      r   _prepare_outputz,_EmbeddingDistanceChainMixin._prepare_output   s*    6'?+f$WoF7Or%   metricc           
      8   t         j                  | j                  t         j                  | j                  t         j
                  | j                  t         j                  | j                  t         j                  | j                  i}||v r||   S t        d|       )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: )r/   r9   _cosine_distancer:   _euclidean_distancer;   _manhattan_distancer<   _chebyshev_distancer=   _hamming_distance
ValueError)rR   rX   metricss      r   _get_metricz(_EmbeddingDistanceChainMixin._get_metric   s     $$d&;&;'')A)A'')A)A'')A)A%%t'='=
 W6?"/x899r%   abc                 X    	 ddl m} d || |      z
  S # t        $ r t        d      w xY w)zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.g      ?)langchain_community.utils.mathre   r   )rb   rc   re   s      r   rZ   z-_EmbeddingDistanceChainMixin._cosine_distance   sB    	H &q!,,,  	6 	s    )c                     t               r"ddl}|j                  j                  | |z
        S t	        d t        | |      D              dz  S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        r   Nc              3   8   K   | ]  \  }}||z
  ||z
  z    y wNr$   .0xys      r   	<genexpr>zC_EmbeddingDistanceChainMixin._euclidean_distance.<locals>.<genexpr>   s!     ;AAEa!e$;s   g      ?)r&   r   linalgnormsumziprb   rc   r   s      r   r[   z0_EmbeddingDistanceChainMixin._euclidean_distance   s?     >99>>!a%((;Q;;sBBr%   c                     t               r-t               }|j                  |j                  | |z
              S t        d t	        | |      D              S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        c              3   >   K   | ]  \  }}t        ||z
          y wri   absrj   s      r   rn   zC_EmbeddingDistanceChainMixin._manhattan_distance.<locals>.<genexpr>        4$!Q3q1u:4   )r&   r   rq   rw   rr   rs   s      r   r\   z0_EmbeddingDistanceChainMixin._manhattan_distance   B     >B66"&&Q-((4#a)444r%   c                     t               r-t               }|j                  |j                  | |z
              S t        d t	        | |      D              S )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        c              3   >   K   | ]  \  }}t        ||z
          y wri   rv   rj   s      r   rn   zC_EmbeddingDistanceChainMixin._chebyshev_distance.<locals>.<genexpr>  rx   ry   )r&   r   maxrw   rr   rs   s      r   r]   z0_EmbeddingDistanceChainMixin._chebyshev_distance   rz   r%   c                     t               rt               }|j                  | |k7        S t        d t	        | |      D              t        |       z  S )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        c              3   2   K   | ]  \  }}||k7  sd   yw)r   Nr$   rj   s      r   rn   zA_EmbeddingDistanceChainMixin._hamming_distance.<locals>.<genexpr>  s     5Aa1f15s   )r&   r   meanrq   rr   lenrs   s      r   r^   z._EmbeddingDistanceChainMixin._hamming_distance  sB     >B7716?"5Q55A>>r%   vectorsc                 L   | j                  | j                        }t               rft        |t	               j
                        rH ||d   j                  dd      |d   j                  dd            j                         }t        |      S  ||d   |d         }t        |      S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r   r   )	ra   rC   r&   rH   r   ndarrayreshapeitemfloat)rR   r   rX   rP   s       r   _compute_scorez+_EmbeddingDistanceChainMixin._compute_score  s     !!$"6"67>j-/2I2IJ71:--a4gaj6H6HB6OPUUWE U| 71:wqz2EU|r%   N)!r5   r6   r7   r8   r   r-   rA   r   __annotations__r/   r9   rC   r   r   strr   rM   r   model_configpropertyr   rS   dictrW   ra   staticmethodrZ   r[   r\   r]   r^   r   r   r$   r%   r   r?   r?   \   s    #3EFJ
F).7H7O7O)PO&P+$sCx. +T#s(^ + +Z  $L T#Y  d t :"3 : :* -C -C -C - -( Cs Cs Cs C C" 5s 5s 5s 5 5  5s 5s 5s 5 5  ?S ?S ?S ? ? c e r%   r?   c                   l   e Zd ZdZedefd       Zedefd       Zede	e   fd       Z
	 ddeeef   dee   deeef   fd	Z	 ddeeef   dee   deeef   fd
Zdddddddedee   dedee	e      deeeef      dededefdZdddddddedee   dedee	e      deeeef      dededefdZy)EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                      y)zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr$   rQ   s    r   requires_referencez-EmbeddingDistanceEvalChain.requires_reference4  s     r%   c                 6    d| j                   j                   dS )N
embedding_	_distancerC   valuerQ   s    r   evaluation_namez*EmbeddingDistanceEvalChain.evaluation_name=  s    D00667yAAr%   c                 
    ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer$   rQ   s    r   
input_keysz%EmbeddingDistanceEvalChain.input_keysA  s     k**r%   Ninputsrun_managerc                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   rP   rA   embed_documentsr&   r   arrayr   rR   r   r   r   r   rP   s         r   _callz EmbeddingDistanceEvalChain._callJ  sa     //11L!6+#67
 >Bhhw'G##G,r%   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   NrP   rA   aembed_documentsr&   r   r   r   r   s         r   _acallz!EmbeddingDistanceEvalChain._acallb  sr      88|${#
 
 >Bhhw'G##G,
   'A*A(?A*F)r   	callbackstagsmetadatainclude_run_infor   r   r   r   r   r   kwargsc                D     | ||d||||      }| j                  |      S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   r   r   r   r   r   rW   	rR   r   r   r   r   r   r   r   rT   s	            r   _evaluate_stringsz,EmbeddingDistanceEvalChain._evaluate_strings}  s5    2 ",9E-
 ##F++r%   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   NacallrW   r   s	            r   _aevaluate_stringsz-EmbeddingDistanceEvalChain._aevaluate_strings  sL     2 zz",9E- " 
 
 ##F++
   757ri   )r5   r6   r7   r8   r   r    r   r   r   r   r   r   r   r   r
   r   r	   r   r   r   r   r   r$   r%   r   r   r   )  s    D   B B B +DI + + =A S#X  89  
c3h	 6 BF S#X  =>  
c3h	 > $(#$(-1!& ,  , C=	 ,
  , tCy! , 4S>* ,  ,  , 
 ,L $(#$(-1!& ,  , C=	 ,
  , tCy! , 4S>* ,  ,  , 
 ,r%   r   c                   F   e Zd ZdZedee   fd       Zedefd       Z	 dde	ee
f   dee   de	ee
f   fdZ	 dde	ee
f   dee   de	ee
f   fd	Zdddd
ddedededeee      dee	ee
f      dede
defdZdddd
ddedededeee      dee	ee
f      dede
defdZy)"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 
    ddgS )r   r   prediction_br$   rQ   s    r   r   z-PairwiseEmbeddingDistanceEvalChain.input_keys  s     n--r%   c                 6    d| j                   j                   dS )Npairwise_embedding_r   r   rQ   s    r   r   z2PairwiseEmbeddingDistanceEvalChain.evaluation_name  s    $T%9%9%?%?$@	JJr%   Nr   r   c                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   rP   r   r   s         r   r   z(PairwiseEmbeddingDistanceEvalChain._call  sd     //11|$~&
 >Bhhw'G##G,r%   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   NrP   r   r   s         r   r   z)PairwiseEmbeddingDistanceEvalChain._acall  sr      88|$~&
 
 >Bhhw'G##G,
r   F)r   r   r   r   r   r   r   r   r   r   r   c                D     | ||d||||      }| j                  |      S )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   r   r   	rR   r   r   r   r   r   r   r   rT   s	            r   _evaluate_string_pairsz9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairs  s5    4 ",lK-
 ##F++r%   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   Nr   r   s	            r   _aevaluate_string_pairsz:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairs4  sL     8 zz",lK- " 
 
 ##F++
r   ri   )r5   r6   r7   r8   r   r   r   r   r   r   r   r   r
   r   r	   r   r   r    r   r   r   r$   r%   r   r   r     s    .DI . . K K K =A S#X  89  
c3h	 < BF S#X  =>  
c3h	 @  $$(-1!&!, !, 	!,
 !, tCy!!, 4S>*!, !, !, 
!,P  $$(-1!&#, #, 	#,
 #, tCy!#, 4S>*#, #, #, 
#,r%   r   )+r8   	functoolsloggingenumr   	importlibr   typingr   r   r   r    langchain_core.callbacks.managerr	   r
   r   langchain_core.embeddingsr   langchain_core.utilsr   pydanticr   r   langchain.chains.baser   langchain.evaluation.schemar   r   langchain.schemar   r   	getLoggerr5   r"   	lru_cacher    r&   r-   r   r/   r?   r   r   r$   r%   r   <module>r      s    F     , , 
 1 ) & ' P $s  
		8	$ Q
d 
  
J .T $J5 JZV,!= V,rU, "9U,r%   