
    6|hoy                       d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZ ddlmZ 	 ddlmZmZmZmZ ddlZdd	lm Z  dd
l!m"Z"m#Z#m$Z$m%Z%  ejL                  e'      Z( G d de      Z) G d ded      Z* G d de      Z+ G d ded      Z, G d d      Z-ee+e,e.f   Z/ G d de      Z0ee0e.f   Z1 G d de-      Z2	 	 d&dZ3dZ4d'dZ5d(dZ6 G d d       Z7	 	 	 	 d)d!Z8	 	 	 	 d*d"Z9	 	 	 	 d+d#Z:	 	 	 	 d,d$Z;eeeejJ                     eejH                     gee+e,f   f   eeejJ                     eejH                     gee+e,f   f   f   Z<d-d%Z=y# e$ r ddlmZmZmZmZ Y -w xY w).z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)
Any	AwaitableCallableDictListLiteralOptionalSequenceUnioncast)	TypedDict)schemas)	BaseModelFieldValidationError	validator)wraps)
SCORE_TYPE
VALUE_TYPEExampleRunc                  (    e Zd ZU dZded<   	 ded<   y)Categoryz$A category for categorical feedback.Optional[Union[float, int]]valuestrlabelN__name__
__module____qualname____doc____annotations__     ]/var/www/html/test/engine/venv/lib/python3.12/site-packages/langsmith/evaluation/evaluator.pyr   r   1   s    .&&CJ&r'   r   c                  @    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   y	)
FeedbackConfigziConfiguration to define a type of feedback.

    Applied on on the first creation of a feedback_key.
    z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[List[Union[Category, dict]]]
categoriesNr    r&   r'   r(   r*   r*   :   s*    
 ;:	$$;	$$A55r'   r*   F)totalc                      e Zd ZU dZded<   	 dZded<   	 dZded<   	 dZd	ed
<   	 dZded<   	  e	e
      Zded<   	 dZded<   	 dZded<   	 dZded<   	 dZded<   	  G d d      Z edd      d        Zy)EvaluationResultzEvaluation result.r   keyNr   scorer   r   zOptional[str]commentzOptional[Dict]
correction)default_factoryr   evaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextrac                      e Zd ZdZdZy)EvaluationResult.ConfigzPydantic model configuration.FN)r!   r"   r#   r$   allow_extrar&   r'   r(   Configr>   d   s
    +r'   r@   T)prec                t    d|vs|d   .t        |t        t        f      rt        j	                  d|        |S )z$Check that the value is not numeric.r3   zJNumeric values should be provided in the 'score' field, not 'value'. Got: )
isinstanceintfloatloggerwarning)clsvvaluess      r(   check_value_non_numericz(EvaluationResult.check_value_non_numerici   sE    
 & F7O$;!c5\*C!
 r'   )r!   r"   r#   r$   r%   r3   r   r4   r5   r   dictr7   r8   r:   r;   r<   r@   r   rK   r&   r'   r(   r1   r1   I   s    	H@E:0E:8!G]!2!%J%: 6ND65=AO:A;59M29659M29 !E> ) 
 wD! "r'   r1   c                      e Zd ZU dZded<   y)EvaluationResultszqBatch evaluation results.

    This makes it easy for your evaluator to return multiple
    metrics at once.
    zList[EvaluationResult]resultsNr    r&   r'   r(   rN   rN   x   s     $#!r'   rN   c                  F    e Zd ZdZe	 d	 	 	 	 	 dd       Z	 d	 	 	 	 	 ddZy)RunEvaluatorzEvaluator interface class.Nc                     y)zEvaluate an example.Nr&   selfrunexamples      r(   evaluate_runzRunEvaluator.evaluate_run   s    r'   c                ~   K   t        j                         j                  d| j                  ||       d{   S 7 w)z#Evaluate an example asynchronously.N)asyncioget_running_looprun_in_executorrW   rS   s      r(   aevaluate_runzRunEvaluator.aevaluate_run   s=      --/??$##S'
 
 	
 
s   4=;=NrU   r   rV   Optional[Example]return*Union[EvaluationResult, EvaluationResults])r!   r"   r#   r$   r   rW   r\   r&   r'   r(   rQ   rQ      sT    $59##!2#	3# # 6:

!2
	3
r'   rQ   c                  H    e Zd ZU dZded<   	 ded<   	 dZded<   	 dZd	ed
<   y)ComparisonEvaluationResultzFeedback scores for the results of comparative evaluations.

    These are generated by functions that compare two or more runs,
    returning a ranking or other feedback.
    r   r2   z'Dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr9   r:   z6Optional[Union[str, Dict[Union[uuid.UUID, str], str]]]r4   )r!   r"   r#   r$   r%   r:   r4   r&   r'   r(   rc   rc      s8     
H@33459M296FJGCJ:r'   rc   c                       e Zd ZdZ	 d	 	 	 ddZ	 d	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZedd       Z		 d	 	 	 	 	 ddZ
dd fdZ	 d	 	 	 	 	 dd	Zdd
Z xZS )DynamicRunEvaluatora  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

    This class is designed to be used with the `@run_evaluator` decorator, allowing
    functions that take a `Run` and an optional `Example` as arguments, and return
    an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

    Attributes:
        func (Callable): The function that is wrapped by this evaluator.
    c                   t        |      }|rt        |      } t        |      |        ddlm} |.|j	                  |t
              | _        t        |dd      | _        t        j                  |      r<|t        d      |j	                  |t
              | _        t        |dd      | _        y|j	                  t        t        t        t        t            gt"        f   |      t
              | _        t        |dd      | _        y)zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns a dict or `ComparisonEvaluationResult`.
        r   run_helpersNprocess_inputsr!   rf   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.)_normalize_evaluator_funcr   	langsmithri   ensure_traceable_serialize_inputsafuncgetattr_nameinspectiscoroutinefunction	TypeErrorr   r   r   r   r   _RUNNABLE_OUTPUTfuncrT   rx   rq   ri   s       r(   __init__zDynamicRunEvaluator.__init__   s   ( ).-e4EdD)$55&7 6 DJ !
4IJDJ&&t, 3 
 %55%6 6 DJ !z3HIDJ#44XsHW$568HHI4P0 5 DI !z3HIDJr'   c                <   t        t              rj                  s|_        S 	 st        d       dvr|r| j                  d<   t        fddD              rt        d       t        di d|iS # t        $ r}t        d       |d }~ww xY w)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r2   c              3  &   K   | ]  }|v 
 y wr]   r&   ).0kresults     r(   	<genexpr>z@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>   s     Jq1F?J   )r3   r   r4   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r:   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r&   )rC   r1   r:   
ValueErrorrs   allr   )rT   r   r:   allow_no_keyes    `   r(   _coerce_evaluation_resultz-DynamicRunEvaluator._coerce_evaluation_result   s     f./'''4$M	 FFLXO  F"| $

uJ,IJJ OOUhX  $Q&P&PQQ 	44:8= 	s   AA> >	BBBc                    d|v rB|j                         }|d   D cg c]  }| j                  ||       c}|d<   t        di |S | j                  t        t        |      |d      S c c}w )NrO   )r:   T)r:   r   r&   )copyr   rN   r   rL   )rT   rO   r:   cprs        r(   _coerce_evaluation_resultsz.DynamicRunEvaluator._coerce_evaluation_results  s    
 B !+ ..q.NByM %*r**--w}4 . 
 	
s   A)c                    t        |t              r|j                  s||_        |S t        |      }| j	                  ||      S r]   )rC   r1   r:   _format_evaluator_resultr   )rT   r   r:   s      r(   _format_resultz"DynamicRunEvaluator._format_result  sB     f./'''4$M)&1..v}EEr'   c                    t        | d      S zCheck if the evaluator function is asynchronous.

        Returns:
            bool: True if the evaluator function is asynchronous, False otherwise.
        rq   hasattrrT   s    r(   is_asynczDynamicRunEvaluator.is_async'       tW%%r'   c                   t        | d      sPt        j                         }|j                         rt	        d      |j                  | j                  ||            S t        j                         }d|j                  i}t        |dd      rt        |j                        |d<   | j                  ||||d      }| j                  ||      S )	a  Evaluate a run using the wrapped function.

        This method directly invokes the wrapped function with the provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rx   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.r;   
session_idN
experimentrun_idmetadatalangsmith_extra)r   rY   get_event_loop
is_runningRuntimeErrorrun_until_completer\   uuiduuid4idrr   r   r   rx   r   )rT   rU   rV   running_loopr:   r   r   s          r(   rW   z DynamicRunEvaluator.evaluate_run0  s     tV$"113L&&("R 
 $66t7I7I#w7WXX

$3SVV#<3d+%(%8H\"'4(K  

 ""6=99r'   c                L  K   t        | d      st        | 	  ||       d{   S t        j                         }d|j
                  i}t        |dd      rt        |j                        |d<   | j                  ||||d       d{   }| j                  ||      S 7 |7 w)a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rq   Nr;   r   r   r   r   )r   superr\   r   r   r   rr   r   r   rq   r   )rT   rU   rV   r:   r   r   	__class__s         r(   r\   z!DynamicRunEvaluator.aevaluate_runR  s      tW%.sG<<<

$3SVV#<3d+%(%8H\"zz'4(K " 
 

 ""6=99 =

s"    B$B A%B$	B"
B$"B$c                &    | j                  ||      S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        )rW   rS   s      r(   __call__zDynamicRunEvaluator.__call__m  s       g..r'   c                "    d| j                    dS ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >rs   r   s    r(   __repr__zDynamicRunEvaluator.__repr__~  s    &tzzl!44r'   r]   )rx   XCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]rq   zIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]])F)r   zUnion[EvaluationResult, dict]r:   	uuid.UUIDr   boolr`   r1   )rO   zUnion[dict, EvaluationResults]r:   r   r`   ra   )r   zMUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]r:   r   r`   ra   r`   r   r^   )rU   r   rV   r_   r`   r   )r!   r"   r#   r$   rz   r   r   r   propertyr   rW   r\   r   r   __classcell__)r   s   @r(   rf   rf      s   , 0J
0J
0Jl #	- ! 	
 
<
/
 !
 
4	
"F
F
 !F 
4F & & 6: : :!2 :	3 :D:8 6://!2/	3/"5r'   rf   c                    t        |       S )zmCreate a run evaluator from a function.

    Decorator that transforms a function into a `RunEvaluator`.
    )rf   rx   s    r(   run_evaluatorr     s     t$$r'   i'  c                ^    t        |       }t        |      t        kD  r|d t        dz
   dz   }|S )N   z...))reprlen_MAXSIZE)objss     r(   _maxsize_reprr     s1    S	A
1vn1&Hr'   c                t    t        | j                  d            }t        | j                  d            }||dS )NrU   rV   )rU   rV   )r   get)inputsrun_truncatedexample_truncateds      r(   rp   rp     s5    !&**U"34M%fjj&;< ->??r'   c                      e Zd ZdZ	 d	 	 	 ddZedd       Z	 d	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZ	ddZ
edd	       Z	 	 	 	 	 	 	 	 dd
Zy)DynamicComparisonRunEvaluatorz4Compare predictions (as traces) from 2 or more runs.Nc                   t        |      }|rt        |      } t        |      |        ddlm} |.|j	                  |t
              | _        t        |dd      | _        t        j                  |      r<|t        d      |j	                  |t
              | _        t        |dd      | _        y|j	                  t        t        t        t           t         t"           gt$        f   |      t
              | _        t        |dd      | _        y)zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns an `EvaluationResult` or `EvaluationResults`.
        r   rh   Nrj   r!   rf   rl   )$_normalize_comparison_evaluator_funcr   rn   ri   ro   rp   rq   rr   rs   rt   ru   rv   r   r   r   r   r   r   _COMPARISON_OUTPUTrx   ry   s       r(   rz   z&DynamicComparisonRunEvaluator.__init__  s   ( 4D98?EdD)$55&7 6 DJ !
4IJDJ&&t, 3 
 %55%6 6 DJ !z3HIDJ#44!#(9:*,   1 5 	DI !z3HIDJr'   c                    t        | d      S r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.is_async  r   r'   c                X   t        | d      sPt        j                         }|j                         rt	        d      |j                  | j                  ||            S t        j                         }| j                  |      }| j                  ||||d      }| j                  |||      S )zCompare runs to score preferences.

        Args:
            runs: A list of runs to compare.
            example: An optional example to be used in the evaluation.

        rx   r   r   tagsr   )r   rY   r   r   r   r   acompare_runsr   r   	_get_tagsrx   _format_results)rT   runsrV   r   r:   r   r   s          r(   compare_runsz*DynamicComparisonRunEvaluator.compare_runs  s     tV$"113L&&("R 
 $66&&tW5  

~~d#'4dC  

 ##FM4@@r'   c                   K   t        | d      s| j                  ||      S t        j                         }| j	                  |      }| j                  ||||d       d{   }| j                  |||      S 7 w)a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            runs (Run): The runs to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        rq   r   r   N)r   r   r   r   r   rq   r   )rT   r   rV   r:   r   r   s         r(   r   z+DynamicComparisonRunEvaluator.acompare_runs  s       tW%$$T733

~~d#zz'4dC " 
 

 ##FM4@@
s   AA: A8!A:c                &    | j                  ||      S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        )r   )rT   r   rV   s      r(   r   z&DynamicComparisonRunEvaluator.__call__  s       w//r'   c                "    d| j                    dS )r   z<DynamicComparisonRunEvaluator r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.__repr__/  s    0A>>r'   c                    g }| D ]^  }|j                  dt        |j                        z          t        |dd      s8|j                  dt        |j                        z          ` |S )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   rr   r   )r   r   rU   s      r(   r   z'DynamicComparisonRunEvaluator._get_tags3  s`      	ACKKSVV,-sL$/MC,??@	A r'   c                   t        |t              r|j                  s||_        |S t        |t              r9t	        ||      D ci c]  \  }}|j
                  | c}}| j                  |d}n4t        |t              rd|vr | j                  |d<   nd|}t        |      	 t        di d|i|S c c}}w # t        $ r}t        d|       |d }~ww xY w)N)rd   r2   r:   r2   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r:   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r&   )
rC   rc   r:   listzipr   rs   rL   r   r   )rT   r   r:   r   rU   r3   msgr   s           r(   r   z-DynamicComparisonRunEvaluator._format_results>  s     f89'''4$M%;>tV;LMZS%3665=Mzz!.F
 %F" $

u-%+I/  S/!		- "M<V<  N"  	x! 		s   B3$B9 9	CCCr]   )rx   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]rq   zUOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]r   )r   Sequence[Run]rV   r_   r`   rc   r   )r   r   r`   z	List[str])r   z-Union[dict, list, ComparisonEvaluationResult]r:   r   r   r   r`   rc   )r!   r"   r#   r$   rz   r   r   r   r   r   r   staticmethodr   r   r&   r'   r(   r   r     s    > 6J
6J
6Jp & & AEA!A,=A	#A@ AEA!A,=A	#A: AE0!0,=0	#0"?  "=" !" 	"
 
$"r'   r   c                    t        |       S )z.Create a comaprison evaluator from a function.)r   r   s    r(   comparison_evaluatorr   c  s     )..r'   c                    dt        j                         }|j                  j                         D cg c]+  \  }}|j                  |j
                  |j                  fv r|- c}}r"t        fdD              st              dk7  rd d}t        |      t        fdD              rddgk(  r S t        j                         r?	 	 	 	 	 	 d fd	}t         d
      rt         d
      |_        |S |j                  |_        |S d fd}t         d
      rt         d
      |_        |S |j                  |_        |S c c}}w )N)rU   rV   r   outputsreference_outputsattachmentsc              3  &   K   | ]  }|v  
 y wr]   r&   r}   pnamesupported_argss     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>       EE'Er      kInvalid evaluator function. Must have at least one positional argument. Supported positional arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3  &   K   | ]  }|v  
 y wr]   r&   r   s     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>        $)r   rU   rV   c                   K   | ||r|j                   ni | j                  xs i |r|j                  xs i ni |r|j                  xs i ni dfdD        } |  d {   S 7 w)NrU   rV   r   r   r   r   c              3  (   K   | ]	  }|     y wr]   r&   r}   argarg_maps     r(   r   z>_normalize_evaluator_func.<locals>.awrapper.<locals>.<genexpr>       @@   r   r   r   rU   rV   argsr   rx   positional_argss      @r(   awrapperz+_normalize_evaluator_func.<locals>.awrapper  sl      &07gnnR"{{0b@G7#6#6#<"RBI)>Br A@!4[(((s   AA(!A&"A(r!   c                    | ||r|j                   ni | j                  xs i |j                  xs i |r|j                  xs i ni dfdD        } | S )Nr   c              3  (   K   | ]	  }|     y wr]   r&   r   s     r(   r   z=_normalize_evaluator_func.<locals>.wrapper.<locals>.<genexpr>  r   r   r   r   s      @r(   wrapperz*_normalize_evaluator_func.<locals>.wrapper  s\    &07gnnR"{{0b#*#6#6#<"BI)>Br A@T{"r'   )rU   r   rV   r_   r`   rw   )rU   r   rV   r   r`   rw   rt   	signature
parametersitemskindPOSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr   r   r   ru   r   rr   r!   	rx   sigr   pr   r   r  r   r   s	   `      @@r(   rm   rm   m  s   N 

D
!C ,,.E166a--q/@/@AA 	O
 E_EE A%<<J;K LFG 	 o -< 	UI.	. &&t,))#4)!)  4, j) 
 O && 
 O
# 4, j) 
 N %% 
 NC   0Ec                    dt        j                         }|j                  j                         D cg c]+  \  }}|j                  |j
                  |j                  fv r|- c}}r"t        fdD              st              dk7  rd d}t        |      t        fdD              rddgk(  r S t        j                         r?	 	 	 	 	 	 d fd	}t         d
      rt         d
      |_        |S |j                  |_        |S d fd}t         d
      rt         d
      |_        |S |j                  |_        |S c c}}w )Nr   rV   r   r   r   c              3  &   K   | ]  }|v  
 y wr]   r&   r   s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>  r   r   r   r   r   c              3  &   K   | ]  }|v  
 y wr]   r&   r   s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>  r   r   r   rV   c                   K   | ||r|j                   ni | D cg c]  }|j                  xs i  c}|r|j                  xs i ni dfdD        } |  d {   S c c}w 7 	w)Nr  c              3  (   K   | ]	  }|     y wr]   r&   r   s     r(   r   zI_normalize_comparison_evaluator_func.<locals>.awrapper.<locals>.<genexpr>  r   r   r   r   r   rV   rU   r   r   rx   r   s       @r(   r   z6_normalize_comparison_evaluator_func.<locals>.awrapper  sp      !&07gnnR=ABc 1r 1BBI)>Br A@!4[((	  C )s   A(A!
+A(A&
A(r!   c                    | ||r|j                   ni | D cg c]  }|j                  xs i  c}|r|j                  xs i ni dfdD        } | S c c}w )Nr  c              3  (   K   | ]	  }|     y wr]   r&   r   s     r(   r   zH_normalize_comparison_evaluator_func.<locals>.wrapper.<locals>.<genexpr>  r   r   r  r  s       @r(   r  z5_normalize_comparison_evaluator_func.<locals>.wrapper  sb     &07gnnR=ABc 1r 1BBI)>Br A@T{"	  Cs   A
)r   r   rV   r_   r`   r   )r   r   rV   r   r`   r   r  r	  s	   `      @@r(   r   r     s    SN


D
!C ,,.E166a--q/@/@AA 	O
 E_EE A%<<J;K LFG 	 o  -< 	VY/	/&&t,)#).?)#) 4, j) 
 O && 
 O	# 4, j) 
 N %% 
 Nr  c                @   t        | t        t        t        f      rd| i} | S | st	        d|        t        | t
              r't        d | D              st	        d|  d      d| i} | S t        | t              rd| i} | S t        | t              r	 | S t	        d|        )	Nr3   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  <   K   | ]  }t        |t                y wr]   )rC   rL   )r}   xs     r(   r   z+_format_evaluator_result.<locals>.<genexpr>  s     71:a&7s   z8Expected a list of dicts or EvaluationResults. Received .rO   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rC   r   rE   rD   r   r   r   r   rL   )r   s    r(   r   r   
  s     &4,-6"* M) ;;A(D
 	
 
FD	!777J6(RST  V$ M 
FC	 6" M 
FD	! M	 &&,X/
 	
r'   c                    dt        j                         }|j                  j                         D cg c]+  \  }}|j                  |j
                  |j                  fv r|- c}}r"t        fdD              s*t              dk7  rd d}r	|d dz  }t        |      t        fdD              rdd	gk(  r S 	 	 	 	 	 	 d fd
}t         d      rt         d      |_        |S |j                  |_        |S c c}}w )Nr   examplesr   r   r   c              3  &   K   | ]  }|v  
 y wr]   r&   r   s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>;  r   r   r   r   r  z Received positional arguments c              3  &   K   | ]  }|v  
 y wr]   r&   r   s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>G  r   r   r   r  c           	     .   | ||D cg c]  }|j                    c}| D cg c]  }|j                  xs i  c}|D cg c]  }|j                  xs i  c}dfdD        } | }t        |t              r|S t	        |      S c c}w c c}w c c}w )Nr  c              3  (   K   | ]	  }|     y wr]   r&   r   s     r(   r   z@_normalize_summary_evaluator.<locals>.wrapper.<locals>.<genexpr>W  s     <SGCL<r   )r   r   rC   r1   r   )	r   r  rV   rU   r   r   r   rx   r   s	         @r(   r  z-_normalize_summary_evaluator.<locals>.wrapperM  s     $9ABg7>>B9=>#CKK-2->KS%Tgoo&;&;%TG =O<D4[F&"23+F33 C>%Ts   BB
Br!   )r   zSequence[schemas.Run]r  zSequence[schemas.Example]r`   ra   )rt   r  r  r  r  r  r  r   r   r   r   rr   r!   )rx   r
  r   r  r   r  r   r   s   `     @@r(   _normalize_summary_evaluatorr"  2  sH   SN


D
!C ,,.E166a--q/@/@AA 	O
 E_EE A%<<J;K1N 	 4_4EQGGCo  -< 	VZ0	0	4'	43L	47	4" *1z)BGD*% 	  IPHXHX 	 Ws   0D)rx   r   )r   r   )r   rL   r`   rL   )rx   r   r`   r   )rx   r   r`   z|Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]])rx   r   r`   zUnion[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]])r   z;Union[EvaluationResults, dict, str, int, bool, float, list]r`   zUnion[EvaluationResults, dict])rx   r   r`   SUMMARY_EVALUATOR_T)>r$   
__future__r   rY   rt   r   abcr   typingr   r   r   r   r	   r
   r   r   r   r   typing_extensionsr   rn   r   pydantic.v1r   r   r   r   ImportErrorpydanticlogging	functoolsr   langsmith.schemasr   r   r   r   	getLoggerr!   rF   r   r*   r1   rN   rQ   rL   rw   rc   r   rf   r   r   r   rp   r   r   rm   r   r   r#  r"  r&   r'   r(   <module>r/     s   E "       (     B B			8	$'y '6Ye 6,y ,^"	 "
 
$ )+<dBC : :$ 5t;< S5, S5l	%	% @A AH//
 #/P
PPfG
GGTG#8 	'++	 9: 112	4 	gkk	D12 112	4		 .c   s    E% %E:9E: