
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union

from langsmith.evaluation.evaluator import DynamicRunEvaluator
from langsmith.run_helpers import traceable
from langsmith.schemas import Example, Run

if TYPE_CHECKING:
    from langchain.evaluation.schema import StringEvaluator

    from langsmith.evaluation.evaluator import RunEvaluator


class SingleEvaluatorInput(TypedDict):
    """The input to a `StringEvaluator`."""

    prediction: str
    """The prediction string."""
    reference: Optional[Any]
    """The reference string."""
    input: Optional[str]
    """The input string."""


class LangChainStringEvaluator:
    """A class for wrapping a LangChain StringEvaluator.

    Requires the `langchain` package to be installed.

    Attributes:
        evaluator (StringEvaluator): The underlying StringEvaluator OR the name
            of the evaluator to load.

    Methods:
        as_run_evaluator() -> RunEvaluator:
            Convert the LangChainStringEvaluator to a RunEvaluator.

    Examples:
        Creating a simple LangChainStringEvaluator:

        >>> evaluator = LangChainStringEvaluator("exact_match")
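
        The wrapped LangChain evaluator is kept on the `evaluator`
        attribute and can be invoked directly (a sketch, assuming the
        usual output shape of `langchain`'s "exact_match" evaluator):

        >>> evaluator.evaluator.evaluate_strings(  # doctest: +SKIP
        ...     prediction="The answer is 42",
        ...     reference="The answer is 42",
        ... )
        {'score': 1}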

        Converting a LangChainStringEvaluator to a RunEvaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_openai import ChatOpenAI
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatOpenAI(model="gpt-4o"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>
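
        The object returned by `as_run_evaluator()` implements the
        LangSmith `RunEvaluator` interface; the `evaluate` API calls its
        `evaluate_run` method on each run/example pair (a sketch, assuming
        `run` and `example` objects are already in scope):

        >>> run_evaluator.evaluate_run(run, example)  # doctest: +SKIP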

        Customizing the LLM model used by the evaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_anthropic import ChatAnthropic
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>
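
        A custom `StringEvaluator` subclass can also be wrapped directly.
        A minimal sketch (the `ShorterThanReference` evaluator below is
        hypothetical, not part of `langchain`):

        >>> from langchain.evaluation.schema import StringEvaluator
        >>> class ShorterThanReference(StringEvaluator):
        ...     @property
        ...     def requires_reference(self) -> bool:
        ...         return True
        ...     def _evaluate_strings(
        ...         self, *, prediction, reference=None, input=None, **kwargs
        ...     ):
        ...         # Score 1 if the prediction is no longer than the reference.
        ...         return {"score": int(len(prediction) <= len(reference))}
        >>> brevity_evaluator = LangChainStringEvaluator(ShorterThanReference())
        >>> brevity_evaluator.as_run_evaluator()  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>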

        Using the `evaluate` API with different evaluators:

        >>> def prepare_data(run: Run, example: Example):
        ...     # Convert the evaluation data into the format expected by the evaluator
        ...     # Only required for datasets with multiple inputs/output keys
        ...     return {
        ...         "prediction": run.outputs["prediction"],
        ...         "reference": example.outputs["answer"],
        ...         "input": str(example.inputs),
        ...     }
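
        Whatever `prepare_data` returns must use the same keys as
        `SingleEvaluatorInput`, the mapping this module feeds to the
        underlying evaluator:

        >>> sorted(SingleEvaluatorInput.__annotations__)
        ['input', 'prediction', 'reference']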
        >>> import re
        >>> from langchain_anthropic import ChatAnthropic
        >>> import langsmith
        >>> from langsmith.evaluation import LangChainStringEvaluator, evaluate
        >>> criteria_evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if it is correct"
        ...             " and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> embedding_evaluator = LangChainStringEvaluator("embedding_distance")
        >>> exact_match_evaluator = LangChainStringEvaluator("exact_match")
        >>> regex_match_evaluator = LangChainStringEvaluator(
        ...     "regex_match", config={"flags": re.IGNORECASE}, prepare_data=prepare_data
        ... )
        >>> scoring_evaluator = LangChainStringEvaluator(
        ...     "labeled_score_string",
        ...     config={
        ...         "criteria": {
        ...             "accuracy": "Score 1: Completely inaccurate\nScore 5: Somewhat accurate\nScore 10: Completely accurate"
        ...         },
        ...         "normalize_by": 10,
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> string_distance_evaluator = LangChainStringEvaluator(
        ...     "string_distance",
        ...     config={"distance_metric": "levenshtein"},
        ...     prepare_data=prepare_data,
        ... )
        >>> from langsmith import Client
        >>> client = Client()
        >>> results = evaluate(
        ...     lambda inputs: {"prediction": "foo"},
        ...     data=client.list_examples(dataset_name="Evaluate Examples", limit=1),
        ...     evaluators=[
        ...         embedding_evaluator,
        ...         criteria_evaluator,
        ...         exact_match_evaluator,
        ...         regex_match_evaluator,
        ...         scoring_evaluator,
        ...         string_distance_evaluator,
        ...     ],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    """  # noqa: E501

    def __init__(
        self,
        evaluator: Union[StringEvaluator, str],
        *,
        config: Optional[dict] = None,
        prepare_data: Optional[
            Callable[[Run, Optional[Example]], SingleEvaluatorInput]
        ] = None,
    ):
        """Initialize a LangChainStringEvaluator.

        See: https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.StringEvaluator.html#langchain-evaluation-schema-stringevaluator

        Args:
            evaluator (Union[StringEvaluator, str]): The underlying
                StringEvaluator, or the name of an evaluator to load
                via `load_evaluator` (e.g. "criteria" or "exact_match").
            config (Optional[dict]): Keyword arguments forwarded to
                `load_evaluator` when `evaluator` is given by name.
            prepare_data (Optional[Callable]): A function mapping a run and
                an optional example to the `SingleEvaluatorInput` expected
                by the underlying evaluator. Only needed for datasets whose
                runs or examples have multiple input/output keys.
        """
        from langchain.evaluation.schema import StringEvaluator

        if isinstance(evaluator, StringEvaluator):
            self.evaluator = evaluator
        elif isinstance(evaluator, str):
            from langchain.evaluation import load_evaluator

            self.evaluator = load_evaluator(evaluator, **(config or {}))  # type: ignore[assignment]
        else:
            raise NotImplementedError(f"Unsupported evaluator type: {type(evaluator)}")
        self._prepare_data = prepare_data

    def as_run_evaluator(self) -> RunEvaluator:
        """Convert the LangChainStringEvaluator to a RunEvaluator.

        This is the object used in the LangSmith `evaluate` API.

        Returns:
            RunEvaluator: The converted RunEvaluator.
        """
        input_str = (
            "\n        \"input\": example.inputs['input'],"
            if self.evaluator.requires_input
            else ""
        )
        reference_str = (
            "\n        \"reference\": example.outputs['expected']"
            if self.evaluator.requires_reference
            else ""
        )
        customization_error_str = f"""
def prepare_data(run, example):
    return {{
        "prediction": run.outputs['my_output'],{reference_str}{input_str}
    }}
evaluator = LangChainStringEvaluator(..., prepare_data=prepare_data)
"""

        @traceable
        def prepare_evaluator_inputs(
            run: Run, example: Optional[Example] = None
        ) -> SingleEvaluatorInput:
            if run.outputs and len(run.outputs) > 1:
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single"
                    " prediction key. Please ensure that the run has a single"
                    " output. Or initialize with a prepare_data:\n"
                    f"{customization_error_str}"
                )
            if (
                self.evaluator.requires_reference
                and example
                and example.outputs
                and len(example.outputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single"
                    " reference key. Please ensure that the example has a"
                    " single output. Or create a custom evaluator yourself:\n"
                    f"{customization_error_str}"
                )
            if (
                self.evaluator.requires_input
                and example
                and example.inputs
                and len(example.inputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single input"
                    " key. Please ensure that the example has a single input."
                    " Or initialize with a prepare_data:\n"
                    f"{customization_error_str}"
                )

            return SingleEvaluatorInput(
                prediction=next(iter(run.outputs.values())),  # type: ignore[union-attr]
                reference=(
                    next(iter(example.outputs.values()))
                    if self.evaluator.requires_reference
                    and example
                    and example.outputs
                    else None
                ),
                input=(
                    next(iter(example.inputs.values()))
                    if self.evaluator.requires_input and example and example.inputs
                    else None
                ),
            )

        @traceable(name=self.evaluator.evaluation_name)
        def evaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = self.evaluator.evaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        @traceable(name=self.evaluator.evaluation_name)
        async def aevaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = await self.evaluator.aevaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        return DynamicRunEvaluator(evaluate, aevaluate)
   langsmith.run_helpersr   langsmith.schemasr   r   r%   r   r   r   r    r   r   r   <module>rW      s8    " K K > + *;;9 w8 w8r   