
    6|h/                         d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dedefd	Zd
ej6                  dedee   fdZedddddde	ej6                     dedee   dee   dededej>                  fd       Z dededeej6                     fdZ! ed      Z" ed      Z#dee"   dee#   dee
e"e#f      fdZ$edddded e%d!ee&   dee   ddf
d"       Z'y)#zfBeta utility functions to assist in common eval workflows.

These functions may change in the future.
    N)DefaultDictListOptionalSequenceTupleTypeVar)
evaluation)	warn_beta)Clientrun_dictid_mapc                     | d   }|j                         D ])  \  }}|j                  t        |      t        |            }+ || d<   | j                  d      r|| d      | d<   | j                  d      si | d<   | S )a  Convert the IDs in the run dictionary using the provided ID map.

    Parameters:
    - run_dict (dict): The dictionary representing a run.
    - id_map (dict): The dictionary mapping old IDs to new IDs.

    Returns:
    - dict: The updated run dictionary.
    dotted_orderparent_run_idextra)itemsreplacestrget)r   r   dokvs        T/var/www/html/test/engine/venv/lib/python3.12/site-packages/langsmith/beta/_evals.py_convert_idsr      s     
.	!B (1ZZAA'(!H^||O$$*8O+D$E!<< O    rootrun_to_example_mapreturnc                    | g}t        j                         }| j                  |i}g }|r|j                         }|j	                  h d      }|j                  |d   t        j                               ||d   <   ||d      |d<   ||d      |d<   |j                  r|j                  |j                         |j                  |       |r|D cg c]  }t        ||       }	}|| j                     |	d   d<   |	S c c}w )a&  Convert the root run and its child runs to a list of dictionaries.

    Parameters:
    - root (ls_schemas.Run): The root run to convert.
    - run_to_example_map (dict): The dictionary mapping run IDs to example IDs.

    Returns:
    - List[dict]: The list of converted run dictionaries.
    >   
session_idchild_run_idsparent_run_ids)excludeidtrace_idr   reference_example_id)uuiduuid4r%   popdictr   
child_runsextendappendr   r$   )
r   r   runs_r%   r   resultssrcsrc_dictrresults
             r   _convert_root_runr4   )   s     FEzz|HmmX&FG
iik88$U8V!'HTNDJJL!Ix~/%hz&:;>>LL(x   077!l1f%7F7(:477(CF1I$%M 8s   C<F)test_project_nameclientload_child_runsinclude_outputsrunsdataset_namer5   r6   r7   r8   c                <   | st        d|        |xs t        j                         }|j                  |      }|r| D cg c]  }|j                   c}nd}|j                  | D cg c]  }|j                   c}|| D cg c]  }|j                   c}|j                         |s| }	n*| D cg c]  }|j                  |j                  |      ! }	}|xs$ dt        j                         j                  dd  }t        |j                  |            }
|
D ci c]  }|j                  |j                   }}|
d   j                  r|
d   j                  n|
d   j                   }|	D cg c]  }t#        ||      D ]  }|  }}}|j%                  ||j                  d	|j'                         d
      }|D ]i  }|d   |d   z
  }t(        j(                  j+                  t(        j,                  j.                        |d<   |d   |z   |d<    |j0                  di |d|i k |j3                  |j                  t(        j(                  j+                  t(        j,                  j.                              }|S c c}w c c}w c c}w c c}w c c}w c c}}w )a  Convert the following runs to a dataset + test.

    This makes it easy to sample prod runs into a new regression testing
    workflow and compare against a candidate system.

    Internally, this function does the following:
        1. Create a dataset from the provided production run inputs.
        2. Create a new test project.
        3. Clone the production runs and re-upload against the dataset.

    Parameters:
    - runs (Sequence[ls_schemas.Run]): A sequence of runs to be executed as a test.
    - dataset_name (str): The name of the dataset to associate with the test runs.
    - client (Optional[Client]): An optional LangSmith client instance. If not provided,
        a new client will be created.
    - load_child_runs (bool): Whether to load child runs when copying runs.
        Defaults to False.

    Returns:
    - ls_schemas.TracerSession: The project containing the cloned runs.

    Examples:
    --------
    .. code-block:: python

        import langsmith
        import random

        client = langsmith.Client()

        # Randomly sample 100 runs from a prod project
        runs = list(client.list_runs(project_name="My Project", execution_order=1))
        sampled_runs = random.sample(runs, min(len(runs), 100))

        runs_as_test(runs, dataset_name="Random Runs")

        # Select runs named "extractor" whose root traces received good feedback
        runs = client.list_runs(
            project_name="<your_project>",
            filter='eq(name, "extractor")',
            trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
        )
        runs_as_test(runs, dataset_name="Extraction Good")
    z1Expected a non-empty sequence of runs. Received: )r:   N)inputsoutputssource_run_ids
dataset_id)r7   zprod-baseline-   r   zprod-baseline)whichdataset_version)project_namereference_dataset_idmetadataend_time
start_time)tzrC   )rF    )
ValueErrorrtget_cached_clientcreate_datasetr=   create_examplesr<   r$   read_runr'   r(   hexlistlist_examplessource_run_idmodified_at
created_atr4   create_project	isoformatdatetimenowtimezoneutc
create_runupdate_project)r9   r:   r5   r6   r7   r8   dsr2   r=   runs_to_copyexampleser   rB   root_runr   	to_createprojectnew_runlatency_s                        r   convert_runs_to_testrh   E   s   l NtfWXX-r++-F			L		9B+:$'Qqyy'G
"&'Q'&*++55	    MQ
GHFOOADD/OB
 
 *T~djjl>N>NrPQ>R=S-TF((l(CDH9ABA!//144/BB#+A;#:#:@V@V  %)(4FG  	I  ##&UU$.88:
 $ G  E*%(== ( 1 1 5 59J9J9N9N 5 O%l3g=
DGD2CD	E 	

X..22h6G6G6K6K2L 	 	A N_ ('+
 C
s$   I?(JJ	1$JJ0JrC   c                 d   |j                  |       }t        j                  t              }g }i }|D ]M  }|j                  ||j                     j                  |       n|j                  |       |||j                  <   O |j                         D ]  \  }}t        |d       ||   _	         |S )N)rC   c                     | j                   S N)r   )r2   s    r   <lambda>z%_load_nested_traces.<locals>.<lambda>   s
    q~~ r   )key)
	list_runscollectionsdefaultdictrQ   r   r-   r$   r   sortedr+   )	rC   r6   r9   treemapr/   all_runsrunrun_idr+   s	            r   _load_nested_tracesrv      s    6D<G<S<S=G GH (C%%&--c2NN3 &mmo W
&,Z=U&V#WNr   TUlist1list2c                 @    t        t        j                  | |            S rk   )rQ   	itertoolsproduct)ry   rz   s     r   _outer_productr~      s    	!!%/00r   
   )max_concurrencyr6   
evaluatorsr   c          
         ddl m} g }|D ]t  }t        |t        j                        r|j                  |       /t        |      r%|j                  t        j                  |             _t        dt        |              |xs t        j                         }t        | |      } ||      5 } |j                  |j                  gt        t!        ||        }	ddd       	D ]  }
 y# 1 sw Y   xY w)a  Compute test metrics for a given test name using a list of evaluators.

    Args:
        project_name (str): The name of the test project to evaluate.
        evaluators (list): A list of evaluators to compute metrics with.
        max_concurrency (Optional[int], optional): The maximum number of concurrent
            evaluations. Defaults to 10.
        client (Optional[Client], optional): The client to use for evaluations.
            Defaults to None.

    Returns:
        None: This function does not return any value.
    r   )ContextThreadPoolExecutorz5Evaluation not yet implemented for evaluator of type )max_workersN)	langsmithr   
isinstancels_evalRunEvaluatorr-   callablerun_evaluatorNotImplementedErrortyperK   rL   rv   mapevaluate_runzipr~   )rC   r   r   r6   r   evaluators_functracesexecutorr/   rg   s              r   compute_test_metricsr      s    * 4.0K dG001t$d^w44T:;%GT
|T  -r++-F v6F	"	? 
8(,,
"%~fk'J"K

  	
 
s   /.C--C6)(__doc__ro   rX   r|   r'   typingr   r   r   r   r   r   langsmith.run_trees	run_treesrK   langsmith.schemasschemas
ls_schemasr   r	   r   #langsmith._internal._beta_decoratorr
   langsmith.clientr   r*   r   Runr4   r   boolTracerSessionrh   rv   rw   rx   r~   rQ   intr   rI   r   r   <module>r      s  
     H H   & + 9 #4  ,JNN  d 8 
 (,#!!h
:>>
"h h  }	h
 Vh h h h hVc 6 d:>>>R $ CLCL1$q' 1$q' 1d5A;6G 1 
 &(#'' ' c]	'
 V' 
' 'r   