
    ih                        d Z ddlmZ ddlZddlmZmZmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ  ej:                  e      Z G d dee      Z y)zDHypothetical Document Embeddings.

https://arxiv.org/abs/2212.10496
    )annotationsN)AnyDictListOptional)CallbackManagerForChainRun)
Embeddings)BaseLanguageModel)StrOutputParser)BasePromptTemplate)Runnable)
ConfigDict)Chain)
PROMPT_MAP)LLMChainc                      e Zd ZU dZded<   ded<    edd      Zedd	       Zedd
       Z	ddZ
ddZddZ	 d	 	 	 	 	 ddZe	 	 d	 	 	 	 	 	 	 	 	 	 	 dd       Zedd       Zy)HypotheticalDocumentEmbedderzrGenerate hypothetical document for query, and then embed that.

    Based on https://arxiv.org/abs/2212.10496
    r	   base_embeddingsr   	llm_chainTforbid)arbitrary_types_allowedextrac                P    | j                   j                  j                         d   S )z Input keys for Hyde's LLM chain.required)r   input_schemamodel_json_schemaselfs    X/var/www/html/dev/engine/venv/lib/python3.12/site-packages/langchain/chains/hyde/base.py
input_keysz'HypotheticalDocumentEmbedder.input_keys(   s"     ~~**<<>zJJ    c                h    t        | j                  t              r| j                  j                  S dgS )z!Output keys for Hyde's LLM chain.text)
isinstancer   r   output_keysr   s    r   r%   z(HypotheticalDocumentEmbedder.output_keys-   s)     dnnh/>>---8Or!   c                8    | j                   j                  |      S )zCall the base embeddings.)r   embed_documents)r   textss     r   r'   z,HypotheticalDocumentEmbedder.embed_documents5   s    ##33E::r!   c                   	 ddl }t        |j                  |      j                  d            S # t        $ rQ t
        j                  d       |sg cY S t        |      }t        | D cg c]  }t        |      |z   nc c}w c}cY S w xY w)z)Combine embeddings into final embeddings.r   N)axisa*  NumPy not found in the current Python environment. HypotheticalDocumentEmbedder will use a pure Python implementation for internal calculations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpy)
numpylistarraymeanImportErrorloggerwarninglenzipsum)r   
embeddingsnpnum_vectors
dim_valuess        r   combine_embeddingsz/HypotheticalDocumentEmbedder.combine_embeddings9   s    	V,11q19:: 	VNNK 	j/KDGDTUjC
Ok1UUU	Vs!   -0 "B
B
)A?>	B
	B
c                    | j                   d   }| j                  j                  ||i      }t        | j                  t              r|| j
                  d      g}n|g}| j                  |      }| j                  |      S )z1Generate a hypothetical document and embedded it.r   )r    r   invoker$   r   r%   r'   r9   )r   r#   var_nameresult	documentsr5   s         r   embed_queryz(HypotheticalDocumentEmbedder.embed_queryL   sw    ??1%&&$'78dnnh/ 0 0 345II)))4
&&z22r!   Nc                    |xs t        j                         }| j                  j                  |d|j	                         i      S )zCall the internal llm chain.	callbacks)config)r   get_noop_managerr   r;   	get_child)r   inputsrun_manager_run_managers       r   _callz"HypotheticalDocumentEmbedder._callW   sG     #S&@&Q&Q&S~~$$K)?)?)AB % 
 	
r!   c                    ||}n>||t         v r
t         |   }n*t        dt        t        j                                d      ||z  t	               z  } | d||d|S )zILoad and use LLMChain with either a specific prompt key or custom prompt.zHMust specify prompt_key if custom_prompt not provided. Should be one of .)r   r    )r   
ValueErrorr,   keysr   )clsllmr   
prompt_keycustom_promptkwargspromptr   s           r   from_llmz%HypotheticalDocumentEmbedder.from_llmb   sy     $"F#
j(@
+F:??,-.a1 
 SL?#44	R?iR6RRr!   c                     y)N
hyde_chainrK   r   s    r   _chain_typez(HypotheticalDocumentEmbedder._chain_typey   s    r!   )return	List[str])r(   rY   rX   List[List[float]])r5   rZ   rX   List[float])r#   strrX   r[   )N)rE   zDict[str, Any]rF   z$Optional[CallbackManagerForChainRun]rX   zDict[str, str])NN)rO   r
   r   r	   rP   zOptional[str]rQ   zOptional[BasePromptTemplate]rR   r   rX   r   )rX   r\   )__name__
__module____qualname____doc____annotations__r   model_configpropertyr    r%   r'   r9   r?   rH   classmethodrT   rW   rK   r!   r   r   r      s    
   $L
 K K  ;V&	3 =A	
	
 :	
 
		
 
 %)6:SS $S "	S
 4S S 
&S S,  r!   r   )!r`   
__future__r   loggingtypingr   r   r   r   langchain_core.callbacksr   langchain_core.embeddingsr	   langchain_core.language_modelsr
   langchain_core.output_parsersr   langchain_core.promptsr   langchain_core.runnablesr   pydanticr   langchain.chains.baser   langchain.chains.hyde.promptsr   langchain.chains.llmr   	getLoggerr]   r0   r   rK   r!   r   <module>rs      sX   
 #  , , ? 0 < 9 5 -  ' 4 )			8	$a5* ar!   