
    7|hf                        d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ  G d	 d
e      Zy)    )annotationsN)Path)AnyDictIterableListOptional)CallbackManagerForRetrieverRun)Document)BaseRetriever)
ConfigDictc                     e Zd ZU dZdZded<   	 ded<   	 dZded<   	 dZd	ed
<   	  ed      Z	e
	 	 d	 	 	 	 	 	 	 	 	 dd       Ze
dd	 	 	 	 	 	 	 dd       Z	 	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZe
ddd	 	 	 	 	 	 	 dd       Zy)TFIDFRetrieverz`TF-IDF` retriever.

    Largely based on
    https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb
    Nr   
vectorizerList[Document]docstfidf_array   intkT)arbitrary_types_allowedc                   	 ddl m} |xs i } |di |}|j                  |      }|xs	 d |D        }t	        ||      D 	cg c]  \  }}	t        ||	       }
}}	 | d||
|d|S # t        $ r t        d      w xY wc c}	}w )Nr   )TfidfVectorizerzNCould not import scikit-learn, please install with `pip install scikit-learn`.c              3      K   | ]  }i   y wN ).0_s     c/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/retrievers/tfidf.py	<genexpr>z,TFIDFRetriever.from_texts.<locals>.<genexpr>4   s     !4"!4s   page_contentmetadatar   r   r   r   )sklearn.feature_extraction.textr   ImportErrorfit_transformzipr   )clstexts	metadatastfidf_paramskwargsr   r   r   tmr   s              r   
from_textszTFIDFRetriever.from_texts!   s    	G $)r$4|4
 ..u54!4e!4	ADUIAVWAa!4WWWjtWPVWW  	! 	 Xs   A) B)A>)r,   c               R    t        d |D         \  }} | j                  d|||d|S )Nc              3  L   K   | ]  }|j                   |j                  f  y wr   r!   )r   ds     r   r    z0TFIDFRetriever.from_documents.<locals>.<genexpr>@   s      Q!!..!**!= Qs   "$)r*   r,   r+   r   )r(   r0   )r)   	documentsr,   r-   r*   r+   s         r   from_documentszTFIDFRetriever.from_documents8   s@      Qy QRys~~ 
li
KQ
 	
    c                  ddl m} | j                  j                  |g      } || j                  |      j                  d      }|j                         | j                   d  d d d   D cg c]  }| j                  |    }}|S c c}w )Nr   )cosine_similarity)r9   )	sklearn.metrics.pairwiser8   r   	transformr   reshapeargsortr   r   )selfqueryrun_managerr8   	query_vecresultsireturn_docss           r   _get_relevant_documentsz&TFIDFRetriever._get_relevant_documentsE   s     	?OO--G
	 $D$4$4i@HH
 .5__->wy-I$B$-OPtyy|PP Qs   +Btfidf_vectorizerc                d   	 dd l }t        |      }|j                  dd       |j	                  | j
                  || dz         t        || dz  d      5 }t        j                  | j                  | j                  f|       d d d        y # t        $ r t        d      w xY w# 1 sw Y   y xY w)Nr   BCould not import joblib, please install with `pip install joblib`.T)exist_okparents.joblib.pklwb)
joblibr&   r   mkdirdumpr   openpickler   r   )r>   folder_path	file_namerN   pathfs         r   
save_localzTFIDFRetriever.save_localS   s    
	 K 

D$
/ 	DOOTyk,A%AB $I;d++T2 	:aKKD$4$45q9	: 	:  	T 		: 	:s   B -B&B#&B/F)allow_dangerous_deserializationrT   c               .   	 ddl }|st        d      t        |      }|j	                  || dz        }t        || dz  d      5 }t        j                  |      \  }}	ddd        | |	      S # t        $ r t        d      w xY w# 1 sw Y   ,xY w)	a  Load the retriever from local storage.

        Args:
            folder_path: Folder path to load from.
            allow_dangerous_deserialization: Whether to allow dangerous deserialization.
                Defaults to False.
                The deserialization relies on .joblib and .pkl files, which can be
                modified to deliver a malicious payload that results in execution of
                arbitrary code on your machine. You will need to set this to `True` to
                use deserialization. If you do this, make sure you trust the source of
                the file.
            file_name: File name to load from. Defaults to "tfidf_vectorizer".

        Returns:
            TFIDFRetriever: Loaded retriever.
        r   NrH   a  The de-serialization of this retriever is based on .joblib and .pkl files.Such files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to load this retriever. If you do this, make sure you trust the source of the file, and you are responsible for validating the file came from a trusted source.rK   rL   rbr$   )rN   r&   
ValueErrorr   loadrQ   rR   )
r)   rS   rX   rT   rN   rU   r   rV   r   r   s
             r   
load_localzTFIDFRetriever.load_locali   s    0	 /.	 	 K  [[9+W(=!=>
 $I;d++T2 	/a !'AD+	/
 jtMM9  	T 	.	/ 	/s   A3 B3BB)NN)
r*   zIterable[str]r+   zOptional[Iterable[dict]]r,   Optional[Dict[str, Any]]r-   r   returnr   )r4   zIterable[Document]r,   r^   r-   r   r_   r   )r?   strr@   r
   r_   r   )rF   )rS   r`   rT   r`   r_   None)rS   r`   rX   boolrT   r`   r_   r   )__name__
__module____qualname____doc__r   __annotations__r   r   r   model_configclassmethodr0   r5   rE   rW   r]   r   r6   r   r   r      sb    J
KAsJ( $L  /315	XX ,X /	X
 X 
X X, 
 26	

%

 /	


 

 


 

*H	" ,:: : 
	:, 
 16+5N5N *.	5N
 5N 
5N 5Nr6   r   )
__future__r   rR   pathlibr   typingr   r   r   r   r	   langchain_core.callbacksr
   langchain_core.documentsr   langchain_core.retrieversr   pydanticr   r   r   r6   r   <module>rq      s0    "   6 6 C - 3 RN] RNr6   