
    7|h'                         d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
 ddlZddlZddlZddlmZ ddlmZ ddlmZmZmZ dgZ G d	 dee      Z G d
 d      Zy)z-written under MIT Licence, Michael Feil 2023.    N)ThreadPoolExecutor)AnyCallableDictListOptionalTuple)
Embeddings)get_from_dict_or_env)	BaseModel
ConfigDictmodel_validatorInfinityEmbeddingsc                       e Zd ZU dZeed<   	 dZeed<   	 dZeed<   	  e	d      Z
 ed	
      ededefd              Zdee   deee      fdZdee   deee      fdZdedee   fdZdedee   fdZy)r   aB  Self-hosted embedding models for `infinity` package.

    See https://github.com/michaelfeil/infinity
    This also works for text-embeddings-inference and other
    self-hosted openai-compatible servers.

    Infinity is a package to interact with Embedding Models on https://github.com/michaelfeil/infinity


    Example:
        .. code-block:: python

            from langchain_community.embeddings import InfinityEmbeddings
            InfinityEmbeddings(
                model="BAAI/bge-small",
                infinity_api_url="http://localhost:7997",
            )
    modelzhttp://localhost:7997infinity_api_urlNclientforbid)extrabefore)modevaluesreturnc                 J    t        |dd      |d<   t        |d         |d<   |S )z?Validate that api key and python package exists in environment.r   INFINITY_API_URL)hostr   )r   &TinyAsyncOpenAIInfinityEmbeddingClient)clsr   s     f/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/embeddings/infinity.pyvalidate_environmentz'InfinityEmbeddings.validate_environment3   s?    
 &:&(:&
!" B*+
x     textsc                 T    | j                   j                  | j                  |      }|S )zCall out to Infinity's embedding endpoint.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        r   r"   )r   embedr   selfr"   
embeddingss      r   embed_documentsz"InfinityEmbeddings.embed_documentsA   s/     [[&&** ' 

 r!   c                 p   K   | j                   j                  | j                  |       d{   }|S 7 w)zAsync call out to Infinity's embedding endpoint.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        r$   N)r   aembedr   r&   s      r   aembed_documentsz#InfinityEmbeddings.aembed_documentsP   s=       ;;--** . 
 

 	
s   +646textc                 ,    | j                  |g      d   S )zCall out to Infinity's embedding endpoint.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        r   )r)   )r'   r-   s     r   embed_queryzInfinityEmbeddings.embed_query_   s     ##TF+A..r!   c                 L   K   | j                  |g       d{   }|d   S 7 	w)zAsync call out to Infinity's embedding endpoint.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        Nr   )r,   )r'   r-   r(   s      r   aembed_queryzInfinityEmbeddings.aembed_queryj   s,       00$88
!} 9s   $"
$)__name__
__module____qualname____doc__str__annotations__r   r   r   r   model_configr   classmethodr   r    r   floatr)   r,   r/   r1    r!   r   r   r      s    & J#3c3FC L (#
$ 
3 
  $
T#Y 4U3D DI $tE{:K 	/ 	/U 	/
s 
tE{ 
r!   c            
          e Zd ZdZ	 	 ddedeej                     ddfdZe	e
fdee   dedeee   ef   fd	       Zdee   deee      fd
Ze	deee      dee   fd       Zdedee   deeef   fdZdedee   deee      fdZdedee   deee      fdZdej                  deeef   deee      fdZdedee   deee      fdZy)r   a
  Helper tool to embed Infinity.

    It is not a part of Langchain's stable API,
    direct use discouraged.

    Example:
        .. code-block:: python


            mini_client = TinyAsyncInfinityEmbeddingClient(
            )
            embeds = mini_client.embed(
                model="BAAI/bge-small",
                text=["doc1", "doc2"]
            )
            # or
            embeds = await mini_client.aembed(
                model="BAAI/bge-small",
                text=["doc1", "doc2"]
            )

    Nr   
aiosessionr   c                     || _         || _        | j                   t        | j                         dk  rt        d      d| _        y )N   z( param `host` must be set to a valid url   )r   r=   len
ValueError_batch_size)r'   r   r=   s      r   __init__z/TinyAsyncOpenAIInfinityEmbeddingClient.__init__   s?    
 	$99DII 2GHHr!   r"   sorterc                     t        |       dk(  r| d fS t        j                  | D cg c]  } ||        c}      D cg c]  }| |   	 }}|fdfS c c}w c c}w )a  Sort texts in ascending order, and
        delivers a lambda expr, which can sort a same length list
        https://github.com/UKPLab/sentence-transformers/blob/
        c5f93f70eca933c78695c5bc686ceda59651ae3b/sentence_transformers/SentenceTransformer.py#L156

        Args:
            texts (List[str]): _description_
            sorter (Callable, optional): _description_. Defaults to len.

        Returns:
            Tuple[List[str], Callable]: _description_

        Example:
            ```
            texts = ["one","three","four"]
            perm_texts, undo = self._permute(texts)
            texts == undo(perm_texts)
            ```
           c                     | S Nr;   )ts    r   <lambda>zATinyAsyncOpenAIInfinityEmbeddingClient._permute.<locals>.<lambda>   s    A r!   c                 Z    t        j                        D cg c]  }| |   	 c}S c c}w rI   )npargsort)unsorted_embeddingsidxlength_sorted_idxs     r   rK   zATinyAsyncOpenAIInfinityEmbeddingClient._permute.<locals>.<lambda>   s-    02

;L0M:
),$:
  :
s   ()rA   rM   rN   )r"   rE   senrP   texts_sortedrQ   s        @r   _permutez/TinyAsyncOpenAIInfinityEmbeddingClient._permute   sm    0 u:?+%%JJ'F'FG.?@sc
@@ 
 
 	
 (G@s   AAc                     t        |      dk(  r|gS g }t        dt        |      | j                        D ]#  }|j                  |||| j                  z           % |S )aX  
        splits Lists of text parts into batches of size max `self._batch_size`
        When encoding vector database,

        Args:
            texts (List[str]): List of sentences
            self._batch_size (int, optional): max batch size of one request.

        Returns:
            List[List[str]]: Batches of List of sentences
        rG   r   )rA   rangerC   append)r'   r"   batchesstart_indexs       r   _batchz-TinyAsyncOpenAIInfinityEmbeddingClient._batch   sb     u:?7N CJ0@0@A 	PKNN5{T=M=M/MNO	Pr!   batch_of_textsc                     t        |       dk(  rt        | d         dk(  r| d   S g }| D ]  }|j                  |        |S )NrG   r   )rA   extend)r[   r"   sublists      r   _unbatchz/TinyAsyncOpenAIInfinityEmbeddingClient._unbatch   sP    ~!#N1,=(>!(C!!$$% 	"GLL!	"r!   r   c                 R    t        | j                   dddit        ||            S )zBuild the kwargs for the Post request, used by sync

        Args:
            model (str): _description_
            texts (List[str]): _description_

        Returns:
            Dict[str, Collection[str]]: _description_
        z/embeddingszcontent-typezapplication/json)inputr   )urlheadersjson)dictr   )r'   r   r"   s      r   _kwargs_post_requestz;TinyAsyncOpenAIInfinityEmbeddingClient._kwargs_post_request   s<     99+[)  2 

 
	
r!   batch_textsc                    t        j                  di | j                  ||      }|j                  dk7  r%t	        d|j                   d|j
                         |j                         d   D cg c]  }|d   	 c}S c c}w )Nr$      5Infinity returned an unexpected response with status : data	embeddingr;   )requestspostrf   status_code	Exceptionr-   rd   )r'   r   rg   responsees        r   _sync_request_embedz:TinyAsyncOpenAIInfinityEmbeddingClient._sync_request_embed   s     == 
''e;'G
 3&G''(8==/;  )1(?@1+@@@s   0A?c                 j   | j                  |      \  }}| j                  |      }| j                  |gt        |      z  |f}t        |      dk(  rt	        t        |       }n,t        d      5 }t	         |j
                  |       }ddd       | j                        }	 ||	      }
|
S # 1 sw Y   $xY w)zcall the embedding of model

        Args:
            model (str): to embedding model
            texts (List[str]): List of sentences to embed.

        Returns:
            List[List[float]]: List of vectors for each sentence
        rG       N)rT   rZ   rt   rA   listmapr   r_   )r'   r   r"   
perm_textsunpermute_funcperm_texts_batchedmap_argsembeddings_batch_permpembeddings_permr(   s              r   r%   z,TinyAsyncOpenAIInfinityEmbeddingClient.embed   s     &*]]5%9"
N![[4 $$Gc,--

 !"a'$(h$8!#B' ?1(,UQUUH-=(>%? --(=>#O4
? ?s   -B))B2sessionkwargsc                 f  K    |j                   di |4 d {   }|j                  dk7  r%t        d|j                   d|j                         |j	                          d {   d   }|D cg c]  }|d   	 c}cd d d       d {    S 7 w7 -c c}w 7 # 1 d {  7  sw Y   y xY ww)Nri   rj   rk   rl   rm   r;   )ro   statusrq   r-   rd   )r'   r   r   rr   rm   rs   s         r   _async_requestz5TinyAsyncOpenAIInfinityEmbeddingClient._async_request  s       7<<)&) 	7 	7X#%K'r(--:   (}}.7I,56qAkN6	7 	7 	7 /6	7 	7 	7 	7si   B1BB1AB%B&B1B=B?B1BB1BBB1B."B%#B.*B1c                   K   | j                  |      \  }}| j                  |      }t        j                  dt        j                  d            4 d{   }t        j                  |D cg c]&  }| j                  || j                  ||            ( c}  d{   }ddd      d{    | j                        }	 ||	      }
|
S 7 yc c}w 7 67 (# 1 d{  7  sw Y   8xY ww)zcall the embedding of model, async method

        Args:
            model (str): to embedding model
            texts (List[str]): List of sentences to embed.

        Returns:
            List[List[float]]: List of vectors for each sentence
        Trv   )limit)	trust_env	connectorNr$   )r   r   )
rT   rZ   aiohttpClientSessionTCPConnectorasynciogatherr   rf   r_   )r'   r   r"   ry   rz   r{   r   rJ   r}   r   r(   s              r   r+   z-TinyAsyncOpenAIInfinityEmbeddingClient.aembed'  s      &*]]5%9"
N![[4 ((g&:&:&D
 	 	*1.. 0
 	 '' '#88uA8N ( + %!	 	 --(=>#O4
	%	 	 	 	sf   AC1CC1C/+C
C!C"C&C11C2 C1CC1C."C%#C.*C1)zhttp://localhost:7797/v1N)r2   r3   r4   r5   r6   r   r   r   rD   staticmethodrA   r   r   r	   rT   rZ   r   r_   r   rf   r:   rt   r%   r   r+   r;   r!   r   r   r   w   s   2 /6:

 W223
 
	
 -0
Cy
"*
	tCy("	#
 
BDI $tCy/ ( d3i T#Y  
# 
d3i 
DcN 
,AA'+CyA	d5k	A3 tCy T$u+5F :
7,,
76:38n
7	d5k	
7# d3i De<M r!   r   )r5   r   concurrent.futuresr   typingr   r   r   r   r   r	   r   numpyrM   rn   langchain_core.embeddingsr
   langchain_core.utilsr   pydanticr   r   r   __all__r   r   r;   r!   r   <module>r      sN    3  1 = =    0 5 ; ;
 cJ cLM Mr!   