
    7|h                         d dl mZ d dlmZmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ  G d dee      Z G d de      Zy)    )Enum)AnyDictListOptionalUnionN)CallbackManagerForRetrieverRun)Document)
Embeddings)BaseRetriever)
get_fields)
ConfigDict)maximal_marginal_relevancec                       e Zd ZdZdZdZy)
SearchTypez-Enumerator of the types of search to perform.
similaritymmrN)__name__
__module____qualname____doc__r   r        f/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/retrievers/docarray.pyr   r      s    7J
Cr   r   c            
       h   e Zd ZU dZdZeed<   eed<   eed<   eed<   e	j                  Ze	ed<   dZeed	<   dZee   ed
<    ed      Zdededee   fdZdej.                  d	edeeeeef   ef      fdZdej.                  dee   fdZdej.                  dee   fdZdeeeef   ef   defdZy)DocArrayRetrievera  `DocArray Document Indices` retriever.

    Currently, it supports 5 backends:
    InMemoryExactNNIndex, HnswDocumentIndex, QdrantDocumentIndex,
    ElasticDocIndex, and WeaviateDocumentIndex.

    Args:
        index: One of the above-mentioned index instances
        embeddings: Embedding model to represent text as vectors
        search_field: Field to consider for searching in the documents.
            Should be an embedding/vector/tensor.
        content_field: Field that represents the main content in your document schema.
            Will be used as a `page_content`. Everything else will go into `metadata`.
        search_type: Type of search to perform (similarity / mmr)
        filters: Filters applied for document retrieval.
        top_k: Number of documents to return
    Nindex
embeddingssearch_fieldcontent_fieldsearch_type   top_kfiltersT)arbitrary_types_allowedqueryrun_managerreturnc                P   t        j                  | j                  j                  |            }| j                  t
        j                  k(  r| j                  |      }|S | j                  t
        j                  k(  r| j                  |      }|S t        d| j                   d      )zGet documents relevant for a query.

        Args:
            query: string to find relevant documents for

        Returns:
            List of relevant documents
        zSearch type z5 does not exist. Choose either 'similarity' or 'mmr'.)nparrayr   embed_queryr!   r   r   _similarity_searchr   _mmr_search
ValueError)selfr&   r'   	query_embresultss        r   _get_relevant_documentsz)DocArrayRetriever._get_relevant_documents5   s     HHT__88?@	z444--i8G  /&&y1G  t//0 17 8 r   r1   c                 ^   ddl m}m} i }| j                  }t	        | j
                  |      r| j                  |d<   d}n5t	        | j
                  |      r| j                  |d<   n| j                  |d<   | j                  r | j
                  j                         j                  ||      j                  di |j                  |      }| j
                  j                  |      }t        |d	      r|j                  }|d
| }|S | j
                  j                  |||      j                  }|S )a  
        Perform a search using the query embedding and return top_k documents.

        Args:
            query_emb: Query represented as an embedding
            top_k: Number of documents to return

        Returns:
            A list of top_k documents matching the query
        r   )ElasticDocIndexWeaviateDocumentIndexwhere_filter r&   filter_query)r&   r   )limit	documentsN)r&   r   r:   r   )docarray.indexr5   r6   r   
isinstancer   r$   build_queryfindfilterbuildexecute_queryhasattrr;   )	r0   r1   r#   r5   r6   filter_argsr   r&   docss	            r   _searchzDocArrayRetriever._searchQ   s&    	J((djj"78*.,,K'L

O4#'<<K *.,,K'<<

&&(#,   	' &	'
 U#  ::++E2Dt[)~~<D
  ::??l% # i  r   c                     | j                  || j                        }|D cg c]  }| j                  |       }}|S c c}w )z
        Perform a similarity search.

        Args:
            query_emb: Query represented as an embedding

        Returns:
            A list of documents most similar to the query
        r1   r#   )rF   r#   _docarray_to_langchain_doc)r0   r1   rE   docr2   s        r   r-   z$DocArrayRetriever._similarity_search   sC     ||itzz|BCGHC42237HH Is   >c           
      4   | j                  |d      }t        ||D cg c]7  }t        |t              r|| j                     nt        || j                        9 c}| j                        }|D cg c]  }| j                  ||          }}|S c c}w c c}w )z
        Perform a maximal marginal relevance (mmr) search.

        Args:
            query_emb: Query represented as an embedding

        Returns:
            A list of diverse documents related to the query
           rH   )k)rF   r   r=   dictr   getattrr#   rI   )r0   r1   rE   rJ   mmr_selectedidxr2   s          r   r.   zDocArrayRetriever._mmr_search   s     ||ir|:1
  	  c4( D%%&S$"3"345 jj	
 JVV#42249=VV Ws   <B
1BrJ   c                    t        |t              r|j                         n
t        |      }| j                  |vrt        d| j                   d      t        t        |t              r|| j                     nt        || j                              }|D ]c  }t        |t              r||   nt        ||      }t        |t        t        t        t        f      sE|| j                  k7  sU||j                  |<   e |S )a;  
        Convert a DocArray document (which also might be a dict)
        to a langchain document format.

        DocArray document can contain arbitrary fields, so the mapping is done
        in the following way:

        page_content <-> content_field
        metadata <-> all other fields excluding
            tensors and embeddings (so float, int, string)

        Args:
            doc: DocArray document

        Returns:
            Document in langchain format

        Raises:
            ValueError: If the document doesn't contain the content field
        z.Document does not contain the content field - .)page_content)r=   rN   keysr   r    r/   r
   rO   strintfloatboolmetadata)r0   rJ   fieldslc_docnamevalues         r   rI   z,DocArrayRetriever._docarray_to_langchain_doc   s    ,  *#t4*S/V+@ASAS@TTUV  #t$ T//0d001
  	.D!+C!6CIGC<NE53UD"9:D...(-%	. r   )r   r   r   r   r   r   __annotations__r   rV   r   r   r!   r#   rW   r$   r   r   model_configr	   r   r
   r3   r*   ndarrayr   r   rF   r-   r.   rI   r   r   r   r   r      s   $ E3(33K3E3N!GXc]! $L 4	
 
h8,,,/,	eDcNC'(	),\BJJ 4> RZZ DN 2*eDcNC4G.H *X *r   r   )enumr   typingr   r   r   r   r   numpyr*   langchain_core.callbacksr	   langchain_core.documentsr
   langchain_core.embeddingsr   langchain_core.retrieversr   langchain_core.utils.pydanticr   pydanticr   &langchain_community.vectorstores.utilsr   rV   r   r   r   r   r   <module>rl      sB     3 3  C - 0 3 4  Md z zr   