
    7|h                     V    d dl mZmZmZmZ d dlmZ d dlmZ d dl	m
Z
  G d de      Zy)    )AnyIteratorListOptional)Document)
BaseLoader)ArxivAPIWrapperc                   R    e Zd ZdZ	 d
dedee   defdZde	e
   fdZdee
   fd	Zy)ArxivLoadera  Load a query result from `Arxiv`.
    The loader converts the original PDF format into the text.

    Setup:
        Install ``arxiv`` and ``PyMuPDF`` packages.
        ``PyMuPDF`` transforms PDF files downloaded from the arxiv.org site
        into the text format.

        .. code-block:: bash

            pip install -U arxiv pymupdf


    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning",
                # load_max_docs=2,
                # load_all_available_meta=False
            )

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python
            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }


    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }

    Use summaries of articles as docs:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning"
            )

            docs = loader.get_summaries_as_docs()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Pre-trained language models (LMs) are able to perform complex reasoning
            without explicit fine-tuning
            {
                'Entry ID': 'http://arxiv.org/abs/2402.03268v2',
                'Published': datetime.date(2024, 2, 29),
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang'
            }
    Nquerydoc_content_chars_maxkwargsc                 6    || _         t        dd|i|| _        y)a$  Initialize with search query to find documents in the Arxiv.
        Supports all arguments of `ArxivAPIWrapper`.

        Args:
            query: free text which used to find documents in the Arxiv
            doc_content_chars_max: cut limit for the length of a document's content
        r   N )r   r	   client)selfr   r   r   s       i/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/arxiv.py__init__zArxivLoader.__init__   s'     
% 
"7
;A
    returnc              #   j   K   | j                   j                  | j                        E d{    y7 w)zLazy load Arvix documentsN)r   	lazy_loadr   r   s    r   r   zArxivLoader.lazy_load   s"     ;;((444s   )313c                 L    | j                   j                  | j                        S )zBUses papers summaries as documents rather than source Arvix papers)r   get_summaries_as_docsr   r   s    r   r   z!ArxivLoader.get_summaries_as_docs   s    {{00<<r   )N)__name__
__module____qualname____doc__strr   intr   r   r   r   r   r   r   r   r   r   r   r   	   sR    wt BF

19#
QT
 58H- 5=tH~ =r   r   N)typingr   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   #langchain_community.utilities.arxivr	   r   r   r   r   <module>r&      s"    0 0 - @ ?P=* P=r   