
    ih)                     R    d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	  G d de      Z
y)    )IteratorLiteralOptional)
BaseLoader)Document)get_from_envc            
       V    e Zd ZdZdddddedee   ded   d	ee   fd
Zde	e
   fdZy)SpiderLoaderzLoad web pages as Documents using Spider AI.

    Must have the Python package `spider-client` installed and a Spider API key.
    See https://spider.cloud for more.
    Nscrape)api_keymodeparamsurlr   r   r   crawlr   c                    |ddd}	 ddl m} |dvrt        d	| d
      |xs t	        dd      } ||      | _         || _        || _        || _        y# t        $ r t        d      w xY w)a  Initialize with API key and URL.

        Args:
            url: The URL to be processed.
            api_key: The Spider API key. If not specified, will be read from env
            var `SPIDER_API_KEY`.
            mode: The mode to run the loader in. Default is "scrape".
                 Options include "scrape" (single page) and "crawl" (with deeper
                 crawling following subpages).
            params: Additional parameters for the Spider API.
        NmarkdownT)return_formatmetadatar   )SpiderzB`spider` package not found, please run `pip install spider-client`r   zUnrecognized mode 'z%'. Expected one of 'scrape', 'crawl'.r   SPIDER_API_KEY)r   )spiderr   ImportError
ValueErrorr   r   r   r   )selfr   r   r   r   r   s         i/var/www/html/dev/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/spider.py__init__zSpiderLoader.__init__   s    & >!+ F
	%
 **%dV+PQ 
 F\)5EFW-	  	T 	s   A A+returnc              #     K   g }| j                   dk(  rE| j                  j                  | j                  | j                        }|re|j                  |       nS| j                   dk(  rD| j                  j                  | j                  | j                        }|r|j                  |       |D ]  }| j                   dk(  r;|d   j                  dd      }|d   j                  di       }|t        ||	       | j                   dk(  s]|j                  dd      }|j                  di       }|t        ||	        yw)
z+Load documents based on the specified mode.r   )r   r   r   content r   N)page_contentr   )
r   r   
scrape_urlr   r   append	crawl_urlextendgetr   )r   spider_docsresponsedocr"   r   s         r   	lazy_loadzSpiderLoader.lazy_load:   s&    99 {{--dhht{{-KH""8,YY'!{{,,TXXdkk,JH""8, 	CyyH$"1vzz)R8 q6::j"5+"xPPyyG#"wwy"5 77:r2+"%1!) %	s   DE&E2E)__name__
__module____qualname____doc__strr   r   dictr   r   r   r+        r   r
   r
      s]     "&+3!%)) #	)
 '() )V$8H- $r3   r
   N)typingr   r   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_core.utilsr   r
   r2   r3   r   <module>r8      s"    . . 6 - -V: Vr3   