
    ih	                         d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ  ej                  e      Z G d de      Zy)	zScrapfly Web Reader.    N)IteratorListLiteralOptional)
BaseLoader)Document)get_from_envc                   f    e Zd ZdZddddddee   dee   ded	   d
ee   de	ddfdZ
dee   fdZy)ScrapflyLoaderzTurn a url to llm accessible markdown with `Scrapfly.io`.

    For further details, visit: https://scrapfly.io/docs/sdk/python
    NmarkdownT)api_keyscrape_formatscrape_configcontinue_on_failureurlsr   r   )r   textr   r   returnc                    	 ddl m} |st        d      |xs t	        dd      } ||      | _         || _        || _        || _        || _        y# t        $ r t        d      w xY w)	a  Initialize client.

        Args:
            urls: List of urls to scrape.
            api_key: The Scrapfly API key. If not specified must have env var
                SCRAPFLY_API_KEY set.
            scrape_format: Scrape result format, one or "markdown" or "text".
            scrape_config: Dictionary of ScrapFly scrape config object.
            continue_on_failure: Whether to continue if scraping a url fails.
        r   )ScrapflyClientzC`scrapfly` package not found, please run `pip install scrapfly-sdk`zURLs must be provided.r   SCRAPFLY_API_KEY)keyN)	scrapflyr   ImportError
ValueErrorr	   r   r   r   r   )selfr   r   r   r   r   r   s          k/var/www/html/dev/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/scrapfly.py__init__zScrapflyLoader.__init__   sy    &	/
 566H\)5GH&73	**#6   	U 	s   A A%c              #     K   ddl m} | j                  | j                  ni }| j                  D ]O  }	 | j                   j	                   ||fd| j
                  i|      }t        |j                  d   d|i       Q y # t        $ r4}| j                  rt        j                  d| d|        n|Y d }~d }~ww xY ww)	Nr   )ScrapeConfigformatcontenturl)page_contentmetadatazError fetching data from z, exception: )r   r   r   r   scraper   r   scrape_result	Exceptionr   loggererror)r   r   r   r"   responsees         r   	lazy_loadzScrapflyLoader.lazy_load5   s     ).2.@.@.L**RT99 	C==// QT-?-?Q=Q !)!7!7	!B#S\ 	  ++LL#<SEqc!RSG Ts/   0CAB?C	B?*B:5C:B??C)__name__
__module____qualname____doc__r   strr   r   dictboolr   r   r   r,        r   r   r      sx     "&5?(,$( 73i 7 #	 7
 12 7  ~ 7 " 7 
 7D8H- r5   r   )r0   loggingtypingr   r   r   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_core.utilsr	   	getLogger__file__r(   r   r4   r5   r   <module>r=      s8      4 4 6 - -			8	$9Z 9r5   