
    7|h                         d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ  e j                  e      Z G d de
      Zy)    N)AnyIteratorListOptionalSequence)Document)
BaseLoader)NewsURLLoaderc                       e Zd ZdZ	 	 	 	 ddeee      dee   dedededdfd	Z	de
e   fd
Zedee   fd       Zdee   fdZy)RSSFeedLoaderaQ  Load news articles from `RSS` feeds using `Unstructured`.

    Args:
        urls: URLs for RSS feeds to load. Each articles in the feed is loaded into its own document.
        opml: OPML file to load feed urls from. Only one of urls or opml should be provided.  The value
        can be a URL string, or OPML markup contents as byte or string.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newsloader_kwargs: Any additional named arguments to pass to
            NewsURLLoader.

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import RSSFeedLoader

            loader = RSSFeedLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    The loader uses feedparser to parse RSS feeds.  The feedparser library is not installed by default so you should
    install it if using this loader:
    https://pythonhosted.org/feedparser/

    If you use OPML, you should also install listparser:
    https://pythonhosted.org/listparser/

    Finally, newspaper is used to process each article:
    https://newspaper.readthedocs.io/en/latest/
    Nurlsopmlcontinue_on_failureshow_progress_barnewsloader_kwargsreturnc                 r    |du |du k(  rt        d      || _        || _        || _        || _        || _        y)zInitialize with urls or OPML.Nz;Provide either the urls or the opml argument, but not both.)
ValueErrorr   r   r   r   r   )selfr   r   r   r   r   s         g/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/rss.py__init__zRSSFeedLoader.__init__/   sQ     DLDL
 M  		#6 !2!2    c                     | j                         }| j                  r	 ddlm}  ||      }t	        |      S # t        $ r}t        d      |d }~ww xY w)Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   ImportErrorlist)r   iterr   es       r   loadzRSSFeedLoader.loadD   s]    ~~!!% :DDz  !/ 	s   7 	A AAc                     | j                   r| j                   S 	 dd l}|j                  | j                        }|j
                  D cg c]  }|j                   c}S # t        $ r}t        d      |d }~ww xY wc c}w )Nr   zPackage listparser must be installed if the opml arg is used. Please install with 'pip install listparser' or use the urls arg instead.)r   
listparserr   parser   feedsurl)r   r"   r   rssfeeds        r   	_get_urlszRSSFeedLoader._get_urlsR   sw    9999	 tyy)%(YY/T//  	$ 		 0s   A A:	A7&A22A7c              #     K   	 dd l }| j                  D ]  }	 |j                  |      }t	        |dd      rt        d| d|j                         	 	 |j                  D ]J  }t        d
d|j                  gi| j                  }|j                         d   }||j                   d<   | L  y # t        $ r t        d      w xY w# t        $ r3}| j                  r t        j                  d| d|        Y d }~|d }~ww xY w# t        $ r>}| j                  r+t        j                  d	j                   d|        Y d }~2|d }~ww xY ww)Nr   zMfeedparser package not found, please install it with `pip install feedparser`bozoFzError fetching z, exception: r   r'   zError processing entry  )
feedparserr   r(   r#   getattrr   bozo_exception	Exceptionr   loggererrorentriesr
   linkr   r    metadata)r   r,   r%   r'   r   entryloaderarticles           r   r   zRSSFeedLoader.lazy_loada   sq    	 >> 	C!'',4/$)#mD<O<O;PQ  0!\\ "E* #jj\00F %kkmA.G/2G$$V,!M"	  	+ 	  ++LL?3%}QC!HIG  ++LL#:5::,mTUSV!WXGs{   EB/ E9CEAD,E/CE	D'C>7E<C>>DE	E1E EEEE)NNTF)__name__
__module____qualname____doc__r   r   strboolr   r   r   r   r    propertyr(   r   r   r+   r   r   r   r      s     H )-"$("'3x}%3 sm3 "	3
  3 !3 
3*d8n  08C= 0 0$8H- $r   r   )loggingtypingr   r   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser	   )langchain_community.document_loaders.newsr
   	getLoggerr8   r0   r   r+   r   r   <module>rE      s7     : : - @ C			8	$yJ yr   