
    7|h=                     >    d Z ddlmZmZmZ ddlmZ  G d de      Zy)z+Load Documents from Docusarus Documentation    )AnyListOptional)SitemapLoaderc                   P     e Zd ZdZ	 d	dedeee      def fdZdedefdZ	 xZ
S )
DocusaurusLoadera  Load from Docusaurus Documentation.

    It leverages the SitemapLoader to loop through the generated pages of a
    Docusaurus Documentation website and extracts the content by looking for specific
    HTML tags. By default, the parser searches for the main content of the Docusaurus
    page, which is normally the <article>. You can also define your own
    custom HTML tags by providing them as a list, for example: ["div", ".main", "a"].
    urlcustom_html_tagskwargsc                     |j                  d      s| d}|xs dg| _        t        |   |fd|j                  d      xs | j                  i| y)aq  Initialize DocusaurusLoader

        Args:
            url: The base URL of the Docusaurus website.
            custom_html_tags: Optional custom html tags to extract content from pages.
            kwargs: Additional args to extend the underlying SitemapLoader, for example:
                filter_urls, blocksize, meta_function, is_local, continue_on_failure
        is_localz/sitemap.xmlzmain articleparsing_functionN)getr
   super__init___parsing_function)selfr	   r
   r   	__class__s       n/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/docusaurus.pyr   zDocusaurusLoader.__init__   sd     zz*%E&C 0 D^4D	
#ZZ(:;Ut?U?U	
 	
    contentreturnc                     |j                  dj                  | j                              }|D ]  }||vs|j                           t	        |j                               S )z0Parses specific elements from a Docusaurus page.,)selectjoinr
   	decomposestrget_text)r   r   relevant_elementselements       r   r   z"DocusaurusLoader._parsing_function+   sZ    #NN388D4I4I+JK( 	$G//!!#	$ 7##%&&r   )N)__name__
__module____qualname____doc__r   r   r   r   r   r   __classcell__)r   s   @r   r   r      sJ     15

 #49-
 	
2' ' 'r   r   N)r%   typingr   r   r   ,langchain_community.document_loaders.sitemapr   r    r   r   <module>r*      s    1 & & F+'} +'r   