
    7|h                     B    d dl mZmZ d dlmZ d dlmZ  G d de      Zy)    )AnyList)Document)WebBaseLoaderc                   N    e Zd ZdZdee   fdZdedee   fdZdedee   fdZ	y)	HNLoaderz_Load `Hacker News` data.

    It loads data from either main page results or the comments page.returnc                     | j                         }d| j                  v r| j                  |      S | j                  |      S )a  Get important HN webpage information.

        HN webpage components are:
            - title
            - content
            - source url,
            - time of post
            - author of the post
            - number of comments
            - rank of the post
        item)scrapeweb_pathload_commentsload_results)self	soup_infos     f/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/hn.pyloadzHNLoader.load   s=     KKM	T]]"%%i00$$Y//    r   c           	          |j                  d      }|j                  d      j                  d      }|D cg c]4  }t        |j                  j                         | j                  |d      6 c}S c c}w )zLoad comments from a HN post.ztr[class='athing comtr']ztr[id='pagespace']title)sourcer   )page_contentmetadata)select
select_onegetr   textstripr   )r   r   commentsr   comments        r   r   zHNLoader.load_comments   st    ##$>?$$%9:>>wG $

 	 $\\//1$(MMEB
 	
 
s   9A2soupc           	         |j                  d      }g }|D ]  }|j                  d      j                  }|j                  dddi      j                  d      j	                  d      }|j                  dddi      j                  j                         }| j                  |||d}|j                  t        ||||	              |S )
zLoad items from an HN page.ztr[class='athing']zspan[class='rank']spanclass	titlelineahref)r   r   linkranking)r   r(   r)   r   )	r   r   r   findr   r   r   appendr   )	r   r!   items	documentslineItemr)   r(   r   r   s	            r   r   zHNLoader.load_results+   s    01	 	H))*>?DDG==';)?@EEcJNNvVDMM&7K*@AFFLLNE--"	H !&T7X	 r   N)
__name__
__module____qualname____doc__r   r   r   r   r   r    r   r   r   r      sF    I0d8n 0$

s 

tH~ 

 h r   r   N)typingr   r   langchain_core.documentsr   -langchain_community.document_loaders.web_baser   r   r3   r   r   <module>r7      s     - G6} 6r   