
    7|h                         d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 dZedz   Zedz   Zedz   Z e j                  e j                   	        e j"                  e      Z G d
 de
      Zy)    N)AnyDictListOptional)Document)
BaseLoaderzhttps://api.notion.com/v1z/databases/{database_id}/queryz/pages/{page_id}z/blocks/{block_id}/children)levelc                   "   e Zd ZdZ	 ddddededee   deeeef      ddf
d	Z	de
e   fd
Zddifdeeef   de
eeef      fdZdeeef   defdZddededefdZdi fdddededeeef   deeeef      def
dZde
eeef      defdZy)NotionDBLoaderaY  Load from `Notion DB`.

    Reads content from pages within a Notion Database.
    Args:
        integration_token (str): Notion integration token.
        database_id (str): Notion database id.
        request_timeout_sec (int): Timeout for Notion requests in seconds.
            Defaults to 10.
        filter_object (Dict[str, Any]): Filter object used to limit returned
            entries based on specified criteria.
            E.g.: {
                "timestamp": "last_edited_time",
                "last_edited_time": {
                    "on_or_after": "2024-02-07"
                }
            } -> will only return entries that were last edited
                on or after 2024-02-07
            Notion docs: https://developers.notion.com/reference/post-database-query-filter
            Defaults to None, which will return ALL entries.
    N)filter_objectintegration_tokendatabase_idrequest_timeout_secr   returnc                    |st        d      |st        d      || _        || _        d| j                  z   ddd| _        || _        |xs i | _        y)zInitialize with parameters.z"integration_token must be providedzdatabase_id must be providedzBearer zapplication/jsonz
2022-06-28)AuthorizationzContent-TypezNotion-VersionN)
ValueErrortokenr   headersr   r   )selfr   r   r   r   s        l/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/notiondb.py__init__zNotionDBLoader.__init__)   sc     !ABB;<<&
&&3.*

 $7 *0b    c                 L      j                         }t         fd|D              S )zqLoad documents from the Notion database.
        Returns:
            List[Document]: List of documents.
        c              3   @   K   | ]  }j                  |        y w)N)	load_page).0page_summaryr   s     r   	<genexpr>z&NotionDBLoader.load.<locals>.<genexpr>G   s     T\DNN<0Ts   )_retrieve_page_summarieslist)r   page_summariess   ` r   loadzNotionDBLoader.loadA   s$    
 668T^TTTr   	page_sized   
query_dictc                    g }	 | j                  t        j                  | j                        d|| j                        }|j                  |j                  d             |j                  d      s	 |S |j                  d      |d<   )zi
        Get all the pages from a Notion database
        OR filter based on specified criteria.
        )r   POST)methodr&   r   resultshas_morenext_cursorstart_cursor)_requestDATABASE_URLformatr   r   extendget)r   r&   pagesdatas       r   r    z'NotionDBLoader._retrieve_page_summariesI   s     ')==##0@0@#A%"00	 ! D LL),-88J'  *.-)@J~& r   r   c                 |   |d   }i }|d   j                         D ]y  \  }}|d   }|dk(  r| j                  |d         }n@|dk(  r| j                  |d         }n%|dk(  r|d   r|d   D cg c]  }|d   	 c}ng }n|dk(  r|d   }n|d	k(  r|d	   r|d	   d
    d|d	   d    nd}n|dk(  r|d   r|d   d   nd}n|dk(  rNg }|d   r|d   D ]>  }|j                  d      }	|	st        j	                  d|        |j                  |	       @ nn|dk(  r|d   r|d   nd}n\|dk(  r|d   r|d   nd}nJ|dk(  r|d   r|d   nd}n8|dk(  r|d   }n-|dk(  r|d   }n"|dk(  r|d   }n|dk(  r|d   r|d   d   nd}nd}|||j                         <   | ||d<   t        | j                  |      |      S c c}w )z\Read a page.

        Args:
            page_summary: Page summary from Notion API.
        id
propertiestype	rich_texttitlemulti_selectnameurl	unique_idprefix-numberNstatuspeoplez-Missing 'name' in 'people' property for page datelast_edited_timecreated_timecheckboxemailselect)page_contentmetadata)	items_concatenate_rich_textr2   loggerwarningappendlowerr   _load_blocks)
r   r   page_idrK   	prop_name	prop_data	prop_typevalueitemr<   s
             r   r   zNotionDBLoader.load_pagec   s    t$ $& %1$>$D$D$F 7	0 Iy!&)IK'33Ik4JKg%33Ig4FGn, !0 /8.GHdT&\H 
 e#!%(k) !- !-h78)K:PQY:Z9[\ 
 h&7@7J	(+F3PTh&X& )( 3 +#xx/#"NN!,,39!6 T*+ f$-6v->	&)D00 !!34 01 
 n,5>~5N	.1TXj(!*-g%!'*h&!(+h&7@7J	(+F3PT*/HY__&'o7	0r !T%6%6w%?(SSe Is   .F9block_idnum_tabsc                    g }|}|r| j                  t        j                  |            }|d   D ]  }||d      }d|vrg }|d   D ]$  }	d|	v s|j                  d|z  |	d   d   z          & |d   r*| j	                  |d	   |d
z         }
|j                  |
       |j                  dj                  |              |j                  d      }|rdj                  |      S )zRead a block and its children.)rY   r*   r8   r9   text	contenthas_childrenr6      )rZ   
r,   )r.   	BLOCK_URLr0   rP   rR   joinr2   )r   rY   rZ   result_lines_arrcur_block_idr4   result
result_objcur_result_text_arrr9   children_texts              r   rR   zNotionDBLoader._load_blocks   s   &($==!1!1<!1!HIDy/ H#F6N3
j013#!+K!8 I*+22 8Oi.?	.JJ .)$($5$5tx!| %6 %M (..}= ''		2E(FG)H,  88M2L3 6 yy)**r   GETr=   r)   c                    |j                         }|r||d<   t        j                  ||| j                  || j                        }|j                          |j                         S )Nfilter)r   jsontimeout)copyrequestsrequestr   r   raise_for_statusrm   )r   r=   r)   r&   r   json_payloadress          r   r.   zNotionDBLoader._request   s`     "(%2L"LL,,
 	xxzr   rich_text_arrayc                 2    dj                  d |D              S )z4Concatenate all text content from a rich_text array. c              3   &   K   | ]	  }|d      yw)
plain_textN )r   rX   s     r   r   z8NotionDBLoader._concatenate_rich_text.<locals>.<genexpr>   s     FdtL)Fs   )rc   )r   ru   s     r   rM   z%NotionDBLoader._concatenate_rich_text   s    wwFoFFFr   )
   )r   )__name__
__module____qualname____doc__strr   intr   r   r   r   r   r#   r    r   rR   r.   rM   rz   r   r   r   r      s^   2 .0	1 3711 1 &c]	1  S#X/1 
10Ud8n U -8+=sCx.	d38n	4HTd38n HT HTT +S  +C  +  +J %'	 37  cN	  S#X/ 
*Gd4S>6J Gs Gr   r   )loggingtypingr   r   r   r   rp   langchain_core.documentsr   )langchain_community.document_loaders.baser   NOTION_BASE_URLr/   PAGE_URLrb   basicConfigWARNING	getLoggerr|   rN   r   rz   r   r   <module>r      su     , ,  - @-!AA//;;	   '// *			8	$SGZ SGr   