
    7|hw#                        d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ er
d dlZd dlmZ ddZ G d	 d
e      ZddZ G d de      ZeZy)    )annotationsN)Path)TYPE_CHECKINGDictListOptionalUnion)Document)
BaseLoader)
EntityLikec                6    | d   }| d   }| d   }| d| d| dS )zBCombine message information in a readable format ready to be used.datefromtextz on z: 

 )rowr   senderr   s       l/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/telegram.pyconcatenate_rowsr      s6    v;D[Fv;DXT$r$t,,    c                       e Zd ZdZddZddZy)TelegramChatFileLoaderzLoad from `Telegram chat` dump.c                    || _         y)zInitialize with a path.N)	file_path)selfpaths     r   __init__zTelegramChatFileLoader.__init__   s	    r   c                   t        | j                        }t        |d      5 }t        j                  |      }ddd       dj                  d d   D              }dt        |      i}t        ||      gS # 1 sw Y   ?xY w)	Load documents.utf8encodingN c              3  h   K   | ]*  }|d    dk(  r t        |d   t              rt        |       , yw)typemessager   N)
isinstancestrr   ).0r'   s     r   	<genexpr>z.TelegramChatFileLoader.load.<locals>.<genexpr>'   s6      
v)+
76?C0P W%
s   02messagessourcepage_contentmetadata)r   r   openjsonloadjoinr)   r
   )r   pfdr   r0   s         r   r3   zTelegramChatFileLoader.load    s     !f% 			!A	 ww 
Z=
 

 c!f%dX>??	 	s   A77B N)r   zUnion[str, Path]returnList[Document])__name__
__module____qualname____doc__r   r3   r   r   r   r   r      s    )@r   r   c                   ddl m}  |dg dd      }t        | t              r| g} | D cg c]  }t	        |       }}t        |      D ]  \  }}|dz   |j                  d	<    g }|D ]  }|j                  |j                        }t        |      D ]a  \  }}	t	        |	|j                  d	   |d
      }|j                  d	    d|j                  d    |j                  d<   |j                  |       c  |S c c}w )zIConvert a string or list of strings to a list of Documents with metadata.r   )RecursiveCharacterTextSplitteri   )r   
.!?, r$      )
chunk_size
separatorschunk_overlap)r/      page)rL   chunkr.   -rM   r-   )
langchain_text_splittersr@   r(   r)   r
   	enumerater0   
split_textr/   append)
r   r@   text_splitterrL   	page_docsidoc
doc_chunkschunksrM   s
             r   text_to_docsrY   1   s   G2>M $v9=>t,>I> I& %3 1uV% J #))#*:*:;!&) 	#HAu"cll66JUV-WC ),V(<'=Qs||G?T>U%VCLL"c"	## % ?s   C:c                  b    e Zd ZdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 d	dZd
dZddZ	 	 	 	 	 	 ddZddZy)TelegramChatApiLoaderz)Load `Telegram` chat json directory dump.Nc                J    || _         || _        || _        || _        || _        y)aI  Initialize with API parameters.

        Args:
            chat_entity: The chat entity to fetch data from.
            api_id: The API ID.
            api_hash: The API hash.
            username: The username.
            file_path: The file path to save the data to. Defaults to
                 "telegram_data.json".
        N)chat_entityapi_idapi_hashusernamer   )r   r]   r^   r_   r`   r   s         r   r   zTelegramChatApiLoader.__init__V   s(    $ '  "r   c                  K   ddl m} g } || j                  | j                  | j                        4 d{   }|j                  | j                        2 3 d{   }|j                  du}|r|j                  j                  nd}|j                  |j                  |j                  |j                  j                         |j                  ||d       7 7 }6 ddd      d{  7   n# 1 d{  7  sw Y   nxY wt        | j                   dd      5 }t#        j$                  ||dd	
       ddd       y# 1 sw Y   yxY ww)z8Fetch data from Telegram API and save it as a JSON file.r   )TelegramClientN)	sender_idr   r   
message.idis_replyreply_to_idwzutf-8r"   F   )ensure_asciiindent)telethon.syncrb   r`   r^   r_   iter_messagesr]   reply_toreply_to_msg_idrR   rc   r   r   	isoformatidr1   r   r2   dump)r   rb   dataclientr'   re   rf   r6   s           r   fetch_data_from_telegramz.TelegramChatApiLoader.fetch_data_from_telegramn   s    0!$--dmmL 	 	PV!'!5!5d6F6F!G  g"++47BJg..>>PT%,%6%6 ' ' 6 6 8&-jj$,'2			!G	 	 	 	 	 $..#8 	=AIIdAE!<	= 	= 	=s   5E
CE
C.CCCA8C.E
CC.E
'C*(E
.D 4C75D <E
D>5	E
>EE
c           	         dfd||d       }||d      j                  dg      }|d   j                  t              |d<   |d   D ci c]  }||g ||      z    }}|S c c}w )a
  Create a dictionary of message threads from the given data.

        Args:
            data (pd.DataFrame): A DataFrame containing the conversation                 data with columns:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            dict: A dictionary where the key is the parent message ID and                 the value is a list of message IDs in ascending order.
        c                r    ||d   | k(     d   j                         }g }|D ]  }||g ||      z   z  } |S )a^  
            Recursively find all replies to a given parent message ID.

            Args:
                parent_id (int): The parent message ID.
                reply_data (pd.DataFrame): A DataFrame containing reply messages.

            Returns:
                list: A list of message IDs that are replies to the parent message ID.
            rf   rd   )tolist)	parent_id
reply_datadirect_repliesall_repliesreply_idfind_repliess        r   r}   z@TelegramChatApiLoader._get_message_threads.<locals>.find_replies   sc     (
=(AY(NOfh 
 K* OzL:,NNNO r   re   rf   )subsetrd   )rx   intry   pd.DataFramer9   z	List[int])dropnaastyper   )r   rr   parent_messagesreply_messagesrx   message_threadsr}   s         @r   _get_message_threadsz*TelegramChatApiLoader._get_message_threads   s    $	0 Z 001 d:./66}o6N )7}(E(L(LS(Q}% -\:
 	{\)^%LLL
 

 
s   A%c                .   d}|j                         D ]l  \  }}||d   j                  |         j                  d      d   j                         }|D cg c]  }t	        |       }}|dj                  |      dz   z  }n |j                         S c c}w )aw  
        Combine the message texts for each parent message ID based             on the list of message threads.

        Args:
            message_threads (dict): A dictionary where the key is the parent message                 ID and the value is a list of message IDs in ascending order.
            data (pd.DataFrame): A DataFrame containing the conversation data:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            str: A combined string of message texts sorted by date.
        r$   rd   r   )byr   rF   z.
)itemsisinsort_valuesrw   r)   r4   strip)r   r   rr   combined_textrx   message_idsmessage_textselems           r   _combine_message_textsz,TelegramChatApiLoader._combine_message_texts   s    *  '6&;&;&= 
	="I{ T,',,[9:'0 
 4AA4SYAMA SXXm4u<<M
	= ""$$ Bs   Bc                0   | j                   8	 ddl}|j                          t        j                  | j                                t        | j                        }t        |d      5 }t        j                  |      }ddd       	 ddl} |j                        } |j                  |      }| j                  |      }| j!                  ||      }	t#        |	      S # t        $ r t        d      w xY w# 1 sw Y   xxY w# t        $ r t        d      w xY w)r    Nr   zy`nest_asyncio` package not found.
                    please install with `pip install nest_asyncio`
                    r!   r"   zf`pandas` package not found. 
                please install with `pip install pandas`
                )r]   nest_asyncioapplyasynciorunrt   ImportErrorr   r   r1   r2   r3   pandasjson_normalize	DataFramer   r   rY   )
r   r   r5   r6   r7   pdnormalized_messagesdfr   combined_textss
             r   r3   zTelegramChatApiLoader.load   s    '
#""$D99;<  !f% 			!A		 0b//2R\\-.33B744_bIN++3  ! 	 	  	 	s#   7C 'C4D  C14C= D)NNNNztelegram_data.json)
r]   zOptional[EntityLike]r^   zOptional[int]r_   Optional[str]r`   r   r   r)   )r9   None)rr   r   r9   dict)r   zDict[int, List[int]]rr   r   r9   r)   r8   )	r;   r<   r=   r>   r   rt   r   r   r3   r   r   r   r[   r[   S   sy    3 -1 $"&"&-#)# #  	#
  # #0=.9v$%3$%;G$%	$%L",r   r[   )r   r   r9   r)   )r   zUnion[str, List[str]]r9   r:   )
__future__r   r   r2   pathlibr   typingr   r   r   r   r	   langchain_core.documentsr
   )langchain_community.document_loaders.baser   r   r   telethon.hintsr   r   r   rY   r[   TelegramChatLoaderr   r   r   <module>r      sV    "    = = - @)-@Z @0Du,J u,r , r   