
    7|h1                         d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
  ej                  d      Z ej                  d      Z G d de
      Zy)	    N)ListTuple)Document)
BaseLoaderzBV\w+zav[0-9]+c            	       b    e Zd ZdZ	 	 	 ddee   dededefdZdee   fdZd	ede	ee
f   fd
Zy)BiliBiliLoaderz9
    Load fetching transcripts from BiliBili videos.
    
video_urlssessdatabili_jctbuvid3c                     || _         d| _        	 ddlm} |r |r|r|j                  |||      | _        yyyy# t        $ r t	        d      w xY w)a  
        Initialize the loader with BiliBili video URLs and authentication cookies.
        if no authentication cookies are provided, the loader can't get transcripts
        and will only fetch videos info.

        Args:
            video_urls (List[str]): List of BiliBili video URLs.
            sessdata (str): SESSDATA cookie value for authentication.
            bili_jct (str): BILI_JCT cookie value for authentication.
            buvid3 (str): BUVI3 cookie value for authentication.
        Nr   )videoTrequests package not found, please install it with `pip install bilibili-api-python`)r
   r   r   )r	   
credentialbilibili_apir   ImportError
Credential)selfr	   r
   r   r   r   s         l/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/bilibili.py__init__zBiliBiliLoader.__init__   sm    $ %	* V#..!HV / DO &,8  	4 	s	   9 Areturnc                     g }| j                   D ]4  }| j                  |      \  }}t        ||      }|j                  |       6 |S )z
        Load and return a list of documents containing video transcripts.

        Returns:
            List[Document]: List of Document objects transcripts and metadata.
        )page_contentmetadata)r	   _get_bilibili_subs_and_infor   append)r   resultsurl
transcript
video_infodocs         r   loadzBiliBiliLoader.load5   sP     ?? 	 C%)%E%Ec%J"J

ZHCNN3	 
     r   c                 R   t         j                  |      }	 ddlm}m} |r,|j                  |j                         | j                        }n]t        j                  |      }|r8|j                  t        |j                         dd       | j                        }nt        d|        ||j                               }|j                  d	|i       | j                  sd
|fS  ||j                  |d               }|j                  dg       }	|	r|	d   j                  dd
      }
|
j!                  d      sd|
z   }
t#        j                  |
      }|j$                  dk(  rft'        j(                  |j*                        j                  dg       }dj-                  |D cg c]  }|d   	 c}      }d|d    d|d    d| }||fS t/        j0                  d| d|j$                          d
|fS t/        j0                  d| d       d
|fS # t
        $ r t        d      w xY wc c}w )zU
        Retrieve video information and transcript for a given BiliBili URL.
        r   )syncr   r   )bvidr      N)aidr   z(Unable to find a valid video ID in URL: r    cid	subtitlessubtitle_urlhttpzhttps:   body contentzVideo Title: titlez, description: descz

Transcript: zFailed to fetch subtitles for z. HTTP Status Code: zNo subtitles found for video: z. Returning empty transcript.)
BV_PATTERNsearchr   r%   r   r   Videogroupr   
AV_PATTERNint
ValueErrorget_infoupdateget_subtitleget
startswithrequestsstatus_codejsonloadsr1   joinwarningswarn)r   r   r&   r%   r   vr(   r    subsub_listsub_urlresponseraw_sub_titlescraw_transcriptraw_transcript_with_meta_infos                   r   r   z*BiliBiliLoader._get_bilibili_subs_and_infoD   s:      %	0 $//JA##C(CKKC		AB$8T__KU #KC5!QRR!**,'
5#,' z>! 1>>*U"34577;+qkoonb9G%%f-"W,||G,H##s*!%H,<,<!=!A!A&"!M!$*PA1Y<*P!Q $Jw$7#8 9$$.v$6#7 8##1"24 .
 5j@@4SE :))1)=)=(>@ :~ MM05RS
 :~g  	4 	@ +Qs   H H$H!N)r)   r)   r)   )__name__
__module____qualname____doc__r   strr   r   r"   r   dictr    r#   r   r   r      sn     I  	
 @d8n :s :uS$Y7G :r#   r   )rB   rerE   typingr   r   r@   langchain_core.documentsr   )langchain_community.document_loaders.baser   compiler4   r8   r   rV   r#   r   <module>r\      sJ     	    - @ RZZ!
RZZ$
nZ nr#   