
    7|h#                         d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZ  e j                   e      ZdZ G d d	e      Zy)
    N)BytesIO)ListOptionalSequence)ElementTree)Document)
BaseLoader@   c                   (   e Zd ZdZ	 ddddededee   defdZ	 	 	 	 	 	 dd
ee	e      dee	e      dee   dededede	e
   fdZdedede	e   dd	fdZdee   dedede	e
   fdZdedededee
   fdZdedefdZdedefdZededefd       Zy	) 
QuipLoaderz_Load `Quip` pages.

    Port of https://github.com/quip/quip-api/tree/master/samples/baqup
    F)allow_dangerous_xml_parsingapi_urlaccess_tokenrequest_timeoutr   c                |    	 ddl m}  ||||      | _        |st	        d      y# t        $ r t        d      w xY w)a  
        Args:
            api_url: https://platform.quip.com
            access_token: token of access quip API. Please refer:
                https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
            request_timeout: timeout of request, default 60s.
            allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
        r   )
QuipClientz?`quip_api` package not found, please run `pip install quip_api`)r   base_urlr   ac  The quip client uses the built-in XML parser which may causesecurity issues when parsing XML data in some cases. Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities For more information, set `allow_dangerous_xml_parsing` as True if you are sure that your distribution of the standard library is not vulnerable to XML vulnerabilities.N)quip_api.quipr   ImportErrorquip_client
ValueError)selfr   r   r   r   r   s         h/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/quip.py__init__zQuipLoader.__init__   s]     	0 &%
 +<  +  	Q 	s   & ;N
folder_ids
thread_idsmax_docsinclude_all_foldersinclude_commentsinclude_imagesreturnc                 \   |s|s|st        d      |xs g }|r|D ]  }| j                  |d|        |rN| j                  j                         }d|v r| j                  |d   d|       d|v r| j                  |d   d|       t	        t        |d|             }| j                  |||      S )aA  
        Args:
            :param folder_ids: List of specific folder IDs to load, defaults to None
            :param thread_ids: List of specific thread IDs to load, defaults to None
            :param max_docs: Maximum number of docs to retrieve in total, defaults 1000
            :param include_all_folders: Include all folders that your access_token
                   can access, but doesn't include your private folder
            :param include_comments: Include comments, defaults to False
            :param include_images: Include images, defaults to False
        z_Must specify at least one among `folder_ids`, `thread_ids` or set `include_all`_folders as Truer   group_folder_idsshared_folder_idsN)r   get_thread_ids_by_folder_idr   get_authenticated_userlistsetprocess_threads)	r   r   r   r   r   r   r    	folder_idusers	            r   loadzQuipLoader.load=   s    & *5H7 
  %2
' K	00AzJK ##::<D!T)00+,a #d*00,-q* #j(345
##J@PQQ    r*   depthc           	      n   ddl m}m} 	 | j                  j	                  |      }|d   j                  dd|z        }t        j                  d| d|        |d   D ]9  }	d|	v r| j                  |	d   |dz   |       !d|	v s&|j                  |	d          ; y	# |$ r`}|j
                  dk(  rt        j                  d| d| d|        n(t        j                  d| d| d|j
                          Y d	}~y	d	}~w|$ r2}t        j                  d| d| d
|j
                          Y d	}~y	d	}~ww xY w)z4Get thread ids by folder id and update in thread_idsr   )	HTTPError	QuipErrori  zdepth z!, Skipped over restricted folder z, z, Skipped over folder z due to unknown error Nz due to HTTP error foldertitlez	Folder %sz, Processing folder childrenr*      	thread_id)r   r0   r1   r   
get_foldercodeloggingwarninggetinfor%   append)
r   r*   r.   r   r0   r1   r2   er3   childs
             r   r%   z&QuipLoader.get_thread_ids_by_folder_idj   sd    	7	%%00;F& x $$WkI.EFveW$8@AJ' 	6Ee#00+&	: %!!%"45	6+  
	vv}UG#DYKrRSQTU UG#9) E,,-FF85  	OO5i[ A%%&VVH. 	s$   B D4AC::D4(D//D4include_messagesc                 f    g }|D ])  }| j                  |||      }||j                  |       + |S )z2Process a list of thread into a list of documents.)process_threadr=   )r   r   r    r@   docsr6   docs          r   r)   zQuipLoader.process_threads   sE     # 	!I%%iAQRCC 	! r-   r6   c           
      `   | j                   j                  |      }|d   d   }|d   d   }|d   d   }|d   d   }t        j                  |      }t        j                  d| d| d| d	|        d
|v rg	 | j                   j                  |d
         }	||||d}d}|r| j                  |	      }|r|dz   | j                  |      z   }t        |d
   |z   |      S y # t        j                  j                  j                  $ r(}
t        j                  d| d| d|
        Y d }
~
y d }
~
ww xY w)Nthreadidr3   linkupdated_useczprocessing thread z title z link z update_ts htmlzError parsing thread  z, skipping, )r3   	update_tsrG   source z/n)page_contentmetadata)r   
get_threadr   _sanitize_titleloggerr<   parse_document_htmlxmletreecElementTree
ParseErrorerrorprocess_thread_imagesprocess_thread_messagesr   )r   r6   r    r@   rF   r3   rH   rL   sanitized_titletreer>   rP   texts                r   rB   zQuipLoader.process_thread   sr    !!,,Y78$T*	x )h'8$^4	$44U; 7?2C D6YK1	

 V'';;F6NK )&	H D11$7d{T%A%A)%LL#F^d2!  / 99))44 4UG1YK|TUSVWXs   7C 'D-D((D-r]   c                    d}	 ddl m} ddlm} |j	                  d      D ]  }|j                  d      }|r|j                  d      s(|j                  d	      \  }}}}	| j                  j                  ||	      }
	 |j                  t        |
j                                     }|d
z   |j                  |      z   } |S # t        $ r t        d      w xY w# t        $ r}t        j!                  d|        |d }~ww xY w)NrN   r   )Image)pytesseractzg`Pillow or pytesseract` package not found, please run `pip install Pillow` or `pip install pytesseract`imgsrcz/blob/
z!failed to convert image to text, )PILr`   ra   r   iterr;   
startswithsplitr   get_blobopenr   readimage_to_stringOSErrorrS   rY   )r   r]   r^   r`   ra   rb   rc   _r6   blob_idblob_responseimager>   s                r   rZ   z QuipLoader.process_thread_images   s   	!/ 99U# 	C''%.CcnnW5'*yy~$Aq)W ,,55iIM

7=+=+=+?#@Ad{[%@%@%GG	 '  	D 	   @DEs#   B? <?C?C	C? C::C?c                     d }g }	 | j                   j                  ||d      }|j                  |       |r|d   d   dz
  }nn?|j                          |D cg c]  }|d   	 }}dj	                  |      S c c}w )Nd   )max_created_useccountcreated_usecr5   r^   re   )r   get_messagesextendreversejoin)r   r6   ru   messageschunkmessagetextss          r   r[   z"QuipLoader.process_thread_messages   s    $$11,<C 2 E OOE"#(9^#<q#@   	089W99yy :s   A8r3   c                     t        j                  dd|       }t        j                  dd|      }t        |      t        kD  r	|d t         }|S )Nz\srK   z(?u)[^- \w.]rN   )resublen_MAXIMUM_TITLE_LENGTH)r3   r\   s     r   rR   zQuipLoader._sanitize_title   sH    &&U3&&"oF"77-.D/DEOr-   )<   )NNi  FFF)__name__
__module____qualname____doc__strr   intboolr   r   r   r,   r%   r   r)   rB   r   rZ   r[   staticmethodrR    r-   r   r   r      s    *,	$ -2$$ $ "#	$ &*$P +/*."&$)!&$+RT#Y'+R T#Y'+R 3-	+R
 "+R +R +R 
h+RZ#6#6%(#66:3i#6	#6J	"3-	9=	QU		h	**.2*FJ*	(	*X+ # 6     $ s s  r-   r   )r9   r   xml.etree.cElementTreerU   ior   typingr   r   r   xml.etree.ElementTreer   langchain_core.documentsr   )langchain_community.document_loaders.baser	   	getLoggerr   rS   r   r   r   r-   r   <module>r      sF     	   + + - - @			8	$ h hr-   