
    ih                     r    d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ  G d de      Zy)	    N)AnyDictIteratorListOptionalUnion)Document)get_from_dict_or_env)PyPDFLoader)
BaseLoaderc                       e Zd ZdZ	 ddedee   dee   fdZededefd	       Z	de
fd
Zde
de
deeef   defdZdee   fdZdee   fdZdee   fdZdee   fdZy)RSpaceLoadera'  Load content from RSpace notebooks, folders, documents or PDF Gallery files.

    Map RSpace document <-> Langchain Document in 1-1. PDFs are imported using PyPDF.

    Requirements are rspace_client (`pip install rspace_client`) and PyPDF if importing
     PDF docs (`pip install pypdf`).

    N	global_idapi_keyurlc                 v    |||d}t         j                  |      }|d   | _        |d   | _        |d   | _        y)a  api_key: RSpace API key - can also be supplied as environment variable
        'RSPACE_API_KEY'
        url: str
        The URL of your RSpace instance - can also be supplied as environment
        variable 'RSPACE_URL'
        global_id: str
         The global ID of the resource to load,
        e.g. 'SD12344' (a single document); 'GL12345'(A PDF file in the gallery);
        'NB4567' (a notebook); 'FL12244' (a folder)
        )r   r   r   r   r   r   N)r   validate_environmentr   r   r   )selfr   r   r   argsverified_argss         i/var/www/html/dev/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/rspace.py__init__zRSpaceLoader.__init__   sJ     "*

 )5(I(I$(O$Y/ '+K8    valuesreturnc                 n    t        |dd      |d<   t        |dd      |d<   d|vs|d   t        d      |S )z3Validate that API key and URL exist in environment.r   RSPACE_API_KEYr   
RSPACE_URLr   zBNo value supplied for global_id. Please supply an RSpace global ID)r
   
ValueError)clsr   s     r   r   z!RSpaceLoader.validate_environment,   sS     1DTUy,VULIuf${(;(CT  r   c                    	 ddl m}m} 	 |j	                  | j
                  | j                        }|j                          ||j                  fS # t        $ r t        d      w xY w# t        $ r t        d| j
                   d      w xY w)zCreate a RSpace client.r   )elnfield_contentz(You must run `pip install rspace_client`z%Unable to initialize client - is url z or api key  correct?)
rspace_client.elnr"   r#   ImportError	ELNClientr   r   
get_status	ExceptionFieldContent)r   r"   r#   s      r   _create_rspace_clientz"RSpaceLoader._create_rspace_client7   s    	J<
	--$,,7CNN M....  	JHII	J  	7zAVW 	s   A 6A' A$'#B
clir#   d_idc                     d}|j                  |      }|d|d    dz  }|d   D ]0  }||d    dz  } ||d         }||j                         z  }|dz  }2 t        dd	|d    d
|d    i|      S )N z<h2>namez<h2/>fields
contentsourcezrspace: -globalId)metadatapage_content)get_documentget_textr	   )r   r+   r#   r,   r2   docffcs           r   _get_doczRSpaceLoader._get_docJ   s    t$T#f+e,,X 	A!F)B''Gq|,Br{{}$GtOG		
 (3v;-qZ8I JK 
 	
r   c              #   p   K   | j                         \  }}| j                  ||| j                         y w)N)r*   r=   r   )r   r+   r#   s      r   _load_structured_docz!RSpaceLoader._load_structured_docX   s0     !779]mmC??s   46c              #      K   | j                         \  }}| j                  r!|j                  | j                  dd  dg      }d   D cg c]  }|d   	 }}|D ]  }| j                  |||        y c c}w w)N   document)	folder_idtypesToIncluderecordsid)r*   r   list_folder_treer=   )r   r+   r#   docs_in_folderddoc_idsdoc_ids          r   _load_folder_treezRSpaceLoader._load_folder_tree\   s     !779]>> 11..,j\ 2 N 0>i/HI!agII 	<F--]F;;	< Js   AA:
A5$A:c              #     K   | j                         \  }}|j                  | j                        }t        j                  j                  |d         \  }}|j                         dk(  ri| j                   d}|j                  | j                  |       t        |      }|j                         D ]  }| j                  |j                  d<   | ! y y w)Nr/   z.pdf
rspace_src)r*   get_file_infor   ospathsplitextlowerdownload_filer   	lazy_loadr6   )	r   r+   r#   	file_info_extoutfile
pdf_loaderpdfs	            r   	_load_pdfzRSpaceLoader._load_pdff   s     !779]%%dnn5	!!)F"34399;& (-Gdnng6$W-J!++- -1^^\*		 !s   CCc              #   ^  K   | j                   r(d| j                   v r| j                         D ]  }|  y | j                   r(d| j                   v r| j                         D ]  }|  y | j                   r+| j                   dd dv r| j                         D ]  }|  y t	        d      w)NGLSDr   rA   )FLNBzUnknown global ID type)r   r\   r?   rL   r   )r   rI   s     r   rU   zRSpaceLoader.lazy_loadr   s     >>ddnn4^^% ^^ 6..0 ^^q 3| C++-  566s   B+B-)NN)__name__
__module____qualname____doc__strr   r   classmethodr   r   r   r*   r   intr	   r=   r   r?   rL   r\   rU    r   r   r   r      s     SW99'/}9BJ3-9. $ 4  /s /&
C 
 
5c? 
x 
@hx&8 @<8H#5 <
8H- 
78H- 7r   r   )rP   typingr   r   r   r   r   r   langchain_core.documentsr	   langchain_core.utilsr
   $langchain_community.document_loadersr   )langchain_community.document_loaders.baser   r   ri   r   r   <module>ro      s(    	 = = - 5 < @r7: r7r   