
    ihY$                        d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ  G d
 dee      Zy)z7Loader that loads data from Sharepoint Document Library    )annotationsN)Path)AnyDictIteratorListOptional)
BaseLoader)Document)Field)O365BaseLoaderc                      e Zd ZU dZ ed      Zded<   	 dZded<   	 dZded	<   	 dZ	ded
<   	 dZ
ded<   	  ej                         dz  dz  Zded<   	 dZded<   	 edd       ZddZddZddZddZy)SharePointLoaderzLoad  from `SharePoint`..strdocument_library_idNzOptional[str]folder_pathzOptional[List[str]]
object_ids	folder_idFzOptional[bool]	load_authz.credentialszo365_token.txtr   
token_pathload_extended_metadatac                
    ddgS )zcReturn required scopes.
        Returns:
            List[str]: A list of required scopes.
        
sharepointbasic )selfs    m/var/www/html/dev/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/sharepoint.py_scopeszSharePointLoader._scopes%   s     g&&    c              #  *  K   	 ddl m}m} | j	                         j                         j                  | j                        }t        ||      st        d| j                   d      | j                  r?|j                  | j                        }t        ||      st        d| j                   d      | j                  |      D ]  }t        |j                  j                  d            }| j                   du r| j#                  |      }| j$                  du r.| j'                  |      }|j)                  d	|j*                  i       | j,                  j/                  |      D ]L  }	| j                   du r|	j                  d
<   | j$                  du r|	j                  j)                         |	 N  | j0                  r?|j3                  | j0                        }t        ||      st        d| j                   d      | j                  |      D ]  }t        |j                  j                  d            }| j                   du r| j#                  |      }| j$                  du r.| j'                  |      }|j)                  d	|j*                  i       | j,                  j/                  |      D ]L  }	| j                   du r|	j                  d
<   | j$                  du r|	j                  j)                         |	 N  | j4                  r| j7                  || j4                        D ]  }t        |j                  j                  d            }| j                   du r| j#                  |      }| j$                  du r| j'                  |      }| j,                  j/                  |      D ]L  }	| j                   du r|	j                  d
<   | j$                  du r|	j                  j)                         |	 N  | j                  st| j0                  sf| j4                  sX|j9                         }t        ||      st        d      | j                  |      D ]  }t        |j                  j                  d            }| j                   du r| j#                  |      }| j$                  du r| j'                  |      }| j,                  j/                  |      D ]  }
|
j                  j)                  |j                         | j                   du r|
j                  d
<   | j$                  du rB|
j                  j)                         |
j                  j)                  d	|j*                  i       |
   yyyy# t        $ r t        d      w xY ww)z
        Load documents lazily. Use this when working at a large scale.
        Yields:
            Document: A document object representing the parsed blob.
        r   )DriveFolderzAO365 package not found, please install it with `pip install o365`zThere isn't a Drive with id .zThere isn't a folder with path idTsource_full_urlauthorized_identitieszUnable to fetch root folderN)
O365.driver!   r"   ImportError_authstorage	get_driver   
isinstance
ValueErrorr   get_item_by_path_load_from_folderr   metadatagetr   r&   r   get_extended_metadataupdateweb_url_blob_parser
lazy_parser   get_itemr   _load_from_object_idsget_root_folder)r   r!   r"   drivetarget_folderblobfile_idauth_identitiesextended_metadataparsed_blob	blob_parts              r   	lazy_loadzSharePointLoader.lazy_load-   s    	0
 

$$&001I1IJ%';D<T<T;UUVWXX!2243C3CDMmV4 #B4CSCSBTTU!VWW..}= &dmm//56>>T)&*&@&@&IO..$6(,(B(B7(K%%,,.?AVAV-WX#'#4#4#?#?#E &K~~-HW,,-DE22d:#,,334EF%%&& >>!NN4>>:MmV4 #B4CSCSBTTU!VWW..}= &dmm//56>>T)&*&@&@&IO..$6(,(B(B7(K%%,,.?AVAV-WX#'#4#4#?#?#E &K~~-HW,,-DE22d:#,,334EF%%&& ??225$//J &dmm//56>>T)&*&@&@&IO..$6(,(B(B7(K%#'#4#4#?#?#E &K~~-HW,,-DE22d:#,,334EF%%&&   DNNdoo!113MmV4 !>??..}= $dmm//56>>T)&*&@&@&IO..$6(,(B(B7(K%!%!2!2!=!=d!C 	$I&&--dmm<~~-FU	**+BC22d:!**112CD!**11.0E0EF $O	$$	 7FN o  	S 	s   VU; U/V;VVc                8   | j                         }|j                  d      }d| j                   d| d}dd| i}t        j                  d||      }|j                         }g }|j                  d	      D ]  }	|	j                  d
      s|	j                  d
      j                  d      xsB |	j                  d
      j                  d      xs  |	j                  d
      j                  d      }
|
s||
j                  d      }|s|j                  |        |S )a  
        Retrieve the access identities (user/group emails) for a given file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            List: A list of group names (email addresses) that have
                  access to the file.
        access_token(https://graph.microsoft.com/v1.0/drives//items/z/permissionsAuthorizationBearer GETheadersvaluegrantedToV2siteUserusergroupemail)_fetch_access_tokenr1   r   requestsrequestjsonappend)r   r=   datarD   urlrK   responseaccess_listgroup_namesaccess_data	site_datarQ   s               r   r&   z&SharePointLoader.authorized_identities   s!    '')xx/(()	G 	 #gl^$<=##E3@mmo&??73 
	2K}- __]377
C E#6::6BE#6::7C 
 %MM'2E#**51
	2 r   c                    t        | j                  d      5 }|j                         }ddd       t        j                        }|S # 1 sw Y    xY w)z|
        Fetch the access token from the token file.
        Returns:
            The access token as a dictionary.
        zutf-8)encodingN)openr   readrU   loads)r   fsrW   s       r   rR   z$SharePointLoader._fetch_access_token   sF     $//G4 	A	zz!}	 	s   AAc                   | j                         }|j                  d      }d| j                   d| d}dd| i}t        j                  d||      }|j                         }|j                  d	d
      |j                  di       j                  di       j                  dd      |j                  di       j                  dd      j                  d      d   dz   |j                  dd      z   d}|S )a  
        Retrieve extended metadata for a file in SharePoint.
        As of today, following fields are supported in the extended metadata:
        - size: size of the source file.
        - owner: display name of the owner of the source file.
        - full_path: pretty human readable path of the source file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            dict: A dictionary containing the extended metadata of the file,
                  including size, owner, and full path.
        rD   rE   rF   z,?$select=size,createdBy,parentReference,namerG   rH   rI   rJ   sizer   	createdByrO   displayName parentReferencepath:/name)rf   owner	full_path)rR   r1   r   rS   rT   rU   split)	r   r=   rW   rD   rX   rK   rY   r0   staged_metadatas	            r   r2   z&SharePointLoader.get_extended_metadata   s     '')xx/6''(y:; 	
 #gl^$<=##E3@==?LL+\\+r2S_S#!&7<S_U3Z  ll62&	'

 r   )returnz	List[str])rt   zIterator[Document])r=   r   rt   r   )rt   r   )r=   r   rt   r   )__name__
__module____qualname____doc__r   r   __annotations__r   r   r   r   r   homer   r   propertyr   rB   r&   rR   r2   r   r   r   r   r      s    "$Sz)G!%K%4&*J#*4#I}#2 %I~%4 tyy{^36FFJF2-2N2H' 'R$h D	"r   r   )rx   
__future__r   rU   pathlibr   typingr   r   r   r   r	   rS   langchain_core.document_loadersr
   langchain_core.documentsr   pydanticr   .langchain_community.document_loaders.base_o365r   r   r   r   r   <module>r      s8    = "   6 6  6 - 
}~z }r   