
    7|h                         d dl Z d dlZd dlZd dlmZmZmZ d dlmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ  G d d	      Z G d
 de      Z G d de      Zy)    N)AnyListOptional)urljoin)Document)HTTPBasicAuth)
BaseLoader)UnstructuredBaseLoaderc            
       R    e Zd ZdZdededefdZdededed	ee   d
ef
dZ	d
efdZ
y)LakeFSClientzClient for lakeFS.lakefs_access_keylakefs_secret_keylakefs_endpointc                    dj                  |ddg      | _        t        ||      | _        	 t	        j
                  t        | j                  d      | j                        }|j                          y # t        $ r t        d      w xY w)N/apizv1/healthcheckauthz<lakeFS server isn't accessible. Make sure lakeFS is running.)
join_LakeFSClient__endpointr   _LakeFSClient__authrequestsgetr   raise_for_status	Exception
ValueError)selfr   r   r   health_checks        j/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/lakefs.py__init__zLakeFSClient.__init__   s     ((OUE#BC#$57HI	#<<7dkkL ))+ 	N 	s   AA2 2Breporefpathpresignreturnc           	      <   ||d}t         j                  j                  |      }t        | j                  d| d| d|       }t        j                  || j                        }|j                          |j                         }	t        t        d |	d               S )N)prefixr%   zrepositories/z/refs/z/objects/ls?r   c                     | d   | d   fS )Nr$   physical_address )ress    r    <lambda>z)LakeFSClient.ls_objects.<locals>.<lambda>1   s    S[#.@*AB     results)urllibparse	urlencoder   r   r   r   r   r   jsonlistmap)
r   r"   r#   r$   r%   qpeqpobjects_ls_endpointolsr	olsr_jsons
             r    
ls_objectszLakeFSClient.ls_objects$   s     1ll$$R(%OO}TF&\#O
 ||/dkkBIIK	BIiDX
 	
r.   c                     | j                   dz   }t        j                  || j                        }|j	                          |j                         }|d   d   S )Nconfigr   storage_configpre_sign_support)r   r   r   r   r   r3   )r   config_endpointresponser=   s       r    is_presign_supportedz!LakeFSClient.is_presign_supported5   sM    //H4<<dkkB!!#&'(:;;r.   N)__name__
__module____qualname____doc__strr!   r   boolr   r;   rB   r+   r.   r    r   r      sc      	$

!
),
7?~
	
"<d <r.   r   c                       e Zd ZU dZeed<   eed<   eed<   	 	 	 ddedededee   dee   dee   fd	Zded
dfdZded
dfdZ	ded
dfdZ
d
ee   fdZddZy)LakeFSLoaderzLoad from `lakeFS`.r"   r#   r$   Nr   r   r   c                     t        |||      | _        ||dk(  rdn
t        |      | _        ||dk(  rdn
t        |      | _        |d| _        yt        |      | _        y)a  

        :param lakefs_access_key: [required] lakeFS server's access key
        :param lakefs_secret_key: [required] lakeFS server's secret key
        :param lakefs_endpoint: [required] lakeFS server's endpoint address,
               ex: https://example.my-lakefs.com
        :param repo: [optional, default = ''] target repository
        :param ref: [optional, default = 'main'] target ref (branch name,
               tag, or commit ID)
        :param path: [optional, default = ''] target path
        N main)r   _LakeFSLoader__lakefs_clientrG   r"   r#   r$   )r   r   r   r   r"   r#   r$   s          r    r!   zLakeFSLoader.__init__D   s^    *  ,0/ 
 ,$"*B#d)	 [C2I63s8,B	CI	r.   r&   c                     || _         y N)r$   )r   r$   s     r    set_pathzLakeFSLoader.set_path`   	    	r.   c                     || _         y rP   )r#   )r   r#   s     r    set_refzLakeFSLoader.set_refc   s	    r.   c                     || _         y rP   )r"   )r   r"   s     r    set_repozLakeFSLoader.set_repof   rR   r.   c                 v   | j                          | j                  j                         }g }| j                  j                  | j                  | j
                  | j                  |      }|D ]J  }t        |d   | j                  | j
                  |d   |      }|j                  |j                                L |S )N)r"   r#   r$   r%      r   )
 _LakeFSLoader__validate_instancerN   rB   r;   r"   r#   r$   UnstructuredLakeFSLoaderextendload)r   	presigneddocsobjsobjlakefs_unstructured_loaders         r    r\   zLakeFSLoader.loadi   s      "((==?	!##..tyy) / 
  	;C)AA		488SVY*& KK2779:		;
 r.   c                     | j                   | j                   dk(  rt        d      | j                  | j                  dk(  rt        d      | j                  t        d      y )NrL   zBno repository was provided. use `set_repo` to specify a repositoryz3no ref was provided. use `set_ref` to specify a refz6no path was provided. use `set_path` to specify a path)r"   r   r#   r$   r   s    r    __validate_instancez LakeFSLoader.__validate_instancew   sc    99		RT  88txx2~RSS99UVV r.   )NrM   rL   )r&   N)rC   rD   rE   rF   rG   __annotations__r   r!   rQ   rT   rV   r   r   r\   rY   r+   r.   r    rJ   rJ   =   s    
I	H
I ## 66 6 	6
 sm6 c]6 sm68S T 3 4 S T d8n Wr.   rJ   c                   \     e Zd ZdZ	 	 	 ddedededededef fdZd	efd
Z	d	e
fdZ xZS )rZ   z(Load from `lakeFS` as unstructured data.urlr"   r#   r$   r%   unstructured_kwargsc                 j    t        |   di | || _        || _        || _        || _        || _        y)zInitialize UnstructuredLakeFSLoader.

        Args:

        :param lakefs_access_key:
        :param lakefs_secret_key:
        :param lakefs_endpoint:
        :param repo:
        :param ref:
        Nr+   )superr!   rg   r"   r#   r$   r%   )r   rg   r"   r#   r$   r%   rh   	__class__s          r    r!   z!UnstructuredLakeFSLoader.__init__   s9    ( 	/./		r.   r&   c                 J    | j                   | j                  | j                  dS )Nr"   r#   r$   rm   rc   s    r    _get_metadataz&UnstructuredLakeFSLoader._get_metadata   s    		$((DIIFFr.   c                    ddl m} d}| j                  rt        j                         5 }| d| j
                  j                  d      d    }t        j                  t        j
                  j                  |      d       t        j                  | j                        }|j                          t        |d	      5 }|j                  |j                          d d d         ||
      cd d d        S | j                  j#                  |      st%        d      | j                  t'        |      d  } ||
      S # 1 sw Y   bxY w# 1 sw Y   y xY w)Nr   )	partitionzlocal://r   T)exist_okwb)mode)filenamez>Non pre-signed URLs are supported only with 'local' blockstore)unstructured.partition.autorp   r%   tempfileTemporaryDirectoryr$   splitosmakedirsdirnamer   r   rg   r   openwritecontent
startswithr   len)r   rp   local_prefixtemp_dir	file_pathrA   file
local_paths           r    _get_elementsz&UnstructuredLakeFSLoader._get_elements   s   9!<<,,. 5('j$))//#*>r*B)CD	BGGOOI6F#<<1))+)$/ 14JJx//01 )45 5 $$\2P  #l"3"56Jj111 15 5s$   BE=D:E:E	?EE)rM   rL   T)rC   rD   rE   rF   rG   rH   r   r!   dictrn   r   r   __classcell__)rk   s   @r    rZ   rZ      sj    2   	
    #6Gt G2t 2r.   rZ   )rz   rw   urllib.parser0   typingr   r   r   r   r   langchain_core.documentsr   requests.authr   )langchain_community.document_loaders.baser	   1langchain_community.document_loaders.unstructuredr
   r   rJ   rZ   r+   r.   r    <module>r      sO    	   & &    - ' @ T+< +<\BW: BWJ525 52r.   