
    ihi                    v    d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ  G d de      Zy)    )annotationsN)AnyDictIteratorListOptionalTuple)Document)
BaseLoaderc                  h    e Zd ZdZ	 	 d
	 	 	 	 	 	 	 	 	 ddZddZddZddZddZ	 	 	 	 ddZ	dd	Z
y)AthenaLoaderaY  Load documents from `AWS Athena`.

    Each document represents one row of the result.
    - By default, all columns are written into the `page_content` of the document
    and none into the `metadata` of the document.
    - If `metadata_columns` are provided then these columns are written
    into the `metadata` of the document while the rest of the columns
    are written into the `page_content` of the document.

    To authenticate, the AWS client uses this method to automatically load credentials:
    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

    If a specific credential profile should be used, you must pass
    the name of the profile from the ~/.aws/credentials file that is to be used.

    Make sure the credentials / roles used have the required policies to
    access the Amazon Textract service.
    Nc                Z   || _         || _        || _        ||ng | _        	 ddl}	 ||j                  |      n|j                         }|j                  d      | _
        |j                  d      | _        y# t
        $ r t        d      w xY w# t        $ r}t        d      |d}~ww xY w)ag  Initialize Athena document loader.

        Args:
            query: The query to run in Athena.
            database: Athena database.
            s3_output_uri: Athena output path.
            profile_name: Optional. AWS credential profile, if profiles are being used.
            metadata_columns: Optional. Columns written to Document `metadata`.
        Nr   zRCould not import boto3 python package. Please install it with `pip install boto3`.)profile_namezCould not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid.athenas3)querydatabases3_output_urimetadata_columnsboto3ImportErrorSession	Exception
ValueErrorclientathena_client	s3_client)	selfr   r   r   r   r   r   sessiones	            i/var/www/html/dev/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/athena.py__init__zAthenaLoader.__init__!   s    " 
 *4D4P 0VX		  + <8]]_  %^^H5 -)  	> 	  	* 		s"   A8 $B 8B	B*B%%B*c                   | j                   j                  | j                  d| j                  id| j                  i      }|d   }	 | j                   j                  |      }|d   d   d   }|d	k(  rnH|d
k(  r|d   d   }|d   }d| }t        |      |dk(  rt        d      t        j                  d       u| j                  |      }t        j                  |j                  d            S )NDatabaseOutputLocation)QueryStringQueryExecutionContextResultConfigurationQueryExecutionId)r)   QueryExecutionStatusState	SUCCEEDEDFAILEDStateChangeReasonzQuery Failed: 	CANCELLEDz Query was cancelled by the user.   records)orient)r   start_query_executionr   r   r   get_query_executionr   timesleep_get_result_setjsonloadsto_json)r   responsequery_execution_idstateresp_statusstate_change_reasonerr
result_sets           r!   _execute_queryzAthenaLoader._execute_queryO   s   %%;;

#-t}}"=!143E3E F < 

 &&89))==!3 > H -.x8AE#("&'78B&12E&F#&':&;<n$+% BCCJJqM   ))*<=
zz*,,I,>??    c                J    |r |j                  |      r|d t        |        S |S N)endswithlenr   input_stringsuffixs      r!   _remove_suffixzAthenaLoader._remove_suffixi   s+    l++F33v;,//rD   c                H    |r|j                  |      r|t        |      d  S |S rF   )
startswithrH   rI   s      r!   _remove_prefixzAthenaLoader._remove_prefixn   s)    l--f5F..rD   c                   	 dd l }| j                  }| j                  | j	                  |d      d      j                  d      }|d   }dj                  |dd  |gz         dz   }| j                  j                  ||      }|j                  t        j                  |d   j                               d	
      }|S # t        $ r t        d      w xY w)Nr   zTCould not import pandas python package. Please install it with `pip install pandas`./zs3://r1   z.csv)BucketKeyBodyutf8)encoding)pandasr   r   rO   rL   splitjoinr   
get_objectread_csvioBytesIOread)	r   r=   pd
output_uritokensbucketkeyobjdfs	            r!   r8   zAthenaLoader._get_result_sets   s    	 ''
$$
C0'

%* 	 hhvabz%7$889FBnn''v3'?[[CK$4$4$67&[I	  	? 	s   B9 9Cc                    g }g }t        |d   j                               }|D ]3  }|| j                  v r|j                  |       #|j                  |       5 ||fS )Nr   )listkeysr   append)r   query_resultcontent_columnsr   all_columnsrc   s         r!   _get_columnszAthenaLoader._get_columns   sm     <?//12 	,Cd+++ '',&&s+		,  000rD   c              #  8  	K   | j                         }| j                  |      \  	}|D ]f  }dj                  	fd|j                         D              }|j                         D ci c]  \  }}||v s||| }}}t	        ||      }| h y c c}}w w)N
c              3  <   K   | ]  \  }}|v s| d |   yw)z: N ).0kvrk   s      r!   	<genexpr>z)AthenaLoader.lazy_load.<locals>.<genexpr>   s+      % $1qO7K1#Rs%s   )page_contentmetadata)rC   rm   rY   itemsr
   )
r   rj   r   rowrv   rs   rt   rw   docrk   s
            @r!   	lazy_loadzAthenaLoader.lazy_load   s     **,,0,=,=l,K)) 	C99 %(+		% L "%A5E0E!-1H  xHCI	s   A%B(B5B8B=B)NN)
r   strr   r|   r   r|   r   zOptional[str]r   zOptional[List[str]])returnList[Dict[str, Any]])rJ   r|   rK   r|   r}   r|   )r=   r|   r}   r   )rj   r~   r}   zTuple[List[str], List[str]])r}   zIterator[Document])__name__
__module____qualname____doc__r"   rC   rL   rO   r8   rm   r{   rq   rD   r!   r   r      st    0 '+04,.,. ,. 	,.
 $,. .,.\@4

(101	$1rD   r   )
__future__r   r\   r9   r6   typingr   r   r   r   r   r	   langchain_core.documentsr
   )langchain_community.document_loaders.baser   r   rq   rD   r!   <module>r      s+    " 	   = = - @S: SrD   