
    7|hZ                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ  ej"                  e      Z G d de      Zy)    N)AnyDictIteratorList)Document)	BaseModelmodel_validatorc                   l   e Zd ZU dZeed<   dZeed<   dZeed<   dZ	e
ed<   d	Zeed
<   dZe
ed<   dZe
ed<   dZe
ed<   dZeed<   dZeed<    ed      ededefd              ZdedefdZdedee   fdZdedee   fdZdedefdZdedee   fd Zdedee   fd!Zd"ed#edefd$Z d"ed%edefd&Z!y')(PubMedAPIWrappera`  
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
          Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
          Content will be truncated if it exceeds this length.
          Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
          Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
        api_key: API key to be used for the PubMed API.
    parsez;https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?base_url_esearchz:https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?base_url_efetch   	max_retryg?
sleep_time   top_k_resultsi,  MAX_QUERY_LENGTHi  doc_content_chars_maxzyour_email@example.comemail api_keybefore)modevaluesreturnc                 ^    	 ddl }|j                  |d<   |S # t        $ r t        d      w xY w)z7Validate that the python package exists in environment.r   Nr   zZCould not import xmltodict python package. Please install it with `pip install xmltodict`.)	xmltodictr   ImportError)clsr   r   s      c/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/utilities/pubmed.pyvalidate_environmentz%PubMedAPIWrapper.validate_environment5   sB    	'ooF7O   	B 	s    ,queryc                    	 | j                  |d| j                         D cg c]  }d|d    d|d    d|d    d|d	     }}|rd
j                  |      d| j                   S dS c c}w # t        $ r}d| cY d}~S d}~ww xY w)z
        Run PubMed search and get the article meta information.
        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        NzPublished: 	Publishedz
Title: Titlez
Copyright Information: Copyright Informationz
Summary::
Summaryz

zNo good PubMed Result was foundzPubMed exception: )loadr   joinr   	Exception)selfr#   resultdocsexs        r!   runzPubMedAPIWrapper.runD   s    	- #ii.E0E0E(FG
 	 f[12 3 /* +**01H*I)J K$Y/02D   D!">D$>$>? 7  	-'t,,	-s4    A, !A'!A, %A, 'A, ,	B5A?9B?Bc              #     K   | j                   dz   t        t        j                  j	                  |      h      z   d| j
                   dz   }| j                  dk7  r|d| j                   z  }t        j                  j                  |      }|j                         j                  d      }t        j                  |      }|d   d   }|d   d	   D ]  }| j                  ||        y
w)z
        Search PubMed for documents matching the query.
        Return an iterator of dictionaries containing the document metadata.
        zdb=pubmed&term=z&retmode=json&retmax=z&usehistory=yr   	&api_key=utf-8esearchresultwebenvidlistN)r   strurllibr   quoter   r   requesturlopenreaddecodejsonloadsretrieve_article)r,   r#   urlr-   text	json_textr5   uids           r!   	lazy_loadzPubMedAPIWrapper.lazy_load^   s      !! 6<<%%e,-./ &d&8&8%9GH 	 <<2Yt||n--C'',{{}##G,JJt$	?+H5_-h7 	5C''V44	5s   C)C+c                 6    t        | j                  |            S )z
        Search PubMed for documents matching the query.
        Return a list of dictionaries containing the document metadata.
        )listrE   r,   r#   s     r!   r)   zPubMedAPIWrapper.loadt   s    
 DNN5)**    docc                 >    |j                  d      }t        ||      S )Nr(   )page_contentmetadata)popr   )r,   rJ   summarys      r!   _dict2documentzPubMedAPIWrapper._dict2document{   s    '')$Ws;;rI   c              #   `   K   | j                  |      D ]  }| j                  |        y wN)r#   )rE   rP   )r,   r#   ds      r!   lazy_load_docszPubMedAPIWrapper.lazy_load_docs   s1     e, 	)A%%a((	)s   ,.c                 8    t        | j                  |            S rR   )rG   rT   rH   s     r!   	load_docszPubMedAPIWrapper.load_docs   s    D''e'455rI   rD   r5   c                    | j                   dz   |z   dz   |z   }| j                  dk7  r|d| j                   z  }d}	 	 t        j                  j	                  |      }	 |j                         j                  d      }| j                  |      }| j!                  ||      S # t        j
                  j                  $ r~}|j                  dk(  rc|| j                  k  rTt        d| j                  dd	       t        j                  | j                         | xj                  d
z  c_
        |dz  }n|Y d }~nd }~ww xY w)Nzdb=pubmed&retmode=xml&id=z&webenv=r   r2   r   i  zToo Many Requests, waiting for z.2fz seconds...      r3   )r   r   r8   r:   r;   error	HTTPErrorcoder   printr   timesleepr<   r=   r   _parse_article)	r,   rD   r5   rA   retryr-   exml_text	text_dicts	            r!   r@   z!PubMedAPIWrapper.retrieve_article   sA     )*  	 	 <<2Yt||n--C//4 ;;=''0JJx(	""3	22! <<)) 66S=UT^^%; ''+s&;;H JJt/OOq(OQJEG 	 s   B   D;=A4D66D;rd   c                    	 |d   d   d   d   }|j                  di       j                  dg       }|D cg c]  }d	|v rd
|v r|d
    d|d	     }}|rdj                  |      nIt        |t              r|n7t        |t
              r&dj                  d |j                         D              nd}|j                  di       }dj                  |j                  dd      |j                  dd      |j                  dd      g      }	||j                  dd      |	|j                  di       j                  dd      |dS # t         $ r |d   d   d   }Y Fw xY wc c}w )NPubmedArticleSetPubmedArticleMedlineCitationArticlePubmedBookArticleBookDocumentAbstractAbstractTextz#textz@Labelz: 
c              3   2   K   | ]  }t        |        y w)N)r7   ).0values     r!   	<genexpr>z2PubMedAPIWrapper._parse_article.<locals>.<genexpr>   s     MUc%jMs   zNo abstract availableArticleDate-Yearr   MonthDayArticleTitleCopyrightInformation)rD   r&   r%   r'   r(   )KeyErrorgetr*   
isinstancer7   dictr   )
r,   rD   rd   arabstract_texttxt	summariesrO   a_dpub_dates
             r!   r`   zPubMedAPIWrapper._parse_article   s   	T-.?@QRB
 z2.22>2F %
#~(c/ 8}oRG~.
	 
  IIi  mS1  "-6 IIMm6J6J6LMM0 	 ff]B'88#$r"
 VVNB/!%'VVJ%;%?%?&& 
 	
=  	T-./BCNSB	T
s   D? E?EEN)"__name__
__module____qualname____doc__r   __annotations__r   r7   r   r   intr   floatr   r   r   r   r   r	   classmethodr   r"   r0   r   r}   rE   r   r)   r   rP   rT   rV   r@   r`    rI   r!   r   r      s\   * J 	F c  XOSWIsJ M3c!%3%)E3)GS(#$ 3   $- - -45s 5x~ 5,+# +$t* +<$ <8 <)C )HX,> )6s 6tH~ 6 3C  3  3  3D+
# +
$ +
4 +
rI   r   )r>   loggingr^   urllib.errorr8   urllib.parseurllib.requesttypingr   r   r   r   langchain_core.documentsr   pydanticr   r	   	getLoggerr   loggerr   r   rI   r!   <module>r      sC          , , - /			8	$D
y D
rI   