from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names
from pydantic import ConfigDict


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
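
            # Streaming usage (an illustrative sketch; it assumes the same
            # llamafile server is reachable at the default base_url). The
            # stream() call yields string tokens one at a time:
            for token in llm.stream("Tell me a joke."):
                print(token, end="", flush=True)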
    zhttp://localhost:8080strbase_urlNzOptional[int]request_timeoutFbool	streamingintseedg?floattemperature(   top_kgffffff?top_pg?min_p	n_predictr   n_keepg      ?tfs_z	typical_pg?repeat_penalty@   repeat_last_nTpenalize_nlg        presence_penaltyfrequency_penaltymirostatg      @mirostat_taug?mirostat_etaforbid)extrac                     y)N	llamafile )selfs    a/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/llms/llamafile.py	_llm_typezLlamafile._llm_typex   s        c                f    g d}t        | j                        D cg c]	  }||vs| }}|S c c}w )N)r   cachecallback_manager	callbacksmetadatanamer   r   tagsverbosecustom_get_token_ids)r   	__class__)r1   ignore_keyskattrss       r2   _param_fieldnameszLlamafile._param_fieldnames|   sA    
 0?
1KCWA
 
 
s   	..c                J    i }| j                   D ]  }t        | |      ||<    |S N)rB   getattr)r1   params	fieldnames      r2   _default_paramszLlamafile._default_params   s2    // 	9I 'i 8F9	9r4   c                    | j                   }|j                         D ]  \  }}||v s|||<    |t        |      dkD  r||d<   | j                  rd|d<   |S )Nr   stopTstream)rH   itemslenr   )r1   rJ   kwargsrF   r@   vs         r2   _get_parameterszLlamafile._get_parameters   sk     %%
 LLN 	DAqF{q		 D	A!F6N>>#F8r4   c                z   | j                   r[t               5 } | j                  |f||d|D ]  }|j                  |j                          |j                         }ddd       |S  | j                  dd|i|}d|i|}		 t        j                  | j                   dddi|	d| j                  	      }
|
j                          d|
_        |
j                         d   }|S # 1 sw Y   S xY w# t        j                  j                  $ r. t        j                  j                  d
| j                   d      w xY w)a  Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            The string generated by the model.

        """
        if self.streaming:
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)

                text = buff.getvalue()

            return text

        params = self._get_parameters(stop=stop, **kwargs)
        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=False,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.raise_for_status()
        response.encoding = "utf-8"

        return response.json()["content"]

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields results objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects each containing a token

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(temperature=0.0)
            for token in llm.stream(
                "Ask 'Hi, how are you?' like a pirate:'", stop=["'", "\n"]
            ):
                print(token, end="", flush=True)

        """
        params = self._get_parameters(stop=stop, **kwargs)
        if "stream" not in params:
            params["stream"] = True

        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=True,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.encoding = "utf8"

        for raw_chunk in response.iter_lines(decode_unicode=True):
            content = self._get_chunk_content(raw_chunk)
            chunk = GenerationChunk(text=content)

            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk

    def _get_chunk_content(self, chunk: str) -> str:
        """When streaming is turned on, the llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field.
        """
        if chunk.startswith("data:"):
            cleaned = chunk.lstrip("data: ")
            data = json.loads(cleaned)
            return data["content"]
        return chunk
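

if __name__ == "__main__":
    # Minimal usage sketch, assuming a llamafile has been started locally in
    # server mode (e.g. `./path/to/model.llamafile --server --nobrowser`) so
    # that the default base_url, http://localhost:8080, is reachable.
    llm = Llamafile(temperature=0.0)
    print(llm.invoke("Tell me a joke."))

    # The same server can also be consumed token by token via streaming.
    for token in llm.stream("Tell me another joke."):
        print(token, end="", flush=True)
    print()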