from __future__ import annotations

import json
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Dict,
    Generator,
    Iterator,
    List,
    Mapping,
    Optional,
    Union,
)

import aiohttp
import requests
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

if TYPE_CHECKING:
    from xinference.client import (
        RESTfulChatModelHandle,
        RESTfulGenerateModelHandle,
    )
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you are only consuming the services provided by a running Xinference
    server, the lighter xinference_client package is sufficient:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference

    To run, start a local Xinference instance, or, for a distributed
    deployment, start a Xinference supervisor on one server and Xinference
    workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

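        When the supervisor runs on a different machine, the worker must be
        pointed at its endpoint. The ``-e``/``--endpoint`` flag below follows
        the Xinference documentation; verify it against your installed
        version:

        .. code-block:: bash

           $ xinference-worker -e "http://<supervisor_host>:9997"
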
    Then, launch a model using the command line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned when launching the model
        )

        llm.invoke(
            prompt="Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )
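
    Example:
        Tokens can also be consumed as they are generated, through the
        standard ``stream`` interface (a minimal sketch, assuming the launched
        model supports streaming):

    .. code-block:: python

        for token in llm.stream(
            "Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        ):
            print(token, end="", flush=True)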

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference
        from langchain.prompts import PromptTemplate

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned when launching the model
            stream=True
        )
        prompt = PromptTemplate(
            input_variables=['country'],
            template="Q: where can we visit in the capital of {country}? A:"
        )
        chain = prompt | llm
        for chunk in chain.stream({'country': 'France'}):
            print(chunk, end="", flush=True)
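
    The async API mirrors the synchronous one. A minimal sketch using
    ``astream`` (an event loop is assumed, e.g. via ``asyncio.run``):

    .. code-block:: python

        import asyncio

        async def main() -> None:
            async for token in llm.astream(
                "Q: where can we visit in the capital of France? A:"
            ):
                print(token, end="", flush=True)

        asyncio.run(main())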


    To view all the supported built-in models, run:

    .. code-block:: bash

        $ xinference list --all

    """

    client: Optional[Any] = None
    server_url: Optional[str]
    """URL of the xinference server"""
    model_uid: Optional[str]
    """UID of the launched model"""
    model_kwargs: Dict[str, Any]
    """Keyword arguments to be passed to xinference.LLM"""

    def __init__(
        self,
        server_url: Optional[str] = None,
        model_uid: Optional[str] = None,
        api_key: Optional[str] = None,
        **model_kwargs: Any,
    ):
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            try:
                from xinference_client import RESTfulClient
            except ImportError as e:
                raise ImportError(
                    "Could not import RESTfulClient from xinference. Please install it"
                    " with `pip install xinference` or `pip install xinference_client`."
                ) from e

        model_kwargs = model_kwargs or {}

        super().__init__(
            **{
                "server_url": server_url,
                "model_uid": model_uid,
                "model_kwargs": model_kwargs,
            }
        )

        if self.server_url is None:
            raise ValueError("Please provide server URL")

        if self.model_uid is None:
            raise ValueError("Please provide the model UID")

        self._headers: Dict[str, str] = {}
        self._cluster_authed = False
        self._check_cluster_authenticated()
        if api_key is not None and self._cluster_authed:
            self._headers["Authorization"] = f"Bearer {api_key}"

        self.client = RESTfulClient(server_url, api_key)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "xinference"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            **{"server_url": self.server_url},
            **{"model_uid": self.model_uid},
            **{"model_kwargs": self.model_kwargs},
        }

    def _check_cluster_authenticated(self) -> None:
        # A 404 from /v1/cluster/auth means the server does not expose
        # authentication at all; any other non-200 status is a real error.
        url = f"{self.server_url}/v1/cluster/auth"
        response = requests.get(url)
        if response.status_code == 404:
            self._cluster_authed = False
        else:
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to get cluster information, "
                    f"detail: {response.json()['detail']}"
                )
            response_data = response.json()
            self._cluster_authed = bool(response_data["auth"])

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        """
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}

        if stop:
            generate_config["stop"] = stop

        if generate_config and generate_config.get("stream"):
            # Stream internally but return the concatenated text, since
            # _call must produce a single string.
            combined_text_output = ""
            for token in self._stream_generate(
                model=model,
                prompt=prompt,
                run_manager=run_manager,
                generate_config=generate_config,
            ):
                combined_text_output += token
            return combined_text_output
        else:
            completion = model.generate(prompt=prompt, generate_config=generate_config)
            return completion["choices"][0]["text"]

    def _stream_generate(
        self,
        model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle"],
        prompt: str,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        generate_config: Optional["LlamaCppGenerateConfig"] = None,
    ) -> Generator[str, None, None]:
        """
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        """
        streaming_response = model.generate(
            prompt=prompt, generate_config=generate_config
        )
        for chunk in streaming_response:
            if isinstance(chunk, dict):
                choices = chunk.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        token = choice.get("text", "")
                        log_probs = choice.get("logprobs")
                        if run_manager:
                            run_manager.on_llm_new_token(
                                token=token, verbose=self.verbose, log_probs=log_probs
                            )
                        yield token

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        for stream_resp in self._create_generate_stream(prompt, generate_config):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    def _create_generate_stream(
        self,
        prompt: str,
        generate_config: Optional[Dict[str, List[str]]] = None,
    ) -> Iterator[str]:
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        yield from model.generate(prompt=prompt, generate_config=generate_config)

    @staticmethod
    def _stream_response_to_generation_chunk(
        stream_response: str,
    ) -> GenerationChunk:
        """Convert a stream response to a generation chunk."""
        token = ""
        if isinstance(stream_response, dict):
            choices = stream_response.get("choices", [])
            if choices:
                choice = choices[0]
                if isinstance(choice, dict):
                    token = choice.get("text", "")
                    return GenerationChunk(
                        text=token,
                        generation_info=dict(
                            finish_reason=choice.get("finish_reason", None),
                            logprobs=choice.get("logprobs", None),
                        ),
                    )
                raise TypeError("choice type error!")
            return GenerationChunk(text=token)
        raise TypeError("stream_response type error!")

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        async for stream_resp in self._acreate_generate_stream(
            prompt, generate_config
        ):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _acreate_generate_stream(
        self,
        prompt: str,
        generate_config: Optional[Dict[str, List[str]]] = None,
    ) -> AsyncIterator[str]:
        request_body: Dict[str, Any] = {"model": self.model_uid, "prompt": prompt}
        if generate_config is not None:
            for key, value in generate_config.items():
                request_body[key] = value

        stream = bool(generate_config and generate_config.get("stream"))

        async with aiohttp.ClientSession() as session:
            async with session.post(
                url=f"{self.server_url}/v1/completions",
                json=request_body,
            ) as response:
                if response.status != 200:
                    if response.status == 404:
                        raise FileNotFoundError(
                            "astream call failed with status code 404."
                        )
                    # In aiohttp, response.text is a coroutine and must be
                    # awaited before it can be interpolated into the message.
                    optional_detail = await response.text()
                    raise ValueError(
                        f"astream call failed with status code {response.status}."
                        f" Details: {optional_detail}"
                    )

                async for line in response.content:
                    if not stream:
                        yield json.loads(line)
                    else:
                        # Streaming responses arrive as server-sent events:
                        # each payload line is prefixed with "data:"; strip
                        # the prefix and skip empty keep-alive lines.
                        json_str = line.decode("utf-8")
                        if line.startswith(b"data:"):
                            json_str = json_str[len("data:") :].strip()
                            if not json_str:
                                continue
                            yield json.loads(json_str)