
    7|h`                     v    d dl mZmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ  G d de
      Zy	)
    )AnyCallableDictIteratorListOptional)CallbackManagerForLLMRun)LLM)GenerationChunk)pre_init)Fieldc                      e Zd ZU dZdZeed<   eed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   eZeed
<    eg       Zee   ed<   	  ed      Zeed<   	  ed      Zeed<   	  ed      Zeed<   	  ed      Zeee      ed<   	 edeeef   deeef   fd       Zedefd       ZdedefdZ	 	 ddedeee      dee    dedef
dZ!	 	 ddedeee      dee    dede"e#   f
dZ$y)	ExLlamaV2a+  ExllamaV2 API.

    - working only with GPTQ models for now.
    - Lora models are not supported yet.

    To use, you should have the exllamav2 library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out:

    Example:
        .. code-block:: python

            from langchain_community.llms import Exllamav2

            llm = Exllamav2(model_path="/path/to/llama/model")

    #TODO:
    - Add loras support
    - Add support for custom settings
    - Add support for custom stop sequences
    Nclient
model_pathexllama_cacheconfig	generator	tokenizersettingslogfuncstop_sequences   max_new_tokensT	streamingverbosedisallowed_tokensvaluesreturnc                 T   	 dd l }|j                  j                         st	        d      	 ddlm}m}m}m	} ddl
m}m}	 |d   }
|
sd |d	<   |d	   }|d
   r|d
   } ||j                         nt        d       |       }|d   |_        |j!                           ||      } ||d      }|j#                  |        ||      }|d   r |	|||      }n
 ||||      }|d   D cg c]   }|j%                         j'                         " c}|d<   t)        |d|d           |d|d           |j+                  d      }|r|j-                  ||       ||d<   ||d<   ||d<   ||d<   ||d<   |S # t        $ r}t        d      |d }~ww xY w# t        $ r t        d      w xY wc c}w )Nr   z@Unable to import torch, please install with `pip install torch`.z/CUDA is not available. ExllamaV2 requires CUDA.)r   ExLlamaV2CacheExLlamaV2ConfigExLlamaV2Tokenizer)ExLlamaV2BaseGeneratorExLlamaV2StreamingGeneratorzCould not import exllamav2 library. Please install the exllamav2 library with (cuda 12.1 is required)example : !python -m pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whlr   c                       y )N )argskwargss     a/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/llms/exllamav2.py<lambda>z0ExLlamaV2.validate_environment.<locals>.<lambda>_   s        r   r   z<settings is required. Custom settings are not supported yet.r   T)lazyr   r   zstop_sequences r   r   r   r   r   r   )torchImportErrorcudais_availableEnvironmentError	exllamav2r   r!   r"   r#   exllamav2.generatorr$   r%   __dict__NotImplementedError	model_dirprepareload_autosplitstriplowersetattrgetdisallow_tokens)clsr   r.   er   r!   r"   r#   r$   r%   r   r   r   r   modelr   r   r   x
disalloweds                       r*   validate_environmentzExLlamaV2.validate_environment>   s    	 zz&&("#TUU	  # <F9#*j)HH%%&%N  !"!,/&!&u48]+&v.	+3E=)TI.umYOI @FFV?W#X!AGGIOO$5#X *F3C,DE/&)9":!;<=ZZ 34
$$Y
; x'{!x'{"/M  	R	$  	^ 	L $Ys(   E0 F '%F%0	F
9FF
F"c                      y)zReturn type of llm.r   r'   )selfs    r*   	_llm_typezExLlamaV2._llm_type   s     r,   textc                 L    | j                   j                  j                  |      S )z-Get the number of tokens present in the text.)r   r   
num_tokens)rF   rH   s     r*   get_num_tokenszExLlamaV2.get_num_tokens   s    ~~''22488r,   promptstoprun_managerr)   c                     | j                   }| j                  r,d}| j                  ||||      D ]  }|t        |      z  } |S |j	                  || j
                  | j                        }|t        |      d  }|S )N )rL   rM   rN   r)   )rL   gen_settingsrJ   )r   r   _streamstrgenerate_simpler   r   len)	rF   rL   rM   rN   r)   r   combined_text_outputchunkoutputs	            r*   _callzExLlamaV2._call   s     NN	>>#% Dk& &  3 %E
2$3 ('..!]].. / F CKM*FMr,   c              +     K   | j                   j                  |      }| j                  j                          | j                  j	                  g        | j                  j                  || j                         d}	 | j                  j                         \  }}}	|dz  }|r|j                  || j                         | |s|| j                  k(  r	 y Zw)Nr      )tokenr   )r   encoder   warmupset_stop_conditionsbegin_streamr   streamon_llm_new_tokenr   r   )
rF   rL   rM   rN   r)   	input_idsgenerated_tokensrW   eos_s
             r*   rR   zExLlamaV2._stream   s      NN))&1	**2.##It}}= NN113ME3!,, LL -  K&$*=*== s   CC)NN)%__name__
__module____qualname____doc__r   r   __annotations__rS   r   r   r   r   r   printr   r   r   r   r   r   intr   boolr   r   r   r   r   rD   propertyrG   rK   r	   rY   r   r   rR   r'   r,   r*   r   r   
   s   , FCOM3FCIsIs Hc GX %b	NDI)=*NC$/DkIt!8$KGT- .34[xS	*87I$sCx. IT#s(^ I IV 3  93 93 9 %):>	 tCy! 67	
  
: %):>	 tCy! 67	
  
/	"r,   r   N)typingr   r   r   r   r   r   langchain_core.callbacksr	   langchain_core.language_modelsr
   langchain_core.outputsr   langchain_core.utilsr   pydanticr   r   r'   r,   r*   <module>rv      s(    @ @ = . 2 ) ~ ~r,   