
    7|h                    B    d dl mZ d dlmZmZ d dlmZ  G d de      Zy)    )annotations)AnyList)TextSplitterc                  J     e Zd ZdZ	 	 ddd	 	 	 	 	 	 	 	 	 d fdZddZ xZS )	NLTKTextSplitterz"Splitting text using NLTK package.F)use_span_tokenizec               :   t        |   di | || _        || _        || _        | j                  r| j                  dk7  rt        d      	 | j                  rddlm}  || j                        | _        yddlm	} || _        y# t        $ r t        d      w xY w)	zInitialize the NLTK splitter. z6When use_span_tokenize is True, separator should be ''r   )_get_punkt_tokenizer)sent_tokenizezANLTK is not installed, please install it with `pip install nltk`.N )super__init__
_separator	_language_use_span_tokenize
ValueErrornltk.tokenizer   
_tokenizerr   ImportError)self	separatorlanguager	   kwargsr   r   	__class__s          \/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_text_splitters/nltk.pyr   zNLTKTextSplitter.__init__   s     	"6"#!"3""t"'<UVV	&&>"6t~~"F7"/ 	S 	s   )B 7B Bc                j   | j                   rot        | j                  j                  |            }g }t	        |      D ]:  \  }\  }}|dkD  r||dz
     d   }||| ||| z   }n||| }|j                  |       < n| j                  || j                        }| j                  || j                        S )z&Split incoming text and return chunks.r      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r   textspanssplitsistartendprev_endsentences	            r   
split_textzNLTKTextSplitter.split_text(   s     ""66t<=EF#,U#3 (<E3q5$QU|AH#HU3d5oEH#E#Hh'( __TDNN_CF!!&$//::    )z

english)
r   strr   r0   r	   boolr   r   returnNone)r%   r0   r2   z	List[str])__name__
__module____qualname____doc__r   r-   __classcell__)r   s   @r   r   r      sR    ,  !
 #( 
    
:;r.   r   N)
__future__r   typingr   r   langchain_text_splitters.baser   r   r   r.   r   <module>r<      s    "  6/;| /;r.   