
    7|h\                        d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
mZ  G d de      Z	 	 	 	 	 	 	 	 d
dZ G d d	e      Zy)    )annotationsN)AnyListLiteralOptionalUnion)LanguageTextSplitterc                  >     e Zd ZdZ	 d	 	 	 	 	 	 	 d fdZddZ xZS )CharacterTextSplitterz(Splitting text that looks at characters.c                @    t        |   di | || _        || _        y)Create a new TextSplitter.N )super__init__
_separator_is_separator_regex)self	separatoris_separator_regexkwargs	__class__s       a/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_text_splitters/character.pyr   zCharacterTextSplitter.__init__   s$     	"6"##5     c                    | j                   r| j                  nt        j                  | j                        }t	        ||| j
                        }| j
                  rdn| j                  }| j                  ||      S )&Split incoming text and return chunks. )r   r   reescape_split_text_with_regex_keep_separator_merge_splits)r   textr   splitsr   s        r   
split_textz CharacterTextSplitter.split_text   sc      $77DOORYYt=W 	 (i9M9MN//RT__
!!&*55r   )

F)r   strr   boolr   r   returnNoner#   r'   r)   	List[str])__name__
__module____qualname____doc__r   r%   __classcell__r   s   @r   r   r   	   s7    2 CH66;?6SV6	66r   r   c                   |r|rt        j                  d| d|       }|dk(  r3t        dt        |      dz
  d      D cg c]  }||   ||dz      z    c}n/t        dt        |      d      D cg c]  }||   ||dz      z    c}}t        |      dz  dk(  r||dd  z  }|dk(  r	||d   gz   n|d   g|z   }n"t        j                  ||       }nt	        |       }|D cg c]
  }|dk7  s	| c}S c c}w c c}w c c}w )	N()endr         r   )r   splitrangelenlist)r#   r   keep_separator_splitsir$   ss          r   r    r       s#    hh9+Q/6G "U* 8=QGq@PRS7TU!'!*wq1u~-U<A!S\ST<UVqwqzGAEN2V 
 7|a1$'"#,& "U* 72;-'qzlV+  XXi.Fd)!bA)) VV *s   C0/C5
C:)C:c                  ~     e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 	 	 d fdZd	dZd
dZe	 	 	 	 	 	 dd       Ze	dd       Z
 xZS )RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    c                P    t        |   dd|i| |xs g d| _        || _        y)r   r>   )r&   
 r   Nr   )r   r   _separatorsr   )r   
separatorsr>   r   r   r   s        r   r   z'RecursiveCharacterTextSplitter.__init__A   s0     	AA&A%@)@#5 r   c                   g }|d   }g }t        |      D ]S  \  }}| j                  r|nt        j                  |      }|dk(  r|} n$t        j                  ||      sI|}||dz   d } n | j                  r|nt        j                  |      }t        ||| j                        }	g }
| j                  rdn|}|	D ]  }| j                  |      | j                  k  r|
j                  |       3|
r%| j                  |
|      }|j                  |       g }
|s|j                  |       n| j                  ||      }|j                  |        |
r#| j                  |
|      }|j                  |       |S )r   r9   r   r7   N)	enumerater   r   r   searchr    r!   _length_function_chunk_sizeappendr"   extend_split_text)r   r#   rH   final_chunksr   new_separatorsr@   _sr   r$   _good_splitsrA   merged_text
other_infos                 r   rP   z*RecursiveCharacterTextSplitter._split_textM   sw   rN	z* 	EAr#77RYYr]JRx	yyT*	!+AEG!4	 #'":":Y		)@T
'j$:N:NO //RY
 	4A$$Q'$*:*::##A&"&"4"4\:"NK ''4#%L% ''*!%!1!1!^!DJ ''
3	4 ,,\:FK,r   c                :    | j                  || j                        S )zSplit the input text into smaller chunks based on predefined separators.

        Args:
            text (str): The input text to be split.

        Returns:
            List[str]: A list of text chunks obtained after splitting.
        )rP   rG   )r   r#   s     r   r%   z)RecursiveCharacterTextSplitter.split_textu   s     d&6&677r   c                :    | j                  |      } | d|dd|S )a  Return an instance of this class based on a specific language.

        This method initializes the text splitter with language-specific separators.

        Args:
            language (Language): The language to configure the text splitter for.
            **kwargs (Any): Additional keyword arguments to customize the splitter.

        Returns:
            RecursiveCharacterTextSplitter: An instance of the text splitter configured
            for the specified language.
        T)rH   r   r   )get_separators_for_language)clslanguager   rH   s       r   from_languagez,RecursiveCharacterTextSplitter.from_language   s)      44X>
LjTLVLLr   c                   | t         j                  k(  s| t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j
                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg d	S | t         j                  k(  rg d
S | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                   k(  rg dS | t         j"                  k(  rg dS | t         j$                  k(  rg dS | t         j&                  k(  rg dS | t         j(                  k(  rg dS | t         j*                  k(  rg dS | t         j,                  k(  rg dS | t         j.                  k(  rg dS | t         j0                  k(  rg dS | t         j2                  k(  rg dS | t         j4                  v rt7        d|  d      t7        d|  dt9        t                      )a
  Retrieve a list of separators specific to the given language.

        Args:
            language (Language): The language for which to get the separators.

        Returns:
            List[str]: A list of separators appropriate for the specified language.
        )
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r&   rE   rF   r   )
func 
var 
const 
type r_   r`   rb   rc   r&   rE   rF   r   )r^   
public 
protected 	
private 
static r_   r`   ra   rb   rc   r&   rE   rF   r   )r^   rh   ri   rj   z

internal z
companion z
fun 
val re   r_   r`   ra   z
when rc   
else r&   rE   rF   r   )

function rf   
let re   r^   r_   r`   ra   rb   rc   	
default r&   rE   rF   r   )
enum 
interface z
namespace rg   r^   rn   rf   ro   re   r_   r`   ra   rb   rc   rp   r&   rE   rF   r   )rn   r^   r_   	
foreach ra   
do rb   rc   r&   rE   rF   r   )
z	
message z	
service rq   z
option 
import z
syntax r&   rE   rF   r   )r^   
def z
	def r&   rE   rF   r   )z
=+
z
-+
z
\*+
z

.. *

r&   rE   rF   r   )rv   r^   r_   
unless ra   r`   rt   z
begin z
rescue r&   rE   rF   r   )rv   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop r_   rw   ra   rc   z
cond z
with r`   rt   r&   rE   rF   r   )z
fn rf   ro   r_   ra   r`   z
loop 
match rf   r&   rE   rF   r   )r^   z
object rv   rl   re   r_   r`   ra   rx   rc   r&   rE   rF   r   )rd   r^   
struct rq   r_   r`   ra   rt   rb   rc   r&   rE   rF   r   )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r&   rE   rF   r   )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rF   r   )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler   )rr   rq   z
implements z

delegate 
event r^   z

abstract rh   ri   rj   rk   z
return r_   z

continue r`   rs   ra   rb   z
break rc   rm   
try z
throw 	
finally 
catch r&   rE   rF   r   )z
pragma z
using z

contract rr   z	
library z
constructor rg   rn   r{   z

modifier z
error ry   rq   r_   r`   ra   z

do while z

assembly r&   rE   rF   r   )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rE   rF   r   )
z
local rn   r_   r`   ra   z
repeat r&   rE   rF   r   )z	
main :: z
main = ro   z
in rt   z
where 
:: z
= 
data z	
newtype rg   r   z
module ru   z
qualified z
import qualified r^   z

instance rc   z
| r   z
= {z
, r&   rE   rF   r   )rn   z
param r_   rs   r`   ra   rb   r^   r|   r~   r}   r&   rE   rF   r   z	Language z is not implemented yet!z& is not supported! Please choose from )r	   CCPPGOJAVAKOTLINJSTSPHPPROTOPYTHONRSTRUBYELIXIRRUSTSCALASWIFTMARKDOWNLATEXHTMLCSHARPSOLCOBOLLUAHASKELL
POWERSHELL_value2member_map_
ValueErrorr=   )r[   s    r   rY   z:RecursiveCharacterTextSplitter.get_separators_for_language   s    xzz!X%= ( $ " & ( ( 2 $ ( $ 2 % $ ' & (
 
 %  & $ ( . & $ ' ( ' ( *** & ' . & > (# #H % : ' > %   )))% %L ,,, . 444y
2JKLLH: &&&*8n%57 r   )NTF)
rH   zOptional[List[str]]r>   $Union[bool, Literal['start', 'end']]r   r(   r   r   r)   r*   )r#   r'   rH   r,   r)   r,   r+   )r[   r	   r   r   r)   rC   )r[   r	   r)   r,   )r-   r.   r/   r0   r   rP   r%   classmethodr\   staticmethodrY   r1   r2   s   @r   rC   rC   :   s     +/?C#(	
6'
6 =
6 !	
6
 
6 

6&P	8 MM+.M	'M M$ | |r   rC   )r#   r'   r   r'   r>   r   r)   r,   )
__future__r   r   typingr   r   r   r   r   langchain_text_splitters.baser	   r
   r   r    rC   r   r   r   <module>r      sS    " 	 6 6 @6L 6,*
**/S**6V
\ V
r   