
    7|hO                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ  ej                   e      Z G d de      Zy)    N)Path)AnyDictIteratorPatternUnion)Document)
BaseLoaderc                   j   e Zd ZU dZ ej
                  dej                        Zee	d<    ej
                  dej                        Z
ee	d<    ej
                  d      Zee	d<    ej
                  dej                        Zee	d	<    ej
                  d
ej                        Zee	d<    ej
                  dej                        Zee	d<   	 	 d!deeef   dedefdZdeeef   dej,                  defdZdedeeef   defdZdedefdZdedefdZdedefdZdedefdZdedefdZ de!e"   fdZ#y )"ObsidianLoaderz%Load `Obsidian` files from directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXz	{{(.*?)}}TEMPLATE_VARIABLE_REGEXz[^\S\/]#([a-zA-Z_]+[-_/\w]*)	TAG_REGEXz^\s*(\w+)::\s*(.*)$DATAVIEW_LINE_REGEXz\[(\w+)::\s*(.*)\]DATAVIEW_INLINE_BRACKET_REGEXz\((\w+)::\s*(.*)\)DATAVIEW_INLINE_PAREN_REGEXpathencodingcollect_metadatac                 .    || _         || _        || _        y)a%  Initialize with a path.

        Args:
            path: Path to the directory containing the Obsidian files.
            encoding: Charset encoding, defaults to "UTF-8"
            collect_metadata: Whether to collect metadata from the front matter.
                Defaults to True.
        N)	file_pathr   r   )selfr   r   r   s       l/var/www/html/test/engine/venv/lib/python3.12/site-packages/langchain_community/document_loaders/obsidian.py__init__zObsidianLoader.__init__   s       0    placeholdersmatchreturnc                 L    dt        |       d}|j                  d      ||<   |S )z/Replace a template variable with a placeholder.__TEMPLATE_VAR___   )lengroup)r   r   r   placeholders       r   _replace_template_varz$ObsidianLoader._replace_template_var/   s0     (L(9':"=$)KKN[!r   objc                 t   t        |t              r0|j                         D ]  \  }}|j                  |d| d      } |S t        |t              r/|j                         D ]  \  }}| j                  ||      ||<    |S t        |t              r(t        |      D ]  \  }}| j                  ||      ||<    |S )zIRestore template variables replaced with placeholders to original values.z{{z}})
isinstancestritemsreplacedict_restore_template_varslist	enumerate)r   r'   r   r%   valuekeyiitems           r   r.   z%ObsidianLoader._restore_template_vars7   s    c3&2&8&8&: C"Ukk+eWD/ABC 
 T"!iik L
U66ulKCL
 
 T"$S> I444T<HAI
r   contentc                    | j                   si S | j                  j                  |      }|si S i }t        j                  | j
                  |      }| j                  j                  ||j                  d            }	 t        j                  |      }| j                  ||      }d|v r*t        |d   t              r|d   j                  d      |d<   |S # t        j                  j                   $ r t"        j%                  d       i cY S w xY w)zEParse front matter metadata from the content and return it as a dict.r"   tagsz, z Encountered non-yaml frontmatter)r   r   search	functoolspartialr&   r   subr$   yaml	safe_loadr.   r)   r*   splitparserParserErrorloggerwarning)r   r5   r   r   replace_template_varfront_matter_textfront_matters          r   _parse_front_matterz"ObsidianLoader._parse_front_matterD   s    $$I''..w7I')(00&& 
 !88<< %++a.
	>>*;<L66|\RL %*\&5I3*O'3F';'A'A$'GV${{&& 	NN=>I	s   <AC 4D
	D
metadatac                     i }|j                         D ]5  \  }}t        |      t        t        t        hv r|||<   (t        |      ||<   7 |S )z4Convert a dictionary to a compatible with langchain.)r+   typer*   intfloat)r   rG   resultr2   r1   s        r   !_to_langchain_compatible_metadataz0ObsidianLoader._to_langchain_compatible_metadatab   sQ    "..* 	)JCE{sC//#s!%js		)
 r   c                     | j                   s
t               S | j                  j                  |      }|s
t               S |D ch c]  }| c}S c c}w )z0Return a set of all tags in within the document.)r   setr   findall)r   r5   r   tags       r   _parse_document_tagsz#ObsidianLoader._parse_document_tagsl   sD    $$5L&&w/5L$%%%%s   	Ac                 `   | j                   si S i | j                  j                  |      D ci c]  }|d   |d    c}| j                  j                  |      D ci c]  }|d   |d    c}| j                  j                  |      D ci c]  }|d   |d    c}S c c}w c c}w c c}w )zWParse obsidian dataview plugin fields from the content and return it
        as a dict.r   r"   )r   r   rP   r   r   )r   r5   r   s      r   _parse_dataview_fieldsz%ObsidianLoader._parse_dataview_fieldsw   s     $$I
 "55==gF a%("
 "==EEgN a%("
 "??GGP a%("
 	
s   B!B&B+c                 V    | j                   s|S | j                  j                  d|      S )z4Remove front matter metadata from the given content. )r   r   r;   )r   r5   s     r   _remove_front_matterz#ObsidianLoader._remove_front_matter   s)    $$N&&**2w77r   c           
   #   $  K   t        t        | j                        j                  d            }|D ]L  }t	        || j
                        5 }|j                         }d d d        | j                        }| j                  |      }| j                  |      }| j                  |      }t        |j                        t        |      |j                         j                  |j                         j                  |j                         j                   d| j#                  |      |}|s|j%                  d      r4dj'                  |t)        |j%                  dg       xs g       z        |d<   t+        ||       O y # 1 sw Y   'xY ww)Nz**/*.md)r   )sourcer   createdlast_modifiedlast_accessedr7   ,)page_contentrG   )r/   r   r   globopenr   readrF   rR   rT   rW   r*   namestatst_ctimest_mtimest_atimerM   getjoinrO   r	   )	r   pathsr   ftextrE   r7   dataview_fieldsrG   s	            r   	lazy_loadzObsidianLoader.lazy_load   s^    T$..)..y9: 	ADdT]]3  qvvx   33D9L,,T2D"99$?O,,T2Ddii.D	99;//!%!5!5!%!5!5 88F "H |''/#&883|//;ArBB$  x@@/	A   s   AFFD%FF	FN)zUTF-8T)$__name__
__module____qualname____doc__recompileDOTALLr   r   __annotations__r   r   	MULTILINEr   r   r   r   r*   r   boolr   r   Matchr&   r   r.   r-   rF   rM   rO   rR   rT   rW   r   r	   rm    r   r   r   r      s   /","**-BBII"NN'1rzz,		'JWJ#$CDIwD#-2::.Dbll#SS-7RZZr||.!7  ,62::r||,   !%	1CI1 1 	1$ cN3588	# T#s(^ PS 3 4 <$ 4 	&C 	&C 	&
c 
d 
*8C 8C 8A8H- Ar   r   )r9   loggingrr   pathlibr   typingr   r   r   r   r   r<   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   	getLoggerrn   rA   r   ry   r   r   <module>r      sB      	  6 6  - @			8	$\AZ \Ar   