
    $|h                        d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZ  ej@                  e!      Z" ed
      Z# ed      Z$ G d d      Z%y)    N)	AnyBinaryIO	ContainerDictIteratorListOptionalSetTuple)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)
parse_rectPagePagesc                       e Zd ZdZdedededee   ddf
dZdefd	Z	h d
Z
ededed    fd       Ze	 	 	 	 	 ddedeee      dededededed    fd       Zy)PDFPageaz  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes
    ----------
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).

    docpageidattrslabelreturnNc                    || _         || _        t        |      | _        || _        t        | j                  j                  d            | _        t        | j                  j                  dt                           | _	        t        | j                  d         D cg c]  }t        |       }}t        t        |            | _        | j                  | _        d| j                  v r'	 t        t        | j                  d               | _        t        | j                  j                  dd            dz   dz  | _        | j                  j                  d      | _        | j                  j                  d	      | _        d
| j                  v rt        | j                  d
         }ng }t%        |t&              s|g}|| _        yc c}w # t        $ r Y w xY w)zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxRotater   ih  AnnotsBContentsN)r   r   r   r   r    r   getlastmoddict	resourcesr   mediaboxcropboxr   r   rotateannotsbeads
isinstancelistcontents)selfr   r   r   r    mediabox_parammediabox_paramsr6   s           O/var/www/html/test/engine/venv/lib/python3.12/site-packages/pdfminer/pdfpage.py__init__zPDFPage.__init__0   s}    &



~ >?/7JJNN;/0

 #+4::j+A"B&
 ^$&
 &
 #8O#<=}}

")(4::i3H*IJ !!!<=CsJjjnnX.ZZ^^C(
#

: 67HH(D) zH&.+&
 ! s   G(&G 	GGc                 <    d| j                   d| j                  dS )Nz<PDFPage: Resources=z, MediaBox=>)r.   r/   )r7   s    r:   __repr__zPDFPage.__repr__]   s"    %dnn%7{4==BSSTUU    >   r'   r&   r%   r$   documentc              #      	K   	 d
dt         dt        t        t         f   dt        t        t               dt
        t        t        t        t         t        t         t         f   f   f      f 	fd		 j                         }d}dj                  v rB 	j                  d   j                        }|D ]  \  }}  ||t        |             d} |svj                  D ]f  }|j                         D ]Q  }	 j!                  |      }t#        |t$              r-|j'                  d	      t(        u r  ||t        |             S h y y # t        $ r t        j                  d       }Y w xY w# t*        $ r Y w xY ww)Nobjparentvisitedr!   c              3     K   t        | t              r+| }t        j                  |            j	                         }n%| j
                  }t        |       j	                         }|
t               }||v ry |j                  |       |j                         D ]  \  }}|	j                  v s||vs|||<     |j                  d      }|!t        j                  s|j                  d      }|t        u rCd|v r?t        j                  d|d          t!        |d         D ]  } 
|||      E d {     y |t"        u rt        j                  d|       ||f y y 7 -w)NTypetypeKidszPages: Kids=%rzPage: %r)r4   intr   getobjcopyobjidsetadditemsINHERITABLE_ATTRSr+   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)rB   rC   rD   	object_idobject_propertieskvobject_typechildclsdepth_first_searchr@   s            r:   r]   z0PDFPage.create_pages.<locals>.depth_first_searchd   sd    
 #s#	$.xy/I$J$O$O$Q!  II	$.sO$8$8$:! %G#KK	" -1---!;L2L+,%a(- ,//7K"8??/33F;m+:K0K		*,=f,EF'(9&(AB UE1%9JGTTTU ,		*&78 "344 - Us   B'E+E0A>E.E/.EFr   TrF   )N)r   r   strr	   r
   r   r   rI   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrJ   r4   r-   r+   rU   r   )
r\   r@   page_labelspagesobjectsrL   treexrefrB   r]   s
   ``       @r:   create_pageszPDFPage.create_pagesb   s    
 +/$	5$	5cN$	5 c#h'$	5 eCc4S>&9!::;<	$	5L	13;3K3K3MK h&&&()9)9')BHDTDTUG& t(E4k1BCC   !__. E&ooe4%c40SWWV_5T"%hsD<M"NN	   	1#**40K	1" - sP   A,F 2E A6F 9AE1F E.+F -E..F 1	E=:F <E==F fppagenosmaxpagespasswordcachingcheck_extractablec              #   "  K   t        |      }t        |||      }|j                  s,|rd|z  }	t        |	      d|z  }
t        j                  |
       t        | j                  |            D ]  \  }}|r||vr| |s||dz   k  s y  y w)N)ro   rp   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   is_extractabler   rS   warning	enumeraterk   )r\   rl   rm   rn   ro   rp   rq   parserr   	error_msgwarning_msgpagenopages                r:   	get_pageszPDFPage.get_pages   s      2&8WE !! @2E	1)<<A DF	F  K(%c&6&6s&;< 	LFDF'1JH
2	s   A?BBB)Nr    TF)__name__
__module____qualname____doc__r   objectr	   r^   r;   r>   rP   classmethodr   rk   r   r   rI   boolr|    r?   r:   r   r      s    .+/+/ +/ 	+/
 }+/ 
+/ZV# V G;K ;HY4G ; ;z  -1"'"" )C.)" 	"
 " "  " 
)	" "r?   r   )&r`   loggingtypingr   r   r   r   r   r   r	   r
   r   pdfminerr   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   	getLoggerr~   rS   rU   rR   r   r   r?   r:   <module>r      sh      W W W  
 D ( I I ! %g! 6{Gk kr?   