
    '}h:O                        U d dl Z d dlmZ d dlmZmZmZ d dlmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d
dgZe j6                  j8                  Zd Zi Ze
e	e	f   ed<   d Z d*dZ! e!ejD                        ddde#fd       Z$ e!ejJ                        d+de#fd       Z& e!ejN                        d+de#fd       Z( e!ejR                        d+de#fd       Z*	 d*dee#   dee#   dee#   de+de#f
dZ, e!ejZ                  ej\                  g      ddde#fd       Z/ e!ej`                        de#fd       Z1d Z2 e!ejf                  ejh                  g      ddde#fd       Z5d Z6 e!ejn                  ejp                  g      ddde#fd        Z9ejD                  e$ejJ                  e&ejN                  e(ejR                  e*ejZ                  e/ej\                  e/ej`                  e1ejf                  e5ejh                  e5ejn                  e9ejp                  e9iZd! Z:g d"Z;d# Z<d$ Z=d% Z>d& Z? G d' d
e      Z@ G d( d)e      ZAy),    N)tree_maptree_flattentree_unflatten)ListAnyDictOptionalUnion
NamedTuple)defaultdict)TorchDispatchMode)RemovableHandle)register_decompositionprodwrapsFlopCounterModeregister_flop_formulac                 R    t        | t        j                        r| j                  S | S N)
isinstancetorchTensorshape)is    W/var/www/html/test/engine/venv/lib/python3.12/site-packages/torch/utils/flop_counter.py	get_shaper      s    !U\\"wwH    flop_registryc                 4     t               d d fd
       }|S )N)outc                 F    t        t        ||| f      \  }}} |d|i|S )N	out_shape)r   r   )r"   argskwargsr$   fs       r   nfzshape_wrapper.<locals>.nf   s2    "*9tVS6I"Jfi$6)6v66r   r   r'   r(   s   ` r   shape_wrapperr*      s#    
1X 7 7 Ir   c                       fd}|S )Nc                 R    st        |       }  t        t        d      |        | S )NT)registryunsafe)r*   r   r    )flop_formulaget_rawtargetss    r   register_funz+register_flop_formula.<locals>.register_fun!   s*    (6LLwtL\Zr    )r1   r0   r2   s   `` r   r   r       s     r   )r$   returnc                :    | \  }}|\  }}||k(  sJ ||z  dz  |z  S )zCount flops for matmul.   r3   )	a_shapeb_shaper$   r%   r&   mkk2ns	            r   mm_flopr=   )   s3    
 DAqEB7N7q519q=r   c                     t        ||      S )zCount flops for addmm.)r=   
self_shaper7   r8   r$   r&   s        r   
addmm_floprA   4   s     7G$$r   c                 V    | \  }}}|\  }}}	||k(  sJ ||k(  sJ ||z  |	z  dz  |z  }
|
S )z"Count flops for the bmm operation.r6   r3   )r7   r8   r$   r&   br9   r:   b2r;   r<   flops              r   bmm_floprF   9   sK    
 GAq!IBA7N77N7q519q=1DKr   c                     t        ||      S )z&Count flops for the baddbmm operation.rF   r?   s        r   baddbmm_floprI   F   s    
 GW%%r   x_shapew_shaper$   
transposedc                 t    | d   }|r| n|dd }|^}}}	 t        |      t        |      z  |z  |z  |z  dz  }	|	S )a  Count flops for convolution.

    Note only multiplication is
    counted. Computation for bias are ignored.
    Flops for a transposed convolution are calculated as
    flops = (x_shape[2:] * prod(w_shape) * batch_size).
    Args:
        x_shape (list(int)): The input shape before convolution.
        w_shape (list(int)): The filter shape.
        out_shape (list(int)): The output shape after convolution.
        transposed (bool): is the convolution transposed
    Returns:
        int: the number of flops
    r   r6   Nr   )
rJ   rK   r$   rL   
batch_size
conv_shapec_outc_infilter_sizerE   s
             r   conv_flop_countrS   N   s]    * J''Y;J 'E4+ 
d;//*<uDtKaODKr   c                     t        | |||      S )zCount flops for convolution.rL   )rS   )
rJ   rK   _bias_stride_padding	_dilationrL   r$   r%   r&   s
             r   	conv_floprZ   u   s     7GY:NNr   c                    d }d}	 |
d   r t        |d         }|t        | |||       z  }|
d   rZt        |d         }|r&|t         ||        ||       ||      d      z  }|S |t         ||       ||        ||      d      z  }|S )Nc                 4    | d   | d   gt        | dd        z   S )N   r   r6   )list)r   s    r   tzconv_backward_flop.<locals>.t   s$    a%(#d59o55r   r   r]   FrU   )r   rS   )grad_out_shaperJ   rK   rV   rW   rX   rY   rL   _output_padding_groupsoutput_maskr$   r_   
flop_countgrad_input_shapegrad_weight_shapes                   r   conv_backward_floprg   {   s    6JDL 1~$Yq\2ong?OU_Q_``
1~%il3/!N*;QwZK\I]joppJ
  /!G*a6GK\I]joppJr   c                     | \  }}}}|\  }}}	}
|\  }}}}||cxk(  r|k(  r"n J ||cxk(  r|k(  rn J ||
k(  r
|	|k(  r||
k(  sJ d}|t        ||z  ||f||z  ||	f      z  }|t        ||z  ||	f||z  |	|f      z  }|S )z^
    Count flops for self-attention.

    NB: We can assume that value_shape == key_shape
    r   rH   )query_shape	key_shapevalue_shaperC   hs_qd_q_b2_h2s_k_d2_b3_h3_s3d_vtotal_flopss                   r   sdpa_flop_countrx      s     !NAq#s"Cc3$Cc3?s?[[qC3[[3#:#*QTX[Q[[[K8QUC-AsC/@AAK8QUC-AsC/@AAKr   c                    t        | ||      S )zCount flops for self-attention.)rx   )ri   rj   rk   r$   r%   r&   s         r   	sdpa_floprz      s     ;	;??r   c                    d}|\  }}}}|\  }	}
}}|\  }}}}| \  }}}}||	cxk(  r|cxk(  r|k(  rn J ||
cxk(  r|cxk(  r|k(  r	n J ||k(  sJ ||k(  r
||k(  r||k(  sJ d}|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|S )Nr   rH   )r`   ri   rj   rk   rw   rC   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   _b4_h4_s4_d4s                        r   sdpa_backward_flop_countr      sf   K NAq#s"Cc3$Cc3'Cc3!s!c!KKa3&<#&<&<KKKK#:#*33K 8QUC-AsC/@AAK 8QUC-AsC/@AAK8QUC-AsC/@AAK 8QUC-AsC/@AAK8QUC-AsC/@AAKr   c                    t        | |||      S )z(Count flops for self-attention backward.)r   )r`   ri   rj   rk   r$   r%   r&   s          r   sdpa_backward_flopr     s     $NKKXXr   c                 ,    t        | t              s| fS | S r   )r   tuplexs    r   normalize_tupler   *  s    atHr   ) KMBTc                     t        dt        t        t              dz
  t        t	        |             dz
  dz              }t        |   S )Nr   r]   r6      )maxminlensuffixesstr)numberindexs     r   get_suffix_strr   3  s=     3s8}q(3s6{+;a+?A*EFGEE?r   c                 X    t         j                  |      }| d|z  z  d}|t         |   z   S )Ni  z.3f)r   r   )r   suffixr   values       r   convert_num_with_suffixr   :  s2    NN6"E%c*E8E?""r   c                     |dk(  ry| |z  dS )Nr   0%z.2%r3   )numdenoms     r   convert_to_percent_strr   A  s    zEk#r   c                 .     t                fd       }|S )Nc                 B    t        |       \  }} | }t        ||      S r   )r   r   )r%   	flat_argsspecr"   r'   s       r   r(   z)_pytreeify_preserve_structure.<locals>.nfG  s'    &t,	4mc4((r   r   r)   s   ` r   _pytreeify_preserve_structurer   F  s     
1X) )
 Ir   c                   $    e Zd ZdZ	 	 	 	 ddeeej                  j                  e	ej                  j                     f      de
dedeeeef      fdZd Zd Zd	 Zd
 Zd Zd Zde
fdZdeeeee
f   f   fdZddZ fdZ fdZddZ xZS )r   a  
    ``FlopCounterMode`` is a context manager that counts the number of flops within its context.

    It does this using a ``TorchDispatchMode``.

    It also supports hierarchical output by passing a module (or list of
    modules) to FlopCounterMode on construction. If you do not need hierarchical
    output, you do not need to use it with a module.

    Example usage

    .. code-block:: python

        mod = ...
        flop_counter = FlopCounterMode(mod)
        with flop_counter:
            mod.sum().backward()

    modsdepthdisplaycustom_mappingc                 p   t        d       | _        || _        dg| _        d| _        || _        |i }t        |t        j                  j                        r|g}|| _
        i | _        i t        |j                         D ci c]   \  }}|t        |dd      r|n
t        |      " c}}| _        y c c}}w )Nc                       t        t              S r   )r   intr3   r   r   <lambda>z*FlopCounterMode.__init__.<locals>.<lambda>k  s    +VYJZ r   GlobalF_get_raw)r   flop_countsr   parentsin_backwardr   r   r   nnModuler   _module_to_forward_hook_handlesr    itemsgetattrr*   )selfr   r   r   r   r:   vs          r   __init__zFlopCounterMode.__init__e  s     7BBZ6[
 z !NdEHHOO,6D	UW,

WeWkWkWmntqRSqwq*e4!-:JJn
ns   %B2c                    | j                   y | j                   D ]  }t        |      j                  }t        |j	                               j                         D ]y  \  }}|dk(  r|}ndj                  ||g      }|j                  | j                  |            }|j                  | j                  |            }t        ||      | j                  |<   {  y )Nr   .)r   type__name__dictnamed_modulesr   joinregister_forward_pre_hook_enter_moduleregister_forward_hook_exit_module_ForwardHookHandlesr   )r   modprefixnamemoduleforward_pre_hook_handleforward_hook_handles          r   _register_forward_hooksz'FlopCounterMode._register_forward_hooks|  s    9999 	C#Y''F $S%6%6%8 9 ? ? A 
f2:!D88VTN3D*0*J*J4K]K]^bKc*d'&,&B&B4CTCTUYCZ&[#?R+-@@44V<
	r   c                     | j                   j                         D ](  }|d   j                          |d   j                          * | j                   j                          y )Nr   r]   )r   valuesremoveclear)r   forward_hook_handless     r   _deregister_forward_hooksz)FlopCounterMode._deregister_forward_hooks  sW    $($H$H$O$O$Q 	-  #**, #**,	- 	,,224r   c                       fd}|S )Nc                 H     t        j                              |      }|S r   )r   _create_pre_module)r   inputsr"   r   r   s      r   r'   z(FlopCounterMode._enter_module.<locals>.f  s%    N/0G0G0MNvVCJr   r3   r   r   r'   s   `` r   r   zFlopCounterMode._enter_module  s    	 r   c                       fd}|S )Nc                 H     t        j                              |      }|S r   )r   _create_post_module)r   r   outputsr   r   s      r   r'   z'FlopCounterMode._exit_module.<locals>.f  s&    S3D4L4LT4RST[\GNr   r3   r   s   `` r   r   zFlopCounterMode._exit_module  s    	 r   c                 j      G  fddt         j                  j                        }|j                  S )Nc                   :    e Zd Ze fd       Ze fd       Zy)6FlopCounterMode._create_post_module.<locals>.PushStatec                     j                   d   k(  sJ j                   d    d        j                   j                          t        d |      }|S )Nz is not c                 Z    t        | t        j                        r| j                         S | S r   r   r   r   cloner   s    r   r   zPFlopCounterMode._create_post_module.<locals>.PushState.forward.<locals>.<lambda>      z!U\\7R!'') XY r   )r   popr   ctxr%   r   r   s     r   forwardz>FlopCounterMode._create_post_module.<locals>.PushState.forward  sW    ||B'4/TDLL4D3EXdV1TT/  " Y[_`r   c                 L    d_         j                  j                         |S )NT)r   r   appendr   	grad_outsr   r   s     r   backwardz?FlopCounterMode._create_post_module.<locals>.PushState.backward  s#    #' ##D)  r   Nr   
__module____qualname__staticmethodr   r   r   r   s   r   	PushStater     s)      ! !r   r   r   autogradFunctionapply)r   r   r   s   `` r   r   z#FlopCounterMode._create_post_module  s%    	!// 	! r   c                 j      G  fddt         j                  j                        }|j                  S )Nc                   :    e Zd Ze fd       Ze fd       Zy)4FlopCounterMode._create_pre_module.<locals>.PopStatec                     j                   rdg_        d_         j                  j                         t        d |      }|S )Nr   Tc                 Z    t        | t        j                        r| j                         S | S r   r   r   s    r   r   zNFlopCounterMode._create_pre_module.<locals>.PopState.forward.<locals>.<lambda>  r   r   )r   r   r   r   r   s     r   r   z<FlopCounterMode._create_pre_module.<locals>.PopState.forward  sB    ##$,:DL'+D$##D) Y[_`r   c                 d    j                   d   k(  sJ j                   j                          |S )Nr   )r   r   r   s     r   r   z=FlopCounterMode._create_pre_module.<locals>.PopState.backward  s0    ||B'4///  "  r   Nr   r   s   r   PopStater     s)      ! !r   r   r   )r   r   r   s   `` r   r   z"FlopCounterMode._create_pre_module  s%    	!u~~.. 	!  ~~r   r4   c                 N    t        | j                  d   j                               S )Nr   )sumr   r   )r   s    r   get_total_flopszFlopCounterMode.get_total_flops  s!    4##H-44677r   c                 |    | j                   j                         D ci c]  \  }}|t        |       c}}S c c}}w )a  Return the flop counts as a dictionary of dictionaries.

        The outer
        dictionary is keyed by module name, and the inner dictionary is keyed by
        operation name.

        Returns:
            Dict[str, Dict[Any, int]]: The flop counts as a dictionary.
        )r   r   r   )r   r:   r   s      r   get_flop_countszFlopCounterMode.get_flop_counts  s3     (,'7'7'='='?@tq!47
@@@s   8c                      | j                   }|d}dd l}d|_        g d}g } j                         t	              d fd} j
                  j                         D ]?  }|dk(  r	|j                  d      d	z   }||kD  r# |||d	z
        }|j                  |       A d j
                  v r2s0t        |      D ]  \  }	}
d
||	   d   z   ||	   d<     |dd      |z   }t        |      dk(  rg dg}|j                  ||d      S )Ni?B r   T)r   FLOPz% TotalFc           	         t        
j                  |    j                               }	|k\  z  	d|z  }g }|j                  || z   t	        |      t        |      g       
j                  |    j                         D ]<  \  }}|j                  |dz   t        |      z   t	        |      t        |      g       > |S )N z - )r  r   r   r   r   r   r   r   )mod_namer   rw   paddingr   r:   r   global_flopsglobal_suffixis_global_subsumedr   s          r   process_modz.FlopCounterMode.get_table.<locals>.process_mod  s     d..x8??ABK+"==EkGFMM("']C&{LA 
 ((288: 1eOc!f,+A}=*1l;  Mr   r   r   r]   r  )r   0r   )leftrightr  )headerscolalign)r   tabulatePRESERVE_WHITESPACEr  r   r   keyscountextend	enumerater   )r   r   r  headerr   r  r   	mod_depth
cur_valuesidxr   r  r  r  s   `          @@@r   	get_tablezFlopCounterMode.get_table  s;   =JJE=E'+$.++-&|4"	, ##((* 	&Ch		#*I5 $S)a-8JMM*%	& t'''0B'/ 6
U!$vc{1~!5sA6 !1-6Fv;!+,F  B\ ]]r   c                 x    | j                   j                          | j                          t        |           | S r   )r   r   r   super	__enter__)r   	__class__s    r   r!  zFlopCounterMode.__enter__  s1     $$&r   c                     | j                   r$t        | j                  | j                               | j	                          t        |   |  y r   )r   printr  r   r   r   __exit__)r   r%   r"  s     r   r%  zFlopCounterMode.__exit__  s8    <<$..,-&&($r   c                 r   |r|ni } ||i |}|j                   }|| j                  v r| j                  |   } ||i |d|i}t        t        | j                              t        | j                        k7  rt        d       t        | j                        D ]  }	| j                  |	   |xx   |z  cc<    |S )Nr"   zThe module hierarchy tracking seems to be messed up.Please file a bug or just run the flop counter withouttracking the module hierarchy (i.e. `with FlopCounterMode():`))_overloadpacketr    r   setr   r$  r   )
r   functypesr%   r&   r"   func_packetflop_count_funcrd   pars
             r   __torch_dispatch__z"FlopCounterMode.__torch_dispatch__  s    !rD#F#**$,,,"00=O($B&BcBJ3t||$%T\\)::U
 4<<( A  %k2j@2A 
r   )Nr6   TNr   )r3   N)r   r   r   __doc__r	   r
   r   r   r   r   r   boolr   r   r   r   r   r   r   r   r   r  r   r  r  r!  r%  r.  __classcell__)r"  s   @r   r   r   P  s    , MQ 7;
5$uxx2G!GHI
 
 	

 %T#s(^4
."5"&8 8
Ac4S>&9!: 
A:^x r   c                   "    e Zd ZU eed<   eed<   y)r   r   r   N)r   r   r   r   __annotations__r3   r   r   r   r   -  s    ,,((r   r   )Fr   )Br   torch.nnr   torch.utils._pytreer   r   r   typingr   r   r   r	   r
   r   collectionsr   torch.utils._python_dispatchr   torch.utils.hooksr   torch._decompr   mathr   	functoolsr   __all__opsatenr   r    r3  r*   r   mmr   r=   addmmrA   bmmrF   baddbmmrI   r0  rS   convolution_convolutionrZ   convolution_backwardrg   rx   '_scaled_dot_product_efficient_attention#_scaled_dot_product_flash_attentionrz   r   0_scaled_dot_product_efficient_attention_backward,_scaled_dot_product_flash_attention_backwardr   r   r   r   r   r   r   r   r   r3   r   r   <module>rK     s     F F ? ? # : - 0   5
6yy~~
 !#tCH~ " tww/3 #    tzz"%# % #% txx 
C 
 !
 t||$&C & %& 	%#Y%#Y% Cy% 	%
 	%N (($*;*;<=bf Oux O >O
 t001e e 2eN$ DDdFnFnopDH @WZ @ q@6 MMt  PA  PA  B  C]a Yps Y CY
 	GGWJJ
HHhLL,iy100),,i99;M557I $# 
[' [z)* )r   