
    '}h                     h   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlmZ g dZ G d de      Zd	 Zd
 Zd Zd Z G d d      Z G d d      Z edg d      Z G d de      Z G d de      Z G d de      Z G d d      Zd ZdZ dZ!d Z"d!dZ#	 	 	 	 	 	 	 	 	 d"d Z$y)#    N)defaultdict
namedtuple)
attrgetter)AnyDictListOptionalTuple)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                        e Zd ZdZ fdZd Zd Zd Zd Zd Z	e
d        Z	 	 	 	 	 	 	 dd	Zd
 Zd ZdedefdZddZd Z xZS )r   z'A list of Events (for pretty printing).c                     |j                  dd      }|j                  dd       }|j                  dd      }|j                  dd      }t        |   |i | || _        || _        || _        d| _        || _        y )Nuse_cudaT
use_deviceprofile_memoryF
with_flops)popsuper__init__	_use_cuda_use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   r   	__class__s          [/var/www/html/test/engine/venv/lib/python3.12/site-packages/torch/autograd/profiler_util.pyr   zEventList.__init__   s}    ::j$/ZZd3
$4e<ZZe4
$)&)!%- %    c                 r    | j                          | j                          | j                          d| _        y )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr    r"   s    r&   _build_treezEventList._build_tree(   s.    ##% &&(r'   c                 "    | j                         S N)tabler,   s    r&   __str__zEventList.__str__.   s    zz|r'   c                    	 t               }t        t        |             D ]  }| |   j                  | |   j                  j                  | |   j                  k(  s=t        | |   j                  j
                        dk(  sc| |   j
                  | |   j                  _        | |   j                  | |   j                  _        | |   j
                  D ]  }| |   j                  |_         |j                  |        t        |      dk(  ry t        |       D cg c]  \  }}||vs| }}}| j                          | j                  |       Rc c}}w )N   r   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r"   	to_deleteidxchindevnew_evtss          r&   r*   zEventList._remove_dup_nodes1   s2   ISY' 
'I((4S	,,11T#Y^^CDI00==>!C8<S	8N8NDI((53793D3DDI((0"3i44 =(,S	(<(<=MM#&
' 9~"*3D/RwsBS	=QRHRJJLKK!#  Ss   E*Ec                    | D cg c]-  }|j                   s|j                  t        j                  k(  r|/ }}t	        |t        d            }t        j                  |d       }|D ]   \  }}t	        |d       }g }d}	|D ]  }
t        |      dkD  r|d   }|
j                  j                  |j                  j                  k\  s-|
j                  j                  |j                  j                  kD  r|j                          nC|j                  |
       |
j                  J d|
j                          |
j!                  |       nt        |      dkD  r|j#                  |
         yc c}w )	a4  Populate child events into each underlying FunctionEvent object.

        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        thread)keyc                 2    | j                   | j                  fS r/   )rF   node_idevents    r&   <lambda>z2EventList._populate_cpu_children.<locals>.<lambda>b   s    u||U]]&C r'   c                 \    | j                   j                  | j                   j                   gS r/   )
time_rangestartendrJ   s    r&   rL   z2EventList._populate_cpu_children.<locals>.<lambda>t   s&    5#3#3#9#9E<L<L<P<P;P"Q r'   r   Nz(There is already a CPU parent event for )is_asyncdevice_typer   CPUsortedr   	itertoolsgroupbyr6   rN   rO   rP   r   append_cpu_childr7   rG   set_cpu_parentappend)r"   evtsync_eventseventsthreads	thread_idthread_eventsthread_events_current_eventscur_endrK   parents               r&   r)   z EventList._populate_cpu_childrenE   sx   $ 
<<COOz~~$E 
 

 8$
 ##C
  )0 	-$I}#QN 35NG' -.)A-+B/F((..&2C2C2G2GG ++//&2C2C2G2GG '**,//6!,,4REeii[QR4,,V4 .)A-  %%e,#-	-9
s   2E.c                 B   fdi }| D ]D  } |      |j                   |j                  |j                  f}||vs6|j                   ||<   F | D ]I  } |      }||j                  J |j                  |j                  f}||v r||   |_         Cg |_         K y )Nc                 P    | y | j                   dk(  r| S  | j                        S Nr3   )scoper7   )r[   	bw_parents    r&   ri   z6EventList._set_backward_stacktraces.<locals>.bw_parent   s*    {a
 00r'   )stacksequence_nrrF   
fwd_thread)r"   
fwd_stacksr[   tpri   s        @r&   r+   z#EventList._set_backward_stacktraces   s    	1 
 	.C~%#))*?__cjj1J&$'IIJqM		.  	#C#A}||///]]ALL1
? *1CI "CI	#r'   c                 R    t        | D cg c]  }|j                   c}      S c c}w r/   )sumself_cpu_time_total)r"   rK   s     r&   rr   zEventList.self_cpu_time_total   s!    4@%E--@AA@s   $c                 T    t        | ||||||| j                  | j                  |
      S )a  Print an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``,
                ``cuda_time_total``, ``cpu_memory_usage``, ``cuda_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r!   )r"   rt   ru   rv   rw   rx   ry   rz   s           r&   r0   zEventList.table   s;    6 !5"7$;//''"7
 	
r'   c                 b   ddl }| j                  sdn| j                  }t        |d      5 }g }d}|j                  d       | D ]  }|j                  |j                  dj                  |j                  |j                  j                  |j                  j                         |j                  s|j                  nd|j                   d|j                   d	             |j                  D ]P  }|j                  d
|j                   d|j                  j                   d|j                   d| d| d       |dz  }R  t        |       dkD  r=|j                  |j                         dz
  |j                          |j#                          |j                  d       ddd       y# 1 sw Y   yxY w)zExport an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r3      ])osr   openwrite
trace_nameformatrN   rO   
elapsed_us	is_remoterF   rI   r:   r6   seektellSEEK_SETtruncate)	r"   pathr   device_namefchrome_eventsnext_idr[   ks	            r&   export_chrome_tracezEventList.export_chrome_trace   s    	$($4$4f$:J:J$_ ,	MG GGCL !!>>)' (.v,,113"}} 

)#++l3::,bQ(   !A GG%cnn%5 6!!$!5!5 6 7""%** .!!(	 ***5 7((	 qLG!'!!D 4y1}qvvx!|R[[1

GGCLY,	 ,	 ,	s   E1F%%F.c                 
    g dS )N)rr   self_cuda_time_totalself_privateuse1_time_total r,   s    r&   supported_export_stacks_metricsz)EventList.supported_export_stacks_metrics
  s    
 	
r'   r   metricc                 >   || j                         vr%t        dt        | j                               z         t        j                  dd      }t	        |d      5 }| D ]  }|j
                  st        |j
                        dkD  s)t        ||      }t        |      dkD  sDd}t        |j
                        D ]  }||j                  |      z  }|dz  } |d d d	z   t        t        |            z   }|j                  |d
z           	 d d d        y # 1 sw Y   y xY w)Nzmetric should be one of: z ;	
____r~   r    ;rQ    
)r   
ValueErrorstr	maketransr   rj   r6   getattrintreversed	translater   )	r"   r   r   translate_tabler   r[   metric_value	stack_strentrys	            r&   export_stackszEventList.export_stacks  s   ==??+d::<=>  --&9$_ 
	2 	299SYY!!3#*3#7L<(1,$&	%-cii%8 -E%)III%,I- %.crNS$83s<?P;Q$Q		D 01	2
	2 
	2 
	2s   D-DD!A(DDc                    | j                   sJ t        t              }dt        t        df   fd}| D ]  }| ||||         j                  |         t        |j                         | j                  | j                  | j                  | j                        }|D ]   }|j                  d| |_        |rd|_        " |S )aH  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

        Returns:
            An EventList containing FunctionEventAvg objects.
        return.c                 6   t        | j                        t        | j                        t        | j                        t        | j                        g}|r$|j                  t        | j                               |dkD  r|| j                  d | z  }t        |      S Nr   )	r   rG   rI   rS   	is_legacyrZ   input_shapesrj   tuple)rK   group_by_input_shapesgroup_by_stack_nrG   s       r&   get_keyz'EventList.key_averages.<locals>.get_key6  s    EIIEMM"E%%&EOO$	C %

3u1123!#u{{#4$455:r'   r   r   r   r   Nr   )r    r   r   r
   r   r;   r   valuesr   r   r   r!   rj   r   )r"   r   r   statsr   r[   avg_lists          r&   key_averageszEventList.key_averages$  s     9DEU9V	uSRUX 	  	RC'#46FGHLLSQ	R LLN^^''//''
  	&C		"3#34CI(#% 	& r'   c                 N    t               }| D ]  }||z  }d|_         d|_        |S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NTotal)r   rG   )r"   
total_statr[   s      r&   total_averagezEventList.total_averageS  s;     &'
 	"C#J!JN	" !
r'   )Nd   K   7   P   NF)Fr   )__name__
__module____qualname____doc__r   r-   r1   r*   r)   r+   propertyrr   r0   r   r   r   r   r   r   __classcell__)r%   s   @r&   r   r      s    1
& "(D-L#4 B B
   "#&
P7r
2# 2s 2&-^r'   r   c                 N    d}d}| |k\  r	| |z  ddS | |k\  r	| |z  ddS | ddS )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr   )time_usUS_IN_SECONDUS_IN_MSs      r&   _format_timer   a  sU    "LH,L(-Q//(H$S),,c]"r'   c                 D    |dk(  r| dk(  s
J d|         y| dz  |z  ddS )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r   )r   total_time_uss     r&   _format_time_sharer   l  s?    !|G=gYGG|o-c2!44r'   c                     d}d|z  }d|z  }t        |       |k\  r| dz  |z  ddS t        |       |k\  r| dz  |z  ddS t        |       |k\  r| dz  |z  ddS t        |       dz   S )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBs       r&   _format_memoryr   t  s    	B	B	B
6{b3,#C(,,	V	3,#C(,,	V	3,#C(,,6{T!!r'   c                       t         fd      S )Nc                 .    t        t        |             S r/   )r   r   )r"   r8   s    r&   rL   z!_attr_formatter.<locals>.<lambda>  s    gdD.A!B r'   )r   r8   s   `r&   _attr_formatterr     s    BCCr'   c                       e Zd ZdZ ed      Z ed      Z ed      Z ed      Z ed      Z	 ed      Z
 ed      Z ed	      Z ed
      Zed        Zed        Zed        Zy)r   z{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_time	cuda_timeprivateuse1_timecpu_time_totalcuda_time_totalprivateuse1_time_totalrr   r   r   c                 \    | j                   dk(  rdS d| j                  z  | j                   z  S Nr   g        r   )countr   r,   s    r&   r   zFormattedTimesMixin.cpu_time  s+    jjAosQ31D1D+Dtzz+QQr'   c                 \    | j                   dk(  rdS d| j                  z  | j                   z  S r   )r   r   r,   s    r&   r   zFormattedTimesMixin.cuda_time  s+    jjAosR31E1E+E

+RRr'   c                 \    | j                   dk(  rdS d| j                  z  | j                   z  S r   )r   r   r,   s    r&   r   z$FormattedTimesMixin.privateuse1_time  s+    jjAosY31L1L+Ltzz+YYr'   N)r   r   r   r   r   cpu_time_strcuda_time_strprivateuse1_time_strcpu_time_total_strcuda_time_total_strprivateuse1_time_total_strself_cpu_time_total_strself_cuda_time_total_strself_privateuse1_time_total_strr   r   r   r   r   r'   r&   r   r     s    
 #:.L#K0M*+=>()9:)*;<!01I!J-.CD./EF&56S&T#R R S S Z Zr'   r   c                       e Zd Zd Zd Zy)r   c                      || _         || _        y r/   )rO   rP   )r"   rO   rP   s      r&   r   zInterval.__init__  s    
r'   c                 4    | j                   | j                  z
  S )z4
        Returns the length of the interval
        )rP   rO   r,   s    r&   r   zInterval.elapsed_us  s     xx$**$$r'   N)r   r   r   r   r   r   r'   r&   r   r     s    %r'   r   r   )r8   devicedurationc                      e Zd ZdZddddddddddddej
                  dddddfdZd Zd Zd	 Z	e
d
        Ze
d        Ze
d        Ze
d        Ze
d        Ze
d        Ze
d        Ze
d        Ze
d        Ze
d        Zd Zy)r   z.Profiling information about a single function.Nr   FrQ   c                    || _         || _        || _        || _        t	        ||      | _        || _        || _        g | _        d| _	        g | _
        d | _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        y rg   )idrI   r8   r   r   rN   rF   rl   r:   r   r9   r7   r   concrete_inputsrj   rh   r   cpu_memory_usagecuda_memory_usageprivateuse1_memory_usagerR   r   rk   rS   device_indexr   flops)r"   r   r8   rF   start_usend_usrl   r   rj   rh   r   r  r  r  rR   r   rk   rI   rS   r  r   r  r   r  s                           r&   r   zFunctionEvent.__init__  s    4 #	)$,Xv$>!)3%'
1337-9*9 

)3%5&7-E%&( +'2!-($)
r'   c                     | j                   t        j                  k(  sJ | j                  j	                  t        |||             y r/   )rS   r   rT   r:   rZ   r   )r"   r8   r   r   s       r&   append_kernelzFunctionEvent.append_kernel  s5    :>>111F4:;r'   c                     | j                   t        j                  k(  sJ t        |t              sJ |j                   t        j                  k(  sJ | j
                  j                  |       y)zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)rS   r   rT   
isinstancer   r9   rZ   r"   childs     r&   rX   zFunctionEvent.append_cpu_child  sX     :>>111%///  JNN222  'r'   c                     | j                   t        j                  k(  sJ t        |t              sJ |j                   t        j                  k(  sJ || _        y)a$  Set the immediate CPU parent of type FunctionEvent.

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)rS   r   rT   r  r   r7   )r"   rd   s     r&   rY   zFunctionEvent.set_cpu_parent  sK     :>>111&-000!!Z^^333 r'   c                     | j                   s| j                  t        j                  k7  ry| j                  t        | j                  D cg c]  }|j                   c}      z
  S c c}w r   )rR   rS   r   rT   r  rq   r9   r  s     r&   self_cpu_memory_usagez#FunctionEvent.self_cpu_memory_usage  sT    ==D,,
>$$s151B1BCU##C(
 
 	
C   	A%
c                     | j                   s| j                  t        j                  k7  ry| j                  t        | j                  D cg c]  }|j                   c}      z
  S c c}w r   )rR   rS   r   rT   r  rq   r9   r  s     r&   self_cuda_memory_usagez$FunctionEvent.self_cuda_memory_usage  sT    ==D,,
>%%262C2CDU$$D)
 
 	
Dr  c                     | j                   s| j                  t        j                  k7  ry| j                  t        | j                  D cg c]  }|j                   c}      z
  S c c}w r   )rR   rS   r   rT   r  rq   r9   r  s     r&   self_privateuse1_memory_usagez+FunctionEvent.self_privateuse1_memory_usage  sT    ==D,,
>,,s9=9J9JKU++K0
 
 	
Kr  c                     | j                   s| j                  t        j                  k7  ry| j                  t        | j                  D cg c]  }|j                   c}      z
  S c c}w r   )rR   rS   r   rT   r   rq   r9   r  s     r&   rr   z!FunctionEvent.self_cpu_time_total   sT    ==D,,
>""S/3/@/@AeU!!A&
 
 	
Ar  c                    | j                   s| j                  ry| j                  t        j                  k(  ra| j
                  s9t        d | j                  D              t        d | j                  D              z   S t        d | j                  D              S | j                  t        j                  k(  sJ | j                  j                         S )Nr   c              3   4   K   | ]  }|j                     y wr/   r   .0kinfos     r&   	<genexpr>z0FunctionEvent.cuda_time_total.<locals>.<genexpr>/       De5>>D   c              3   4   K   | ]  }|j                     y wr/   )r   r  rA   s     r&   r  z0FunctionEvent.cuda_time_total.<locals>.<genexpr>/  s      K+-B&&Kr   c              3   4   K   | ]  }|j                     y wr/   r  r  s     r&   r  z0FunctionEvent.cuda_time_total.<locals>.<genexpr>4  r  r   )rR   r   rS   r   rT   r   rq   r:   r9   CUDArN   r   r,   s    r&   r   zFunctionEvent.cuda_time_total(  s    ==DOOz~~->>Dt||DDs K151B1BK H  
 Dt||DDD##z666??--//r'   c                 B   | j                   s| j                  ry| j                  t        j                  k(  r:| j
                  t        | j                  D cg c]  }|j
                   c}      z
  S | j                  t        j                  k(  sJ | j
                  S c c}w r   )	rR   r   rS   r   rT   r   rq   r9   r$  r  s     r&   r   z"FunctionEvent.self_cuda_time_total9  s    ==DOOz~~-''#484E4EF5&&F+   ##z666'''	 G   B
c                 r    | j                   t        j                  k(  r| j                  j	                         S yr   )rS   r   rT   rN   r   r,   s    r&   r   zFunctionEvent.cpu_time_totalE  s*    z~~-??--//r'   c                 B   | j                   s| j                  sy| j                  t        j                  k(  r:| j
                  t        | j                  D cg c]  }|j
                   c}      z
  S | j                  t        j                  k(  sJ | j
                  S c c}w r   )	rR   r   rS   r   rT   r   rq   r9   r$  r  s     r&   r   z)FunctionEvent.self_privateuse1_time_totalL  s    ==z~~-..;?;L;LM%--M2   ##z666...	 Nr&  c                    | j                   s| j                  sy| j                  t        j                  k(  ra| j
                  s9t        d | j                  D              t        d | j                  D              z   S t        d | j                  D              S | j                  t        j                  k(  sJ | j                  j                         S )Nr   c              3   4   K   | ]  }|j                     y wr/   r  r  s     r&   r  z7FunctionEvent.privateuse1_time_total.<locals>.<genexpr>_  r  r   c              3   4   K   | ]  }|j                     y wr/   )r   r"  s     r&   r  z7FunctionEvent.privateuse1_time_total.<locals>.<genexpr>_  s      K24B--Kr   c              3   4   K   | ]  }|j                     y wr/   r  r  s     r&   r  z7FunctionEvent.privateuse1_time_total.<locals>.<genexpr>d  r  r   )rR   r   rS   r   rT   r   rq   r:   r9   PrivateUse1rN   r   r,   s    r&   r   z$FunctionEvent.privateuse1_time_totalX  s    ==z~~->>Dt||DDs K8<8I8IK H  
 Dt||DDD##z'='====??--//r'   c                     | j                   S r/   r   r,   s    r&   rG   zFunctionEvent.keyi  s    yyr'   c                    | j                   sdn| j                   }| j                   s| j                  n| j                  }| j                   s| j                  n| j                  }dj                  | j                  | j                  | j                  | j                  | j                  | j                  j                  | j                  j                  t        | j                  D cg c]  }|j                   c}      ||| j                  | j                   t        | j"                        | j$                  ||| j&                  | j(                  | j*                  | j,                        S c c}w )Nr}   z<FunctionEvent id={} name={} device_type={} node_id={} cpu_time={} start_us={} end_us={} cpu_children={} {}_time={} name={} thread={} input_shapes={} cpu_memory_usage={} {}_memory_usage={} is_async={} is_remote={} seq_nr={} is_legacy={}>)r   r   r   r  r  r   r   r8   rS   rI   r   rN   rO   rP   r   r9   rF   r   r  rR   r   rk   r   )r"   r   device_timedevice_memory_usager  s        r&   __repr__zFunctionEvent.__repr__m  s   $(OOf&*ooD4;T;T 	
 ?? "".. 	fflfl		  !!%%##4+<+<=%UXX=>		D%%&%%#  )g	
 >s   "E%)r   r   r   r   r   rT   r   r
  rX   rY   r   r  r  r  rr   r   r   r   r   r   rG   r2  r   r'   r&   r   r     s#   8 !"NN13*j<	(
! 
 
 
 
 
 
 
 
 0 0  	( 	(   	/ 	/ 0 0   #
r'   r   c                   (    e Zd ZdZd Zd Zd Zd Zy)r   z:Used to average stats over multiple FunctionEvent objects.c                    d | _         d| _        d| _        d| _        d| _        d | _        d| _        d| _        d| _        d| _	        d| _
        d| _        d | _        d | _        d | _        d| _        d| _        d| _        d| _        d| _        d| _        d | _        d | _        t.        j0                  | _        d| _        d| _        y )Nr   F)rG   r   rI   rR   r   r   r   r   r   rr   r   r   r   rj   rh   r  r  r  r  r  r  r9   r7   r   rT   rS   r   r  r,   s    r&   r   zFunctionEventAvg.__init__  s    "&
#$)-#$$%+,#() )*!01(7;%)
$(
%&&'-.%*+"+,#23*;?37'1~~$
r'   c                    | j                   |j                   | _         |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        |j                  | _        |j                  | _	        |j                  | _
        |j                  | _        t        |t        t        f      sJ |j                   | j                   k(  sJ | xj                  |j                  z  c_        | xj                   |j                   z  c_        | xj"                  |j"                  z  c_        | xj$                  |j$                  z  c_        | xj&                  |j&                  z  c_        | xj(                  |j(                  z  c_        | xj*                  |j*                  z  c_        | xj,                  |j,                  z  c_        | xj.                  |j.                  z  c_        | xj0                  |j0                  z  c_        | xj2                  |j2                  z  c_        | xj4                  |j4                  z  c_        | xj6                  |j6                  z  c_        | j8                  |j8                  | _        | S |j8                  | xj8                  |j8                  z  c_        | S r/   )rG   rI   rR   r   r7   r9   r   rj   rh   rS   r   r   r  r   r   r   r   r   rr   r   r   r  r  r  r  r  r  r   r  r"   others     r&   r;   zFunctionEventAvg.add  s0   88 yyDH ==DL!NNDM"__DN#..DO % 2 2D % 2 2DDJDJ$00D"__DN#..DO%-1A!BCCCyyDHH$$$u333 5 55##u'C'CC#  E$=$== !!U%?%??!((E,M,MM(!7!77%"9"99%%)G)GG%""e&A&AA"##u'C'CC#**e.Q.QQ*

ekk!
::DJ  [[$JJ%++%Jr'   c                 $    | j                  |      S r/   )r;   r6  s     r&   __iadd__zFunctionEventAvg.__iadd__  s    xxr'   c                    | j                   sdn| j                   }| j                   s| j                  n| j                  }| j                   s| j                  n| j                  }| j                   s| j
                  n| j                  }dj                  | j                  | j                  | j                  ||||t        | j                        | j                  ||      S )Nr}   z<FunctionEventAvg key={} self_cpu_time={} cpu_time={}  self_{}_time={} {}_time={} input_shapes={} cpu_memory_usage={} {}_memory_usage={}>)r   r   r   r   r   r  r  r   rG   r   r   r   r   r  )r"   r   self_device_timer0  device_memorys        r&   r2  zFunctionEventAvg.__repr__  s    $(OOf ?? ))55 	 '+ooD4;T;T 	
 ?? "".. 	66<f,,!! D%%&%%7	
r'   N)r   r   r   r   r   r;   r9  r2  r   r'   r&   r   r     s    D8%N
r'   r   c                       e Zd Zd Zy)r   c                 p    t        |      dkD  rt        j                  j                  |      n|| |<   | |   S rg   )r6   torch_C	_demangle)r"   rG   s     r&   __missing__zStringTable.__missing__  s2     033x!|EHH&&s+S	Cyr'   N)r   r   r   rB  r   r'   r&   r   r     s    r'   r   c                       e Zd ZdZd Zd Zy)r   z=Acceleration structure for accessing mem_records in interval.c                     || _         g | _        g | _        t        |      dkD  rPt	        t        |      D cg c]  \  }}|d   j                         |f c}}      }t        | \  | _        | _        y y c c}}w r   )_mem_records_start_uses_indicesr6   rU   r<   r  zip)r"   mem_recordsirtmps        r&   r   zMemRecordsAcc.__init__
  sn    '&(#%{a9[;QR41a1Q4==?A.RSC.13i+Ddm  Rs   A5
c              #      K   t        j                  | j                  |      }t        j                  | j                  |      }t	        ||      D ]   }| j
                  | j                  |       " yw)z:
        Return all records in the given interval
        N)bisectbisect_leftrF  bisect_rightr5   rE  rG  )r"   r  r  	start_idxend_idxrJ  s         r&   in_intervalzMemRecordsAcc.in_interval  sh      &&t'7'7B	%%d&6&6?y'* 	6A##DMM!$455	6s   A1A3N)r   r   r   r   r   rS  r   r'   r&   r   r     s    G86r'   r   c                 4     g d}t         fd|D              S )N))autograd/__init___make_grads)rU  backward)ztorch/tensorrW  )_internal/common_utilsprof_callable)rX  prof_func_call)rX  prof_meth_callc              3   @   K   | ]  }|d    v xr |d   v    yw)r   r3   Nr   )r  r   r   s     r&   r  z&_filter_stack_entry.<locals>.<genexpr>%  s)     OAaDEM3adem4Os   )all)r   filtered_entriess   ` r&   _filter_stack_entryr_    s     O>NOOOr'   z[memory]z[OutOfMemory]c                 .    t         t        ddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r8   filtered_out_namess     r&   _filter_namerd  ,  s2     	 *.)	 %%%r'   c                 N    t               }||    } |r| j                  d      rd} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r8   with_wildcardstring_tables      r&   _rewrite_nameri  ?  s,    =LD???+"DKr'   c
                 t  23456 t        |       dk(  ryt        d | D              }
t        d | D              }t        d | D              }t        d | D              }| d   j                  }|s|s|rt        d      t        d | D              }t	        t        | fd
d      |
|||      } t        | D cg c]  }t        |j                         c}      dz   }|t        ||      }t        | D cg c]   }t        t        |j                              " c}      dz   }|t        ||      }d}|}d	}g }| D ]C  }|j                  t        |j                        dkD  s)|j                  |j                         E t        |      dkD  }|rLt        |D cg c]$  }t        |D cg c]  }t        |       c}      & c}}      dz   }|t        ||      }g d}|
r|j                  g d       |r2|j                         }|j                  d| d| d| d| dg       |rT|j                  ddg       |r|j                  ddg       |r*|j                         }|j                  | dd| dg       |j                  d       t        d | D              }|r|j                  d       d2dg6dg32 g4d}d42346fd 	}d! } ||       |d"d	 D ]
  }  ||        |r|j                  d#        ||       |r|j                  d$        ||d%&       |rug }!| D ]-  }|j                  dkD  s|!j                  |j                         / t        |!      dk7  r1 |t        |!            \  }"}#|j                  d'|#         ||       nd(}6d   }$3d   }%4d   }&d	}g 55fd)}'t!        | D (cg c]  }(|(j"                   c}(      })d}*d}+| D ]  }|j$                  t&        j(                  k(  r/|j*                  s-|s|*|j,                  z  }*?|+|j.                  z  }+O|j$                  t&        j0                  k(  r|*|j,                  z  }*||j$                  t&        j2                  k(  s|+|j.                  z  }+ | |'d*|&z          |'|       |	r |'d*|&z          |'d+        |'|%        |' |$j4                  |         |'|%       d, },d}-| D ]T  }|-|k(  r nL|	r|j6                  |-d"z  }-|j                  }.|t        |.      |d-z
  k\  r|.d	|d-z
   d.z   }.|.t9        |j"                  |)      |j:                  |j<                  st9        |j>                  |)      nd|j@                  |jB                  g}/|
rG|/j                  |jD                  t9        |j,                  |*      |jF                  |jH                  g       |rG|/j                  |jJ                  t9        |j.                  |+      |jL                  |jN                  g       |r|/j                  tQ        |jR                        tQ        |jT                        g       |r9|/j                  tQ        |jV                        tQ        |jX                        g       |r9|/j                  tQ        |jZ                        tQ        |j\                        g       |/j                  |j^                         |r|/j                  |j`                         |r'|/j                  t        |j                        d	|        |rA|j                  dk  r|/j                  d/       n |/j                  |j                  "z  d0       |rAd}0t        |j                        dkD  r |,|j                  d   |      }0|/j                  |0        |' |$j4                  |/        |sdgt        |      d"z
  z  }1|j                  d"| D ]"  } |' |$j4                  |1 |,||      gz           $ |1j                  d        |' |$j4                  |1        W  |'|%        |'d1tc        |)              |
r |'d2tc        |*              |r% |'d|j                          d3tc        |+              dje                  5      S c c}w c c}w c c}w c c}}w c c}(w )5zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   :   K   | ]  }|j                   d kD    ywr   N)r   r  rK   s     r&   r  z_build_table.<locals>.<genexpr>X  s     K522Q6K   c              3   :   K   | ]  }|j                   d kD    ywrl  )r  rm  s     r&   r  z_build_table.<locals>.<genexpr>Y  s     LEu33a7Lrn  c              3   :   K   | ]  }|j                   d kD    ywrl  )r   rm  s     r&   r  z_build_table.<locals>.<genexpr>Z  s       27))A-rn  c              3   :   K   | ]  }|j                   d kD    ywrl  )r  rm  s     r&   r  z_build_table.<locals>.<genexpr>]  s       49++a/rn  zAuse_device is None, but there is private device performance data.c              3   l   K   | ],  }|j                   d uxr t        |j                         dkD   . y wr   )r   r6   rm  s     r&   r  z_build_table.<locals>.<genexpr>f  s;       
		4	'	GC0B0B,Ca,G	Gs   24Nc                     t        |       S r/   )r   )r[   rt   s    r&   rL   z_build_table.<locals>.<lambda>m  s    73+@ r'   T)rG   reverser         )Namez
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avg)z	Self CUDAzSelf CUDA %z
CUDA totalzCUDA time avgzSelf z %z totalz	 time avgzCPU MemzSelf CPU MemzCUDA MemzSelf CUDA Memz Memz
# of Callsc              3   :   K   | ]  }|j                   d k7    yw)rQ   N)rI   )r  r[   s     r&   r  z_build_table.<locals>.<genexpr>  s     =s*=rn  zNode IDr      c                     dxx   d|z   t        |       z   dz   dz  z   z  cc<   dxx   d| z  dz  z   z  cc<   dxx   | z   z  cc<   y )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r&   
add_columnz _build_table.<locals>.add_column  sh    qHs7|+c1S<5GH	
 	qS7]cL.@AAg44r'   c                 $   g d}| dkD  sJ t        dt        t        j                  |       dz  t	        t        |      dz
                    }|dk\  r|t        |      k  sJ t        dt        j                  |      dz        |t        |         fS )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r3   
   g      )	maxminmathlog10floatr6   powfloorr   )r  flop_headers	log_flopss      r&   auto_scale_flopsz&_build_table.<locals>.auto_scale_flops  s    
 qyy3tzz%014eC<MPQ<Q6RST	A~)c,.?"???BI.57c)n9UVVr'   r3   zInput ShapeszSource Location<)r~  zTotal Fc                 J    j                  |        j                  d       y )Nr   )rZ   )r   results    r&   rZ   z_build_table.<locals>.append   s    adr'   =z1This report only display top-level ops statisticsc                 t    t        |       |kD  r)t        |       |z
  }| |d  } t        |       dkD  rd| dd  z   } | S )Nr  ...)r6   )r   src_column_widthoffsets      r&   	trim_pathz_build_table.<locals>.trim_path!  sI    t9''Y!11F=D4y1}tABx'r'   r  r  z--z8.3fzSelf CPU time total: zSelf CUDA time total: z time total: )>)3r6   anyr   RuntimeErrorr   rU   r  rG   r  r   r   rj   rZ   r>   upperr  rq   rr   rS   r   rT   r   r   r   r$  r-  r   r7   r   r   rR   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r   rI   r   join)7r]   rt   ry   ru   rv   rw   rx   r   r   rz   has_cuda_timehas_cuda_memhas_privateuse1_timehas_privateuse1_memr   has_input_shapesr[   name_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  stacks	has_stackrj   r   headersprivateuse1append_node_idMAX_STACK_ENTRYr  r  _	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthrZ   rK   sum_self_cpu_time_totalsum_self_cuda_time_totalsum_self_privateuse1_time_totalr  event_limitr8   
row_values	src_fieldempty_headersr  r  r  r  r  s7    `                                                @@@@@r&   r{   r{   H  sE	    6{aKFKKMLVLLL ;A   =C  %%J.2FO
 	
   
 6@$O"!)!
 V<cS\<=A( 13HIVLcs3s'7'7#89LMPQQ*!"57NO-F %99 S^a%7MM#))$% FaI6J%e4Uc%j45JKaO 	  +"#35IJG 	
  &&(}%}B'-v&-y)		
 	
 NN# $**,KNN"m4(K=- NN< =f==Ny! LTNTN$}oOO5 5W  !QR[ )'() ~&&'()#c2	 	,Cyy1}  +	, y>Q*:3y>*J'[,NNVL>23)*J"J"J!!$KJ F "&"Q5#<#<"QR &'# O??jnn,}}!,0H0HH,3s7V7VV3__
/$(@(@@$__
 6 66+s/N/NN+O s[ !vs[ !BC
:
:g&'
: K f6)# S^^%?1Kww ,T>SVW>W1W50146>Ds668OP'' << s113JK""

 00&002J ++%%
  77&779X 22,,
  #3#7#78"3#<#<=	 !! 's'<'<=&s'A'AB	 #!! 's'C'CD&s'H'HI	 	II	
 ckk*c#"2"234H5HIJyyA~!!$'!!SYY%<T$BDI399~!%ciil4DE	i( z  *-.DCL1$45M1_5 %J%%'9U<L+M*NN   $$:$$m45Mf6P :
"<0G#H"IJK'5M(N'OPQJ$$&'}\Ba5b4cd	
 776?M	 = M 5Jz #Rs*   :b 7%b%b/
,b*>	b/
=b5*b/
)F)	NNr   r   r   r   FFF)%rN  rV   r  collectionsr   r   operatorr   typingr   r   r   r	   r
   r?  torch.autogradr   __all__listr   r   r   r   r   r   r   r   r   r   r   r   r_  ra  rb  rd  ri  r{   r   r'   r&   <module>r     s       /  3 3  %	E EP
5"DZ Z:	% 	% 
H<	=]
' ]
@h
* h
V+ 6 6*	P  * && Rr'   