
    '}h?4                     T   d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZ d d	fd
efdZ e j"                  ed d      Z e j"                  ed d	      Ze G d d             Ze G d d             Z G d d      Z G d d      ZddZd d dfdZd Zd Zy)    N)deque)	dataclass)DictList)_KinetoEventprofile)
DeviceTypec                     | j                   S N)childrenxs    T/var/www/html/test/engine/venv/lib/python3.12/site-packages/torch/profiler/_utils.py<lambda>r      s
    1::     Freversec              #      K   |rt         nd }t         ||             }|r4 ||      }|  | ||            D ]  }|j                  |        |r3y y w)Nc                     | S r    r   s    r   r   z_traverse.<locals>.<lambda>   s    q r   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser       s\     H[EeDk"I
Y'
 Z!89 	*K[)	* s   AAAc                 "    | j                         S r   )popr   s    r   r   r      s    aeeg r   T)r   r   c                 "    | j                         S r   )popleftr   s    r   r   r      s     r   c                   V    e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   e	d        Z
y)EventMetricsr   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 T    | j                   dk(  ry| j                  | j                   z  S )Nr   g        )r'   r)   selfs    r   fraction_idle_timezEventMetrics.fraction_idle_time$   s*      A%  4#8#888r   N)__name__
__module____qualname__r'   int__annotations__r(   r)   r*   propertyr.   r   r   r   r&   r&      s=    cL#L#K9 9r   r&   c                   0    e Zd ZU eed<   eed<   dZeed<   y)Intervalstartendr   r*   N)r/   r0   r1   r2   r3   r*   r   r   r   r6   r6   +   s    J	HKr   r6   c                   6    e Zd Zd Zd Zd Zd Zdee   fdZ	y)EventKeyc                     || _         y r   event)r-   r=   s     r   __init__zEventKey.__init__3   s	    
r   c                 @    t        | j                  j                        S r   )hashr=   idr,   s    r   __hash__zEventKey.__hash__6   s    DJJMM""r   c                 \    | j                   j                  |j                   j                  k(  S r   )r=   rA   )r-   others     r   __eq__zEventKey.__eq__9   s    zz}}..r   c                 0    | j                   j                   S r   )r=   namer,   s    r   __repr__zEventKey.__repr__<   s    **//"#r   	intervalsc                    d}t        |d       }|rgt        | j                  j                  |d   j                        }t        | j                  j                  |d   j                        }||k  r|||z
  z  }d\  }}|t        |      k  r||   }||   }|dz  }|j                  |j                  kD  r2|j                  |j                  kD  r|dz  }U|j                  |_        |}t        | j                  j                  |j                        }t        | j                  j                  |j                        }||k  r|||z
  z  }|t        |      k  r|S )Nr   c                     | j                   S r   r7   r   s    r   r   z,EventKey.intervals_overlap.<locals>.<lambda>A   s
    AGG r   key)r      rO   )	sortedmaxr=   start_time_nsr7   minend_time_nsr8   len)	r-   rI   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapzEventKey.intervals_overlap?   sJ   9*;<	

 8 8)A,:L:LMMdjj44il6F6FGK{*m ;;1#i. %aLM%aLMFA  =#6#66 $$}'8'88FA*7*;*;M'A

 8 8-:M:MNMdjj44m6G6GHK{*m ;;! #i. $ r   N)
r/   r0   r1   r>   rB   rE   rH   r   r6   r]   r   r   r   r:   r:   2   s&    #/$4> r   r:   c                   B    e Zd ZdefdZd Zd Zd Zd Zdde	de
fd	Zy
)BasicEvaluationprofc                 X   || _         i | _        | j                          t        d | j                  j	                         D        d       | _        | j
                  D cg c]  }|j                   c}| _        g | _        | j                         | _
        | j                          y c c}w )Nc              3       K   | ]  }|  y wr   r   ).0es     r   	<genexpr>z+BasicEvaluation.__init__.<locals>.<genexpr>f   s     ,1Q,s   c                 .    | j                   j                  S r   )r=   rR   r   s    r   r   z*BasicEvaluation.__init__.<locals>.<lambda>f   s    AGG<Q<Q r   rM   )r	   metricscompute_self_timerP   keys
event_keysr=   eventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r-   r`   rd   s      r   r>   zBasicEvaluation.__init__a   s    57  ,))+,2Q
 )-81qww8/1 $ 8 8 :  9s   B'c                 6   | j                   j                  J t        | j                   j                  j                               }|r|j	                         }|j
                  }|j                  D ]"  }||j
                  z  }|j                  |       $ t        |      | j                  vs!J d|j                   d|j                          t        |      | j                  t        |      <   |j
                  | j                  t        |         _        |ryy)zM
        Computes event's self time(total time - time in child ops).
        NzDuplicate id: z, )r(   )r	   kineto_resultsr   experimental_event_treer"   r'   r   r   r:   rg   rA   rG   r&   )r-   stackr   	self_timer   s        r   rh   z!BasicEvaluation.compute_self_timem   s     ||**666dll11IIKL J"33I)22 *[999	[)* $DLL8C
b0ABC81=91UDLL*-. ",!<!< LL$ r   c                 8   | j                   j                  J | j                   j                  j                         }d d t        fd|D        d       }t        fd|D        d       }t        ||z   d       | _        i }d	}|D ]  t        |fd
|      }||<   ||n|}  d	}d}||z   | j                  z   }	d }
g }|	j                  |
       |	D ]6  }t        |d      rF|j                         dz  }|j                         |j                         z   dz  }||v r/||   *||   }n$t        |d      r|j                  }|j                  }|t        |      k  rF||   j                         dz  k  r-|dz  }|t        |      k  r||   j                         dz  |k  r-||z
  dz   }t        |d	      }t        |d      r|j                  t        |             t        |d      s|| j                   t#        |         _        9 |S )z
        Computes queue_depth at each event. This will calculate the queue depth data for
        All the events in the tree.
        This will return a list of Interval of queue depth data of cuda launch and kernels.
        c                      | j                   dk(  S )NcudaLaunchKernel)rG   rd   s    r   is_cuda_launch_kernelzBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   s    66///r   c                     | j                         t        j                  k(  xr d| j                  j	                         vS )Nmem)device_typer
   CUDArG   lowerrx   s    r   is_cuda_kernelz;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   s+    ==?joo5U%qvv||~:UUr   c              3   4   K   | ]  } |      s|  y wr   r   )rc   rd   ry   s     r   re   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s     D1+@+CQD   c                 "    | j                         S r   start_usr   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>       !**, r   rM   c              3   4   K   | ]  } |      s|  y wr   r   )rc   rd   r   s     r   re   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s     =1>!+<Q=r   c                 "    | j                         S r   r   r   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   r   r   c                 "    | j                         S r   r   r   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   s    1::< r   r   c                 F    | j                         j                         k(  S r   )linked_correlation_id)r   cuda_launch_events    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   s!    !113$::<= r   rL   c                     t        | d      r| j                         dz  S t        | d      r| j                  S t        d      )Nr     rR   zUnknown Event Type)hasattrr   rR   	Exceptionr<   s    r   new_old_event_comparatorzEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   s@    uj)~~'$..uo.***011r   r   r   rR   rO   )r	   rq   rk   rP   rl   index_of_first_matchsortr   r   duration_usrR   rT   rU   rQ   r   r6   rg   r:   r*   )r-   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   rn   r=   
start_timeend_timecurrent_queue_depthr   r   ry   s                   @@@r   rm   z#BasicEvaluation.compute_queue_depth   sy    ||**666,,55<<>	0	V $DD&
 $==&

 "!339O
 35!3 	T("=(	E 16N,-*/*;AS	T  !!'*<<t{{J
	2 ,.45 	PEuj)"^^-4
!NN,u/@/@/BBdJN*~e/D/P+9%+@(0"00
 ,, %s+='>>'(<=FFHDP %)$	 %s+='>>'(<=FFHDP #79M"MPQ"Q"%&91"=uj) ''Z3FG 0<OXe_-95	P8  r   c                    d}d}g }| j                   r| j                  rw|t        | j                  d   j                  | j                   d   j                        t        | j                   d   j
                  | j                  d   j                        gz  }| j                   D ][  }|j                  dk(  r|s|j
                  }d}|j                  dkD  s2|s5|j                  t        ||j                               d}] | j                  j                         D cg c]  }|j                   }}|D ]7  }t        |      j                  |      | j                  t        |         _        9 yc c}w )z4
        Computes idle time of the profile.
        Fr   r   TN)rn   rk   r6   rR   r7   r8   rT   r*   r   rg   ri   r=   r:   r]   r)   )r-   idle
idle_startidle_intervals
data_pointrd   
event_listr=   s           r   ro   z!BasicEvaluation.compute_idle_time   sM   
 
)+  T[[Q55t7L7LQ7O7U7UV..r266B8S8ST N
 // 	J%%*4'^^
%%)d%%hz:;K;K&LM	 (,||'8'8':;!agg;
; 	0E9A:/ LL%)6	0 <s   E.c                    ddl }t        t        | j                              }|D cg c]  }|j                   }}dd}g }d}|t        |      k  r||   kD  r|dz  }t        |dz   t        |            D ]i  }	t        |fd|	      }
t        ||	|
      }|%||   |k\  s.|j                  t        ||   j                  ||   j                               |
|
n|} n |dz  }|t        |      k  r| j                  j                         D cg c]  }|j                  |      r| }}|r|j                  |D cg c]  }| j                  |   j                    c}|j"                        }|j                  |D cg c]  }| j                  |   j$                   c}|j"                        }||j'                  |      z
  |j)                  |      z  }||j'                  |      z
  |j)                  |      z  }|d	|z  z   }t+        t-        ||      d
 d      D cg c]  \  }}|	 }}}|d| }|S c c}w c c}w c c}w c c}w c c}}w )a  
        Filter and Rank the events based on some heuristics:
        1) Events that are in the falling phase of the queue depth.
        2) Events that have a high idle_time, self_time difference.

        Parameters:
            length: The number of events to return.
        r   N   rO   c                     | k  S r   r   )r   bottom_threasholds    r   r   z-BasicEvaluation.rank_events.<locals>.<lambda>  s    .?)? r   rL   )r7   r8   )dtypeg333333?c                     | d   S )Nr   r   r   s    r   r   z-BasicEvaluation.rank_events.<locals>.<lambda>7  s
    !A$ r   T)rN   r   )torchlistr   rn   r*   rU   ranger   argmaxr   r6   r7   rg   ri   r]   tensorr(   float32r.   meanstdrP   zip)r-   lengthr   rn   rd   	qd_valuestop_threasholddecrease_intervalrY   rZ   next_minimum_idxpeak_idxr=   r   rt   	idle_timenormalized_gainnormalized_selfheuristic_score_list_r   s                       @r   rank_eventszBasicEvaluation.rank_events   s    	)>)> ?@,<=qQ]]=	=#i. |//Q1q5#i.1  $8?q$  ")1:JK 'Ih,?>,Q%,, ,X6<<>Nq>Q>W>W
 -=,H(aA!" FA+ #i. 2 **,
&&'89 

 

 ?IJee$11Jmm % I EOPEe$77Pmm % I  )5::i+@@EIIiDXXO(5::i+@@EIIiDXXO#2S?5J#J 
 !',j9& !Au J  $GV,Js >:
 K Qs   H>I I I(Ir   print_enablec                    | j                  |      }|s|S |rdnd}|dj                  |D cg c]@  }d d| dt        |j                         d| j                  |   j
                  dz  d	d
d 	B c}      z  }|rt        |       |S c c}w )NzOptimizable events:
zNo events to optimize

zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: d   z.2fz%
)r   joinsource_code_locationr=   rg   r.   print)r-   r   r   r   outputr=   s         r   get_optimizable_eventsz&BasicEvaluation.get_optimizable_events>  s    %%f-
,6(<U$)) (  H g +EKK89 :||E*==CCH I		
 		
 &Ms   AB	
N)rO   T)r/   r0   r1   r	   r>   rh   rm   ro   r   r2   boolr   r   r   r   r_   r_   `   s9    
!W 
!=,T l08GRS D r   r_   c                 z    ||t        |       k\  rt        |       }t        ||      D ]  } || |         s|c S  y r   )rU   r   )seq	predicater7   r8   rY   s        r   r   r   S  sF    
{cSXo#h5# SVH r   c                     | S r   r   r   s    r   r   r   \  s    a r   c                 h    | || } t        |       dk(  ry | j                  t        | |            |z   S )Nr   rM   )rU   r   rQ   )r   rN   r7   r8   s       r   r   r   \  s6    
eC.C
3x1}99S#&'%//r   c                 ~    | ;t        j                  d| j                        }|| j                  } 1| j                  S y)Nz
\.py\(.*\)zNo source code location found)researchrG   parent)r=   matchs     r   r   r   c  s:    

		-4=LLEzz*r   c                  J    ddl m}   |        5  	 d d d        y # 1 sw Y   y xY w)Nr   r   )torch.autograd.profilerr	   r   s    r   _init_for_cuda_graphsr   q  s"    /	   s   ")r   N)	functoolsr   collectionsr   dataclassesr   typingr   r   torch.autogradr   r   r	   torch.profilerr
   r   r    partialtraverse_dfstraverse_bfsr&   r6   r:   r_   r   r   r   r   r   r   r   <module>r      s     	  !  ' + % *>u * * !y  4EtT y  ,e
 
9 
9 
9   + +\p pf  qd 0+r   