
    '}h<o                        d dl Z d dlZd dlZd dlZd dlmZ d dlmc mc mZ	 d dl
mZ d dlmZmZmZmZmZmZmZmZ ddlmZmZ d dlmZmZ d dlmZmZmZmZm Z m!Z! d dl"m#Z# d d	l$m%Z% d d	l$m%Z& g d
Z'ejP                  ejR                  jP                  ejT                  ejR                  jT                  iejR                  jP                  ej                  jP                  ejR                  jT                  ej                  jT                  idZ+d Z,	 d!dZ-d"dZ.d Z/d Z0d#dZ1d$dZ2d Z3d Z4	 	 	 d%dZ5d Z6d Z7d&dZ8dejr                  ddfdZ:d&dZ;d#dZ<	 	 d'dZ=	 	 d(dZ>d Z?d)d Z@y)*    N)_FusedModule))get_default_dynamic_quant_module_mappings(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_listno_observer_set_has_special_act_post_process_get_special_act_post_process   )get_qparam_dict)has_no_children_ignoring_parametrizations)DeQuantStubQuantWrapper)_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit_activation_is_memoryless)type_before_parametrizations)_is_activation_post_process)
get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamicprepare_qatquantize_qatconvertswap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classc                      t         S )z,Defines the default custom config dict.
    )_DEFAULT_CUSTOM_CONFIG_DICT     ]/var/www/html/test/engine/venv/lib/python3.12/site-packages/torch/ao/quantization/quantize.pyr   r   =   s
     '&r(   c                    |j                  t        |       |      }|j                  ||      }t        | d|      }t        j                  j
                  j                  j                  ||        t        ||       }|| _        | j                         D ]T  \  }}|r|dz   |z   n|}	|3||j                  dg       v r)t        |      |j                  dg       v rGt        ||||	       V y)a  This is a helper function for `propagate_qconfig_`

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name of submodule to quantization
                     configuration
        qconfig_parent: quantization config of parent module, we will fallback to
                       this config when there is no specified config for current
                       module
        prefix: corresponding prefix of the current module, used as key in
                qconfig_dict
        prepare_custom_config_dict: dictionary for custom handling of modules
                                    see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    qconfig.Nnon_traceable_module_namenon_traceable_module_class)getr   getattrtorchaoquantizationr+   _assert_valid_qconfigr   named_childrentype_propagate_qconfig_helper)
moduleqconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechildmodule_prefixs
             r)   r7   r7   B   s    ( "%%&B6&JN[N!%%fn=NVY?N	HH!!77O >~v V.FN,,. 	e/5t+4%-.223NPRSSE{8<<=Y[]^^%|%>	r(   c                 0    |i }|i }t        | ||       y)a  Propagate qconfig through the module hierarchy and assign `qconfig`
    attribute on each leaf module

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name or type of submodule to
            quantization configuration, qconfig applies to all submodules of a
            given module unless qconfig for the submodules are specified (when
            the submodule already has qconfig attribute)
        prepare_custom_config_dict: dictionary for custom handling of modules
            see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    N)r<   )r7   )r8   r9   r<   s      r)   r   r   j   s(      !)%'"flOijr(   c                 $    | j                  |      S )z3Forward hook that calls observer on the output
    activation_post_process)selfinputoutputs      r)   _observer_forward_hookrI      s     ''//r(   c                 *    | j                  |d         S )z7Forward pre hook that calls observer on the output
    r   rD   )rF   rG   s     r)   _observer_forward_pre_hookrK      s     ''a11r(   Fc                     t        | d      sJ d       |r| j                  t        d      }y | j                  t        d      }y )NrE   zGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrK   register_forward_hookrI   )r8   pre_hookhandles      r)   &_register_activation_post_process_hookrS      sX    645 RQR511& 2 
 --"D . 
r(   c                 j   |
t               }|i }Gt        |       }t        |      dk  s
J d|        t        |      dkD  rt        t	        |            ndddd dfd	}| j                         D ]m  \  }}t        |      t        j                  fv r$t        t        |      t        j                  t        j                  f      rB |      s`t        |d      sJ d	t        |       d
        |j                        |_        t!        |t"              r |      s ||       |t        |      |v r |      s ||       t%        |      rt'        |      }	 |||	        |      r[t        |      |v rN|t        |         j)                  |      }
t+        | ||
       |t        |         t-               vsU ||
       _t/        ||||       p t1        |       r<t!        | t2        j                  j4                        st        |       |v r	 ||        yyyy)as  Add observer for the leaf child of the module.

    This function insert observer module to all leaf child module that
    has a valid qconfig attribute.

    Args:
        module: input module with qconfig attributes for all the leaf modules that we want to quantize
        qconfig_propagation_list: a list of quantizable modules that will have observers added to them
            if they are leaf nodes
        device: parent device, if any
        non_leaf_module_list: list of non-leaf modules we want to add observer

    Return:
        None, module is modified inplace with added observer modules and forward_hooks
    Nr   zR_add_observer_ only works with cpu or single-device CUDA modules, but got devices r   c                 ^    || j                         n |       }||j                  |       |S N)
activationto)r+   devicespecial_act_post_processrW   s       r)   get_activation_post_processz3_add_observer_.<locals>.get_activation_post_process   s3    -E-MW'')SkSm
MM&!r(   c                 :    t        | d      xr | j                  d uS )Nr+   rN   r+   )ms    r)   needs_observationz)_add_observer_.<locals>.needs_observation   s    q)$>$)>>r(   c                      |       rVt        | t              sE| j                  d | j                  |             t	        | t        | j                               yyy)zn Adds an activation post process module and register
        a pre or post hook that calls the module
        rE   rQ   N)
isinstancer   
add_moduler+   rS   r   )r^   rZ   rY   r[   r_   s     r)   insert_activation_post_processz6_add_observer_.<locals>.insert_activation_post_process   s[    
 Q
1k(BLL24O		6#;5= > 31?XYZYbYb?cd )Cr(   rE   zfunctional class z- has no pre-defined `activation_post_process`rV   )r   _get_unique_devices_lennextiterr5   r   nnDropout
issubclassnnqFloatFunctionalQFunctionalrN   r+   rE   rb   r   r
   r   
from_floatsetattrr	   _add_observer_r   r1   
Sequential)r8   qconfig_propagation_listnon_leaf_module_listrY   custom_module_class_mappingdevicesrd   r?   r@   rZ   observed_childr[   r_   s      `       @@r)   rq   rq      s=      '#G#I "*&(# ~&v.7|q  	
`ah`ij	
  ),Gq(8d7m$d?e ,,. we'.2::,>4U;c>Q>QSVSbSb=cd 'u&?@ '(DU(K'LLyz@ 1LEMM[a0b-|, '.u5!-2Nu2UYm2m '.u5*51'DU'K$*52JKu%*Fu*MQl*l89UV[9\]hhinoNFD.1 ++G+NOWfWhh.~>5":<PRXZuv9w@ 18FTYT\T\TgTgAh'/3KK&v. L Bi8r(   c                     | j                         D ch c]  }|j                   c}| j                         D ch c]  }|j                   c}z  S c c}w c c}w rV   )
parametersrY   buffers)r8   ps     r)   re   re      sF    $//12AHH2!>>+,a,- -2,s
   AAc                     t        |       r#t        | d      r| j                  rt        |       S | j	                         D ]  \  }}t        |      | j                  |<    | S )a{  Wrap the leaf child module in QuantWrapper if it has a valid qconfig
    Note that this function will modify the children of module inplace and it
    can return a new module which wraps the input module as well.

    Args:
        module: input module with qconfig attributes for all the leaf modules
        that we want to quantize

    Return:
        Either the inplace modified module with submodules wrapped in
        `QuantWrapper` based on qconfig or a new `QuantWrapper` module which
        wraps the input module, the latter case only happens when the input
        module is a leaf module and we want to quantize it.
    r+   )r   rN   r+   r   r5   r   _modules)r8   r?   r@   s      r)   r   r      s]     18WVY=W\b\j\jF##,,. 9e 1% 89Mr(   c                 l   t         j                  j                  d       |
t               }|j	                  di       }|st        j                  |       } |}|
t               }t        | d       t        d | j                         D              st        j                  d       t        | |||       | S )a  Prepares a copy of the model for quantization calibration or quantization-aware training.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    The model will be attached with observer or fake quant modules, and qconfig
    will be propagated.

    Args:
        `model`: input model to be modified in-place
        `inplace`: carry out model transformations in-place, the original module is mutated
        `allow_list`: list of quantizable modules
        `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer
        `prepare_custom_config_dict`: customization configuration dictionary for prepare function

    .. code-block:: python

       # Example of prepare_custom_config_dict:
       prepare_custom_config_dict = {
           # user will manually define the corresponding observed
           # module class which has a from_float class method that converts
           # float custom module to observed custom module
           "float_to_observed_custom_module_class": {
               CustomModule: ObservedCustomModule
           }
        }

    z!quantization_api.quantize.prepareNr#   r9   c              3   P   K   | ]  }t        |d       xr |j                     yw)r+   Nr]   ).0r^   s     r)   	<genexpr>zprepare.<locals>.<genexpr>7  s#     Lqwq)$22Ls   $&zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)ru   )r1   _C_log_api_usage_oncer   r/   copydeepcopyr   r   anymoduleswarningswarnrq   )modelinplace
allow_listobserver_non_leaf_module_listr<   ru   rs   s          r)   r   r   	  s    > 
HH  !DE!)%C%E""<"@"@Ahjl"me$  *#G#I u40 LEMMOLL U 	V ')F$?A Lr(   c                      t         d      r!t         j                        rt         d       d fd	} |d        |d       y )NrE   Fc                     | rj                   nj                  }| rt        nt        }t	               }|j                         D ]  \  }}||u s|j                  |        |D ]  }|j                  |        y rV   )_forward_pre_hooks_forward_hooksrK   rI   setitemsaddpop)rQ   hook_mapobserver_hookhandle_ids_to_remove	handle_idhook_fnr8   s         r)   remove_hooksz5_remove_activation_post_process.<locals>.remove_hooksI  sy    086,,f>S>S6>2DZ"u"*.."2 	4Iw-'$((3	4 . 	$ILL#	$r(   Tra   F)rN   r   rE   delattr)r8   r   s   ` r)   _remove_activation_post_processr   A  s@     v01"6#A#AB12$ $% r(   c                 v    | j                         D ]  }t        |        t        | d      r| `t	        |        y)zClean up the qconfig left in the module so that new qconfig can be
    propagated.

    Args:
        module: module to be cleaned up
    r+   N)children_remove_qconfigrN   r+   r   )r8   r@   s     r)   r   r   W  s;     "  vy!N#F+r(   c                     t         j                  j                  d       |
t               }|st	        j
                  |       } | j                          t        | d        || g|  t        | |d       | S )a  Quantize the input float model with post training static quantization.

    First it will prepare the model for calibration, then it calls
    `run_fn` which will run the calibration step, after that we will
    convert the model to a quantized model.

    Args:
        model: input float model
        run_fn: a calibration function for calibrating the prepared model
        run_args: positional arguments for `run_fn`
        inplace: carry out model transformations in-place, the original module is mutated
        mapping: correspondence between original module types and quantized counterparts

    Return:
        Quantized model.
    z"quantization_api.quantize.quantizeTr   )	r1   r   r   r   r   r   evalr   r!   )r   run_fnrun_argsmappingr   s        r)   r   r   f  sf    " 
HH  !EF:<e$	JJLE4 
58E7D)Lr(   c                 p   t         j                  j                  d       ||t         j                  k(  r|t        j
                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        i}n|t         j                  k(  r|t        j
                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        i}n(|t         j                  k(  r+t        j                  t         t        j"                  t         i}n|t         j$                  k(  rt        j                  t&        i}nt)        d| d      t+        |t,              r|t         j                  u rt        }n`|t         j                  u rt        }nG|t         j                  u rt         }n.|t         j$                  u rt&        }nt/        dt1        |            t3        t5        |t7        j8                  |                  }|
t;               }|st=        j>                  |       } | jA                          tC        | |       tE        | |d       | S )av  Converts a float model to dynamic (i.e. weights-only) quantized model.

    Replaces specified modules with dynamic weight-only quantized versions and output the quantized model.

    For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization
    by default is performed for layers with large weights size - i.e. Linear and RNN variants.

    Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`.
    If `qconfig` is provided, the `dtype` argument is ignored.

    Args:
        model: input model
        qconfig_spec: Either:

            - A dictionary that maps from name or type of submodule to quantization
              configuration, qconfig applies to all submodules of a given
              module unless qconfig for the submodules are specified (when the
              submodule already has qconfig attribute). Entries in the dictionary
              need to be QConfig instances.

            - A set of types and/or submodule names to apply dynamic quantization to,
              in which case the `dtype` argument is used to specify the bit-width

        inplace: carry out model transformations in-place, the original module is mutated
        mapping: maps type of a submodule to a type of corresponding dynamically quantized version
            with which the submodule needs to be replaced

    z*quantization_api.quantize.quantize_dynamicz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr   )#r1   r   r   qint8ri   Linearr   LSTMGRULSTMCellRNNCellGRUCellfloat16r   quint8EmbeddingBagr   	Embeddingquint4x2r   
ValueErrorrb   r   RuntimeErrorstrdictzip	itertoolsrepeatr   r   r   r   r   r!   )r   qconfig_specdtyper   r   default_qconfigs         r)   r   r     s   < 
HH  !MNEKK		3105

4

4L emm#		3105

4

4L ell""C@L enn$"HL GwNkln n	L#	&EKK5Oemm#5Oell"?Oenn$DOOQTUZQ[\\Ci.>.>.OPQ;=e$	JJLul+E7D)Lr(   c                 2   t         j                  j                  d       | j                  sJ d       |
t	               }|st        j                  |       } t        | d       t        | |dd       t        | t        |j                               d       | S )	a  
    Prepares a copy of the model for quantization calibration or
    quantization-aware training and converts it to quantized version.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    Args:
        model: input model to be modified in-place
        mapping: dictionary that maps float modules to quantized modules to be
                 replaced.
        inplace: carry out model transformations in-place, the original module
                 is mutated
    z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNr   TF)r   r   remove_qconfig)r   r   )r1   r   r   trainingr   r   r   r   r!   r   r   values)r   r   r   s      r)   r   r     s}     
HH  !HI>>NNN>13e$u40E7DGEW^^5E1FPTULr(   c                     t         j                  j                  d       |st        j                  |       } | j                          t        | d        || g|  t        | d       | S )ag  Do quantization aware training and output a quantized model

    Args:
        model: input model
        run_fn: a function for evaluating the prepared model, can be a
                function that simply runs the prepared model or a training
                loop
        run_args: positional arguments for `run_fn`

    Return:
        Quantized model.
    z&quantization_api.quantize.quantize_qatTr   )r1   r   r   r   r   trainr   r!   )r   r   r   r   s       r)   r    r      sW     
HH  !IJe$	KKMt$
58E4 Lr(   c                     t         j                  j                  d       |st        j                  |       } t        | |d||       |rt        |        | S )ag  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class. And remove qconfig at the
    end if remove_qconfig is set to True.

    Args:
        `module`: prepared and calibrated module
        `mapping`: a dictionary that maps from source module type to target
                   module type, can be overwritten to allow swapping user defined
                   Modules
        `inplace`: carry out model transformations in-place, the original module
                   is mutated
        `convert_custom_config_dict`: custom configuration dictionary for convert function

    .. code-block:: python

       # Example of convert_custom_config_dict:
       convert_custom_config_dict = {
           # user will manually define the corresponding quantized
           # module class which has a from_observed class method that converts
           # observed custom module to quantized custom module
           "observed_to_quantized_custom_module_class": {
               ObservedCustomModule: QuantizedCustomModule
           }
       }

    z!quantization_api.quantize.convertT)r   is_referenceconvert_custom_config_dict)r1   r   r   r   r   _convertr   )r8   r   r   r   r   r   s         r)   r!   r!   	  sN    : 
HH  !DEv&L#=? Mr(   c                    ||r
t               n	t               }|
t               }|j                  di       }|st	        j
                  |       } i }| j                         D ]A  \  }}t        |t              st        |      |vrt        ||d||       t        |||      ||<   C |j                         D ]  \  }	}
|
| j                  |	<    | S )a  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class

    Args:
        module: input module
        mapping: a dictionary that maps from source module type to target
                 module type, can be overwritten to allow swapping user defined
                 Modules
        inplace: carry out model transformations in-place, the original module
                 is mutated
        is_reference: a flag to enable quantized reference module

    r$   T)r   r   r   r/   r   r   r5   rb   r   r   r   r"   r   r}   )r8   r   r   r   r   ru   reassignr?   modkeyvalues              r)   r   r   0  s      JVDF9; 	!)%C%E""<"@"@Alnp"qv&H**, P	c #|,',4OOS'4!#=?$S'3NOP nn& %
U$% Mr(   c                 b   | }t        | d      r| j                  d}t        |       |v r |t        |          j                  |       }d}nt        |       |v r|t        |          }t        |d      rd|j                  rX| j                  J | j                  j                         } || j
                         t        |      }|j                  | |      }n|j                  |       }d}|r| j                  j                         D ]  }|j                  |        | j                  j                         D ]  }	|	t        us|j                  |	        t        |       }
t        |
      dk  s
J d|
        t        |
      dkD  rt!        t#        |
            nd}|r|j%                  |       |S )	a	  Swaps the module if it has a quantized counterpart and it has an
    `observer` attached.

    Args:
        mod: input module
        mapping: a dictionary that maps from nn module to nnq module

    Return:
        The corresponding quantized module of `mod`
    r+   NFT_IS_REFERENCEr   zOswap_module only works with cpu or single-device CUDA modules, but got devices r   )rN   r+   r   from_observedr   weightr   ro   r   r   rO   r   rI   rP   re   rf   rg   rh   rX   )r   r   ru   new_modswappedqmodweight_post_processweight_qparamspre_hook_fnr   rv   rY   s               r)   r"   r"   X  s    GsI3;;#:',0KK12Ns2STbbcfgGG)#.'97<=Dt_-$2D2D{{...&)kk&8&8&:##CJJ/!01D!E//#~>//#.G"55<<> ?11+>? --446 ;"8811':;
 +3/Gw<1$ abiajk$ -0L1,<T$w-($F

6"Nr(   c                     d }t        | d      r| j                  | ||      dz   <   | j                         D ]!  \  }}|r ||      |z   n|}t        |||       # y)a,  Traverse the modules and save all observers into dict.
    This is mainly used for quantization accuracy debug
    Args:
        mod: the top module we want to save all observers
        prefix: the prefix for the current module
        target_dict: the dictionary used to save all the observers
    c                     | dk(  r| S | dz   S )N r,   r'   )r;   s    r)   
get_prefixz&_get_observer_dict.<locals>.get_prefix  s    2v76C<7r(   rE   N)rN   rE   r5   _get_observer_dict)r   target_dictr;   r   r?   r@   rA   s          r)   r   r     sj    8 s-.FIFaFaJv&)BBC))+ >e5;
6*T15+}=>r(   )Nr   N)NNr   )NNNN)FNNN)NF)NFTFN)NFFN)r   )Ar   r   r   r1   torch.nnri   torch.ao.nn.quantizedr2   	quantizedrl   torch.ao.nn.intrinsicr   +torch.ao.quantization.quantization_mappingsr   r   r   r   r   r	   r
   r   utilsr   r   torch.ao.quantization.stubsr   r   torch.ao.quantization.qconfigr   r   r   r   r   r   torch.nn.utils.parametrizer   torch.ao.quantization.observerr   is_activation_post_process__all__r   quantizableMultiheadAttentionr&   r   r7   r   rI   rK   rS   rq   re   r   r   r   r   r   r   r   r   r    r!   r   r"   r   r'   r(   r)   <module>r      se        # # .	 	 	 N A  D F 	$$
r~~@@.
 	R\\..
))2<<+J+J2	 ' Z^&Pk,0
2


V/p-, .2*.'+6p!,,8 *.U[[!5Sj8. =A7;%P ',7;&P/b>r(   