
    '}h3T                        d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZmZ d dlmZmZmZmZmZmZmZmZmZ g dZ edd      Z G d	 d
e
j4                        Z	 dde
j4                  dedefdZ G d de      Z G d de      Zy)    N)
namedtuple)AnyOptionalTupleListCallableDict))sparse_semi_structured_from_dense_cutlass'sparse_semi_structured_to_dense_cutlass)	fallback_dispatchersemi_sparse_valuessemi_sparse_indicessemi_sparse_detachsemi_sparse_tsemi_sparse_viewsemi_sparse_mmsemi_sparse_addmmsemi_sparse_linear)SparseSemiStructuredTensor!SparseSemiStructuredTensorCUTLASS$SparseSemiStructuredTensorCUSPARSELTto_sparse_semi_structured_SEMI_STRUCTURED_SPARSE_CONFIGz=sparse_min_rows sparse_min_cols dense_min_rows dense_min_colsc                      e Zd ZU dZdZeed<   eej                  e
f   ed<   dZeed<   dZeed<   dZeed	<   eeef   ed
<   eej"                     ed<   eej"                     ed<   eej"                     ed<   eej"                     ed<   eej"                     ed<   eed<   eed<   g dZe	 	 	 d(dej(                  deej"                     deej"                     deej"                     deej"                     deej"                     dededefd       ZdefdZdeee   eej(                  eeef   f   fdZedeej(                  eeef   dej"                  fd       Zej:                  j<                  Zede fd       Z!ed)d*d       Z"edej"                  ddfd       Z#ed ej"                  dej"                  fd!       Z$d" Z%edej"                  dd fd#       Z&dd$d%ej"                  d&eej"                     dej"                  fd'Z'y)+r   a  
    This class implementes semi-structured sparsity as a Tensor subclass.

    Semi-structured sparsity describes a sparsity pattern where n in every 2n elements are sparse,
    depending on the datatype. It is also referred to as 2:4 sparsity or fine-grained
    structured sparsity.

    There are two backends available for semi_structred sparsity, either cuSPARSELt or CUTLASS.
    This class is meant to serve as a base class for both implementations. SparseSemiStructuredCUTLASS
    and SparseSemiStructuredCUSPARSELT both inherit from this class and define three backend-specific items.
    Note that as such, this class cannot be insantiated directly.

    -`_DTYPE_SHAPE_CONSTRAINTS` - A dictionary holding backend specific dense/sparse min shape constraints
    - `def from_dense()` - backend specific compression routines
    - `def _mm()` - backend specifc mm op (either torch._cslt_sparse_mm or torch._sparse_semi_structured_linear)
    r   _DEFAULT_ALG_ID_DTYPE_SHAPE_CONSTRAINTST_FORCE_CUTLASSF_FUSE_TRANSPOSE_PROTOTYPE_WARNING_SHOWNSPARSE_DISPATCHpackedmetapacked_tmeta_tthreads_masksfuse_transpose_cusparseltalg_id_cusparselt)r!   r"   r#   r$   r%   shaperequires_gradc
                    | j                   s1t        j                  dt               d| _         | j	                          ||}
n||}
nt        d      |
j                  |
j                  |
j                  |	d}t        j                  j                  | |fi |}||_        ||_        ||_        ||_        ||_        ||_        ||_        |S )a  
        Create a new instance of the tensor subclass from the compressed sparse representation.

        We have the option to create the subclass with the compressed representations of both X and X', for training.
        For inference, we only need a single representation (either X or X'), while the corresponding other set will be None.

        Depending on the backend selected, certain fields will be set to None. (CUSPARSELT vs CUTLASS)

        Args:
            shape: The shape of the original dense tensor
            packed: The compressed representation of the original dense tensor
            meta: The metadata of the original dense tensor, if it is stored separately
            packed_t: The compressed representation of the transposed original dense tensor
            meta_t: The metadata of the transposed original dense tensor, if it is stored separately
            threads_masks: The masks used by the CUTLASS backend to determine which threads should participate in the computation.
                           Used for pointwise ops.
            fuse_transpose_cusparselt: When running with cuSPARSELt, we have the option to fuse a transposition
                                       with a matmul, which is useful in the case of 2:4 sparse training.
            alg_id_cusparselt: The algorithm id to use when using cuSPARSELT, will have effect on performance

        Returns:
            torch.Tensor: A torch.Tensor wrapper subclass.

        Raises:
            ValueError: If all of the tensor arguments are None.
        zThe PyTorch API of SparseSemiStructuredTensor is in prototype stage and will change in the near future. Please open a Github issue for features requests and see our documentation on the torch.sparse module for further information about the project.Tz3At least one of packed or packed_t must be provided)devicedtypelayoutr)   )r   warningswarnUserWarning_load_dispatch_table
ValueErrorr+   r,   r-   torchTensor_make_wrapper_subclassr!   r"   r#   r$   r%   r&   r'   )clsr(   r!   r"   r#   r$   r%   r&   r'   r)   previous_tensorkwargstensors                [/var/www/html/test/engine/venv/lib/python3.12/site-packages/torch/sparse/semi_structured.py__new__z"SparseSemiStructuredTensor.__new__G   s    N ++MMH
  ,0C(
 $$&$O!&ORSS &,,$**%,,*	
 44S%J6J",+D(#4     returnc                 j    t        | d      sJ | j                  j                   d| j                   dS )Nr(   z(shape=))hasattr	__class____name__r(   )selfs    r:   __repr__z#SparseSemiStructuredTensor.__repr__   s4    tW%%%..))*'$**Q??r<   c                      t        t         fd j                              } j                   j                   j
                   j                  f}||fS )Nc                      t        |       d uS N)getattr)xrC   s    r:   <lambda>z?SparseSemiStructuredTensor.__tensor_flatten__.<locals>.<lambda>   s    WT1-T9 r<   )listfilter	__slots__r(   r&   r'   r)   )rC   inner_tensorstensor_metas   `  r:   __tensor_flatten__z-SparseSemiStructuredTensor.__tensor_flatten__   sV     94>>J
 JJ**""	
 k))r<   rO   c                     |\  }}}} | ||j                  dd       |j                  dd       |j                  dd       |j                  dd       |j                  dd       |||	      S )Nr!   r"   r#   r$   r%   	r(   r!   r"   r#   r$   r%   r&   r'   r)   )get)	r6   rN   rO   
outer_sizeouter_strider(   r&   r'   r)   s	            r:   __tensor_unflatten__z/SparseSemiStructuredTensor.__tensor_unflatten__   s     NYJ(*;] $$Xt4""640"&&z48 $$Xt4'++OTB&?/'

 
	
r<   c                     |j                   | j                  vr%t        | j                   d|j                   d       | j                  |j                      ||||      S )NzI only supports a specific set of operations, can't perform requested op (r?   )_overloadpacketr    NotImplementedErrorrB   )r6   functypesargsr8   s        r:   __torch_dispatch__z-SparseSemiStructuredTensor.__torch_dispatch__   sh    s':'::%<<. !//3}}oQ@  9s""4#7#78udFSSr<   Nc                    t        | dd      t        j                  j                  j                  t
        t        j                  j                  j                  t        t        j                  j                  j                  t        t        j                  j                  j                  t        t        j                  j                  j                  t        t        j                  j                  j                  t        t        j                  j                  j                  t         t        j                  j                  j"                  t$        t        j                  j                  j&                  t$        t        j                  j                  j(                  t*        t        j                  j                  j,                  t.        i| _        || j0                  j3                  |       yyy)zT
        Loads the op overload sparse dispatch table for the current class.
        r    N)rH   r3   opsatenvaluesr   indicesr   is_same_sizer   detach_detachr   tr   viewr   mmr   matmuladdmmr   linearr   r    update)r6   custom_dispatch_tables     r:   r1   z/SparseSemiStructuredTensor._load_dispatch_table   s   
 3)408		%%'9		&&(;		++-@		&&(;		%%'9		  -		##%5		!!>		%%~		$$&7		%%'9#C %0##**+@A 1 9r<   original_tensorc           	      V   |j                   st        d|j                   d      |j                         dk7  rt        d|j                          d      |j	                         st        d      |j
                  | j                  vrt        d|j
                   d      |j                  \  }}| j                  |j
                     j                  }| j                  |j
                     j                  }||k  s||z  s
||k  s||z  rt        d	|j                   d
| d| d      y)z_
        Assert that the given tensor is valid for semi-structured sparse compression.
        zError original_tensor.device= z= is not supported! Only CUDA tensors are currently supported.   zError original_tensor.dim = z; is not supported! Only 2d tensors are currently supported.zXError original_tensor is not contiguous!Only contiguous tensors are currently supported.zError original_tensor.dtype zO is not a supported dtype! dtype must be one of: {cls._DTYPE_SHAPE_CONSTRAINTS}zError original_tensor.shape zS is not supported! Both dimensions must be larger or equal than and a multiple of (z, r?   N)
is_cudaRuntimeErrorr+   dimis_contiguousr,   r   r(   sparse_min_rowssparse_min_cols)r6   rn   mnmin_rowsmin_colss         r:    _validate_device_dim_dtype_shapez;SparseSemiStructuredTensor._validate_device_dim_dtype_shape   sp    &&01G1G0H I= =   A%./B/B/D.E F; ;  ,,.C    (D(DD./D/D.E FG G  $$1//0E0EFVV//0E0EFVVx<1x<1x<1x<./D/D.E FSS[R\\^_g^hhik  <Hr<   dense_inputc                    |j                         dk(  sJ |j                  \  }}| j                  |j                     j                  }| j                  |j                     j
                  }||k  s||z  r| |z  nd}||k  s||z  r| |z  nd}|s|r.t        j                  j                  j                  |d|d|f      S |S )z
        Calculates padding for dense tensor and pads tensor if necessary.
        If padding is not required, this function returns the original tensor.
        rp   r   )
rs   r(   r   r,   dense_min_rowsdense_min_colsr3   nn
functionalpad)r6   r|   rw   rx   ry   rz   to_pad_mto_pad_ns           r:   _pad_dense_inputz+SparseSemiStructuredTensor._pad_dense_input  s      A%%%   1//0A0ABQQ//0A0ABQQ %&LALA2=a$%LALA2=ax88&&**;Ha8RSSr<   c                     | j                   d   }t        j                  | t        j                  || j                  | j
                              S )N)r,   r+   )r(   r3   rh   eyer,   r+   )rC   cols     r:   to_densez#SparseSemiStructuredTensor.to_dense!  s5    jjnxxeii4::dkkRSSr<   c                     t         rG   rY   r6   rn   s     r:   
from_densez%SparseSemiStructuredTensor.from_dense%  s    !!r<   biasBr   c                    t         rG   r   )rC   r   r   r8   s       r:   _mmzSparseSemiStructuredTensor._mm)  s
     "!r<   )Fr   FrG   )r=   N)(rB   
__module____qualname____doc__r   int__annotations__r	   r3   r,   r   r   boolr   r   r   r   r4   rM   staticmethodSizer;   strrD   r   r   rP   classmethodrV   _C_disabled_torch_function_impl__torch_function__r   r]   r1   r{   r   r   r   r    r<   r:   r   r   #   s   " OS"5;;0N#NOOND!OT!%*d*(H,--U\\""
5<<
  u||$$U\\""ELL))##II +0!"#MzzM &M u||$	M
 5<<(M &M  -M $(M M M M^@# @*	tCy%

D#t ;<<	=* 
 EJJc478
 

 
( ??Tc T T B B* ) )QU ) )V 5<< ELL  *T "%,, ";W " " (,	"<<" u||$	" 
"r<   r   rn   
transposedr=   c                     |rt        d      t        j                  rt        j                  j
                  nt        j                  j                  }|j                  |       S )a	  
    This function converts a dense tensor into a sparse semi-structured tensor.
    It will return a SparseSemiStructuredTensor, a subclass of torch.Tensor.

    This function will check to ensure the dense tensor has the right dtype, size, dims, and device.
    We currently only support semi-structured sparse tensors for 2d CUDA tensors.
    Additionally, your tensor must be a positive multiple of the mininum sparse block size, given in
    `_DTYPE_TO_SHAPE_CONSTRAINTS` for each dtype (float32, float16, bfloat16, int8).

    Args:
        original_tensor (Tensor): the dense tensor to convert
        transposed (bool, optional): deprecated arg to be removed in another release. Do not use.
    Returns:
        SparseSemiStructuredTensor: A sparse semi-structured tensor created from the given original_tensor
    Raises:
        None
    Example:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
        >>> A = torch.Tensor([0, 0, 1, 1]).tile((128, 32)).half().cuda()
        tensor([[0., 0., 1.,  ..., 0., 1., 1.],
                [0., 0., 1.,  ..., 0., 1., 1.],
                [0., 0., 1.,  ..., 0., 1., 1.],
                ...,
                [0., 0., 1.,  ..., 0., 1., 1.],
                [0., 0., 1.,  ..., 0., 1., 1.],
                [0., 0., 1.,  ..., 0., 1., 1.]], device='cuda:0', dtype=torch.float16)
        >>> A_sparse = to_sparse_semi_structured(A)
        SparseSemiStructuredTensor(shape=torch.Size([128, 128]))
        >>> A_sparse.values()
        tensor([[1., 1., 1.,  ..., 1., 1., 1.],
                [1., 1., 1.,  ..., 1., 1., 1.],
                [1., 1., 1.,  ..., 1., 1., 1.],
                ...,
                [1., 1., 1.,  ..., 1., 1., 1.],
                [1., 1., 1.,  ..., 1., 1., 1.],
                [1., 1., 1.,  ..., 1., 1., 1.]], device='cuda:0', dtype=torch.float16),
        >>> A_sparse.indices()
        tensor([[-4370, -4370, -4370,  ..., -4370, -4370, -4370],
                [-4370, -4370, -4370,  ..., -4370, -4370, -4370],
                [-4370, -4370, -4370,  ..., -4370, -4370, -4370],
                ...,
                [-4370, -4370, -4370,  ..., -4370, -4370, -4370],
                [-4370, -4370, -4370,  ..., -4370, -4370, -4370],
                [-4370, -4370, -4370,  ..., -4370, -4370, -4370]], device='cuda:0', dtype=torch.int16))
    zSetting transpose from to_sparse_semi_structured is deprecated and will be removed in a future release.SparseSemiStructuredTensor only support contiguous input tensors. )DeprecationWarningr   r   r3   sparser   r   r   )rn   r   sparse_subclasss      r:   r   r   3  sZ    b  Q
 	
 &44 	66\\>> 
 %%o66r<   c                   X    e Zd ZdZej
                   edddd      ej                   edddd      ej                   edddd      ej                   edddd      iZ
edej                  d	d fd
       Z fdZdddej                  deej                     d	ej                  fdZ xZS )r   a  
    This class implements semi-structured sparsity for the CUTLASS backend.

    In this implementation, the specified elements and metadata are stored seprately,
    in packed and meta respectively.

    When _FORCE_CUTLASS is set, or when cuSPARSELt is not available, this subclass calls into _sparse_semi_structured_linear
    and sparse_semi_structured_from_dense for conversion to the compressed format.
              @         rn   r=   c           	          | j                  |       t        |      \  }} | |j                  ||d d d |j                        S )N)r!   r"   r#   r$   r%   r)   )r{   r
   r(   r)   )r6   rn   sparse_tensor_cutlassmeta_tensor_cutlasss       r:   r   z,SparseSemiStructuredTensorCUTLASS.from_dense  sU     	,,_= 6oF	
!!!($)77
 	
r<   c                     | j                   | j                  J | j                   j                  dk(  r t        | j                  | j                         S t        |          S )Nrp   )r"   r!   ndimr   superr   )rC   rA   s    r:   r   z*SparseSemiStructuredTensorCUTLASS.to_dense  s]    yy$)@@@ yy~~"	 4			
 !#	
r<   Nr   r   r   c                   t        |t              rt        d      | j                  j                  }| j
                  dk7  s|j
                  dk7  rt        d| d      | j                  | j                  t        d| d      t        j                  |j                         | j                  | j                  |      j                         }|d | j                  d    S )NZ`SparseSemiStructuredTensor @ SparseSemiStructuredTensor` is not supported by the hardwarerp   `)` matmul: Broadcasting is not implemented$` matmul: operation is not supportedr   r   )
isinstancer   r2   rA   rB   r   rY   r!   r"   r3   _sparse_semi_structured_linearrf   r(   )rC   r   r   r8   cls_nameress         r:   r   z%SparseSemiStructuredTensorCUTLASS._mm  s     a34l  >>**99>QVVq[%H:FG  ;;$))"3%H:AB  66t{{DIIDac  A''r<   )rB   r   r   r   r3   int8r   float16bfloat16float32r   r   r4   r   r   r   r   __classcell__)rA   s   @r:   r   r   r  s     	

22sBC5b"aC6r2q!D5b"aC	  
#ll
	,
 
$	
 (,	(<<( u||$	( 
(r<   r   c                   F   e Zd ZdZej
                   edddd      ej                   edddd      ej                   edddd      ej                   edddd      iZ
edej                  dd fd       Zd	d
dej                  deej                     dej                  fdZy	)r   a  
    The cuSPARSELt backend expects the specified elements and the metadata to be stored in a single tensor:
    packed = [ specified elements of original tensor | metadata ]
    For an original tensor of size (m, k) we expect the first m * k // 2 elements to be the kept elements
    The rest of the tensor is metadata. Since there is only one tensor, we only use the packed and packed_t
    attributes respectively.

    cuSPARSELt also supports transposition fusion, which is necessary for performant 2:4 sparse training, as well
    as specifying alg_id, a config that affects the performance of the matmul depending on matmul sizes.
    r   r   r   r   rn   r=   c                     | j                  |        | |j                  t        j                  |      d d d d t        j
                  t        j                  |j                  	      S )NrR   )r{   r(   r3   _cslt_compressr   r   r   r)   r   s     r:   r   z/SparseSemiStructuredTensorCUSPARSELT.from_dense  s[    ,,_=!''''8&@&P&P8HH)77

 
	
r<   Nr   r   r   c                   t        |t              rt        d      | j                  dk7  s|j                  dk7  r#t	        d| j
                  j                   d      |j                  | j                  k7  rit	        d| j
                  j                   dt        | j                         dt        |j                         d| j                   d|j                   d	      |h|j                  | j                  k7  rOt	        d| j
                  j                   dt        | j                         dt        |j                         d
      | j                  #t	        d| j
                  j                   d      t        j                  | j                  ||| j                  | j                        }| j                  r|j                         S |S )Nr   rp   r   r   z` matmul: trying to do `A=z @ B=z`, with A.dtype=z and B.dtype=zH. This operation is only supported when A and B have the same data type.z + C`, with A.dtype=B.dtype={self.dtype} and C.dtype={B.dtype}. This operation is only supported when A, B and C have the same data type.r   )r   transpose_resultalg_id)r   r   r2   r   rY   rA   rB   r,   tupler(   r!   r3   _cslt_sparse_mmr&   r'   rf   )rC   r   r   r8   r   s        r:   r   z(SparseSemiStructuredTensorCUSPARSELT._mm  s    a34l  99>QVVq[%DNN++,,UV  77djj %DNN++,,FuTZZGXFYY^_defelel_m^n o  $

|=	 BYY 
 

djj 8%DNN++,,FuTZZGXFYY^_defelel_m^n o\ \ 
 ;;%DNN++,,PQ  ''!%!?!?--C #<<3557E#Er<   )rB   r   r   r   r3   r   r   r   r   r   r   r   r4   r   r   r   r   r<   r:   r   r     s    	 	

22r2rB5b"aC6r2q!D5aAqA	  
%,, 
;a 
 
$ (,	'F<<'F u||$	'F 
'Fr<   r   )F) r.   collectionsr   typingr   r   r   r   r   r	   r3   )torch.sparse._semi_structured_conversionsr
   r   !torch.sparse._semi_structured_opsr   r   r   r   r   r   r   r   r   __all__r   r4   r   r   r   r   r   r   r<   r:   <module>r      s     " = = 
 
 
 ",$C" M" M"d <7\\<7<7  <7~H((B H(VIF+E IFr<   