o
    gLQ                     @   sx   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ eeZG dd	 d	eZdS )
    )	getLogger)TupleUnionN)Fusion)NumpyHelper)	NodeProtohelpernumpy_helper)	OnnxModelc                       s   e Zd ZdZdededef fddZdedefd	d
ZdedefddZ	dd Z
	d+dedededeeef fddZdedededededededededeedf fddZdd  Zd!d" Zdefd#d$Zd,d%d&Zded'ed(ededef
d)d*Z  ZS )-FusionMultiHeadAttentionSam2zI
    Fuse MultiHeadAttention subgraph of Segment Anything v2 (SAM2).
    modelhidden_size	num_headsc                    s.   t  |ddg || _|| _d| _d| _d S )NMultiHeadAttentionLayerNormalizationT)super__init__r   r   num_heads_warninghidden_size_warning)selfr   r   r   	__class__ e/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_attention_sam2.pyr      s
   
z%FusionMultiHeadAttentionSam2.__init__	reshape_qreturnc                 C   s`   d}| j |jd }|dur#t|tjr#t|jdgkr#t|d }t|tr.|dkr.|S dS )Detect num_heads from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            int: num_heads, or 0 if not found
        r      N      )	r   get_constant_valueinput
isinstancenpndarraylistshapeint)r   r   r   shape_valuer   r   r   get_decoder_num_heads$   s   z2FusionMultiHeadAttentionSam2.get_decoder_num_heads
reshape_inc                 C   s   d}| j |jd }|dur$t|tjr#t|jdgkr#t|d }n4| j 	|dd}|durXt
|jdkrX| j |jd }|durXt|tjrXt|jdgkrXt|d }t|trc|dkrc|S dS )r   r   r   N      Concat)r   r    r!   r"   r#   r$   r%   r&   r'   match_parentlen)r   r*   r   r(   concat_shaper   r   r   get_encoder_num_heads9   s   z2FusionMultiHeadAttentionSam2.get_encoder_num_headsc                 C   s*   | j |jd }|rt|jd S dS )zDetect hidden_size from LayerNormalization node.
        Args:
            layernorm_node (NodeProto): LayerNormalization node before Q, K and V
        Returns:
            int: hidden_size, or 0 if not found
        r   r   )r   get_initializerr!   r   to_arrayr&   )r   layernorm_nodelayernorm_biasr   r   r   get_hidden_sizeU   s   z,FusionMultiHeadAttentionSam2.get_hidden_sizeFr4   
is_encoderc                 C   s   |r|  |}n| |}|dkr| j}| jdkr1|| jkr1| jr1td| j d| d d| _| |}|dkr=| j}| jdkrZ|| jkrZ| jrZtd| j d| d d| _||fS )a  Detect num_heads and hidden_size.

        Args:
            reshape_q (NodeProto): reshape node for Q
            layernorm_node (NodeProto): LayerNormalization node before Q, K, V
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        r   z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )	r1   r)   r   r   loggerwarningr6   r   r   )r   r   r4   r7   r   r   r   r   r   get_num_heads_and_hidden_sizeb   s&   

z:FusionMultiHeadAttentionSam2.get_num_heads_and_hidden_sizeq_matmulq_addk_matmulk_addv_matmulv_addoutputNc
              
   C   s*  |dkr|| dkrt d| d|  dS | j|jd }
| j|jd }| j|jd }|
r8|r8|s:dS t|
}t|}t|}t d|j d|j d|j d	|  | jd
}|j	d |j	d |j	d g}t
jd
||	g|d}d|_|jt
d|g dd}| | |S )aF  Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   zinput hidden size z# is not a multiple of num of heads Nr   zqw=z kw=z vw=z hidden_size=r   inputsoutputsnamecom.microsoftr   MultiHeadAttention ({})zcross attention)r8   debugr   r2   r!   r   r3   r&   create_node_namerA   r   	make_nodedomain	attributeextendmake_attributeformatincrease_counter)r   r;   r<   r=   r>   r?   r@   r   r   rA   q_weightk_weightv_weightqwkwvwattention_node_nameattention_inputsattention_nodecounter_namer   r   r   create_attention_node   s8   


(

z2FusionMultiHeadAttentionSam2.create_attention_nodec                 C   s  |  |||r	d S | |}|d u r4|jd |vrd S ||jd  }|jdkr)d S | |}|d u r4d S |\	}}}}	}
}}}}|}| ||d\}}|dkrUtd d S | j|	|
|||||||jd d	}|d u rld S | j	
| | j| j|j< | j||g d| _d S )Nr   AddF*fuse_attention: failed to detect num_heads)rA   T)fuse_sam_encoder_patternmatch_attention_subgraphr!   op_typer:   r8   rH   r[   rA   nodes_to_addappendthis_graph_namenode_name_to_graph_namerE   nodes_to_removerM   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_node	match_qkvskip_addreshape_qkvtranspose_qkvr   matmul_qadd_qmatmul_kadd_kmatmul_vadd_vattention_last_nodeq_num_headsq_hidden_sizenew_noder   r   r   fuse   sF   




z!FusionMultiHeadAttentionSam2.fusec              	   C   sl  | j |g dg d}|du rdS |\}}}}}| j |g dg d}|du r0td dS |\}}}}	| j |ddgd	d	g}
|
durK|
\}}ntd
 dS | j |g dg d}|du ritd dS |\}}}}}| j |g dg d}|du rtd dS |\}}}}}| j |g dg d}|du s|d |krtd dS ||||||||	|f	S )z.Match Q, K and V paths exported by PyTorch 2.*r\   MatMulReshape	Transposerz   )NNNr   r   N)r|   r{   r\   rz   )r   r   r   Nz&fuse_attention: failed to match v pathSoftmaxrz   r   z'fuse_attention: failed to match qk path)Mulr|   r{   r\   rz   )r   Nr   r   Nz&fuse_attention: failed to match q path)r   Nr   r   Nz&fuse_attention: failed to match k path)SqrtDivr   CastSliceShaper|   r{   )Nr   r   r   r   r   r   r   z*fuse_attention: failed to match mul_q pathr   match_parent_pathr8   rH   )r   node_after_output_projection	qkv_nodes_rl   rm   
matmul_qkvv_nodesrs   rr   qk_nodes_softmax_qk	matmul_qkq_nodesmul_q_transpose_qr   ro   rn   k_nodes_mul_krq   rp   mul_q_nodesr   r   r   r_      sR   





z5FusionMultiHeadAttentionSam2.match_attention_subgraphc                 C   s  | j |g dg d}|d u r| j |g dg d}|d u r*| j |dgdg}|d u r0dS |d }| j|t|d	kr?d	nd d
}|d u rIdS |\}}}	}
}}t|
d}t|trb|g dkrddS t|d}t|tru|g dkrwdS t|d}t|tr|g dkrdS | j |	g dg d}|d u rdS |\}}}| ||d\}}|dkrt	
d dS d}| j |}|d u rtjtjg ddd|d}| j || j | j d}tjd|
jd |g|
jd d g|d}| j| | j| j|j< |
}|jd |jd< |jd d |jd< t	
d|d| | ||||}|d u r,dS t| j ||d	ks:J |jd |jd< | j| | j| j|j< | j|g d| _dS )N)r\   r{   r|   r{   r   Nr   r   )r\   r   r   r{   r|   r{   )r   Nr   r   r   r   r\   r   Fr   r   )input_indexperm)r   r   r   r,   )r   r   r,   r   )r{   r\   rz   )r   r   NTr]   bsnh_to_bsd_reshape_dims)r   r   r   int64)dtype)rE   r{   _BSDrB   _BNSHzFound MHA: q_num_heads=z q_hidden_size=) r   r   $match_sam_encoder_attention_subgraphr/   r
   get_node_attributer"   r%   r:   r8   rH   r2   r	   
from_arrayr#   arrayadd_initializerrc   rI   r   rJ   r!   ra   rb   rd   rE   rA   create_mha_nodeget_childrenre   rM   rf   )r   rg   rh   ri   nodesr   matched_sdpareshape_outtranspose_out	split_qkvtranspose_qtranspose_ktranspose_vpermutation_qpermutation_kpermutation_vinput_projection_nodesr*   add_in	matmul_inru   rv   new_dims_namenew_dimsreshape_q_namer   transpose_k_bnshrw   r   r   r   r^   2  s   


z5FusionMultiHeadAttentionSam2.fuse_sam_encoder_patternc              	   C   sj  | j |g d|ddddg}|du rdS |\}}}}}| j |g dg d}|du r3td dS |\}	}}
}| j |ddgddg}|durN|\}}ntd	 dS | j |g d
g d}|du r|| j |g dg d}|du r|td dS |d |
krdS |d }| j |g d
g d}|du rtd dS |d |
krdS |\}}}}|||
|||	fS )z%Match SDPA pattern in SAM2 enconder.*ry   Nr   )r|   SqueezeSplitr{   )r   r   r   r   zfailed to match v pathr}   rz   zfailed to match qk path)r~   r|   r   r   r   )	r~   r|   r{   r|   MaxPoolr|   r{   r   r   )	r   Nr   r   r   r   r   r   r   zfailed to match q pathr   r   )r   Nr   r   zfailed to match k pathr   )r   r   r   	out_nodesr   r   r   matmul_qk_vr   r   r   rl   r   r   r   r   r   r   mul_kr   
_squeeze_kr   r   r   r     sN   




zAFusionMultiHeadAttentionSam2.match_sam_encoder_attention_subgraphr   r   c           
      C   sx   | j d}|jd |jd |jd g}|d }tjd||g|d}d|_|jtd|g d	d}	| 
|	 |S )	a  Create a MultiHeadAttention node for SAM2 encoder.

        Args:
            reshape_q (NodeProto): Reshape node for Q, output is 3D BxSxNH format
            transpose_k (NodeProto): Transpose node for K, output is BNSH format
            transpose_v (NodeProto): Transpose node for V, output is BNSH format
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the MultiHeadAttention node created.
        r   r   _outrB   rF   r   rG   zself attention)r   rI   rA   r   rJ   rK   rL   rM   rN   rO   rP   )
r   r   r   r   r   rW   rC   rA   rY   rZ   r   r   r   r     s"   

z,FusionMultiHeadAttentionSam2.create_mha_node)F)N)__name__
__module____qualname____doc__r
   r'   r   r   r)   r1   r6   boolr   r:   strr   r[   rx   r_   r^   r   r   __classcell__r   r   r   r   r      sv    

$	


@2: 
7r   )loggingr   typingr   r   numpyr#   fusion_baser   fusion_utilsr   onnxr   r   r	   
onnx_modelr
   r   r8   r   r   r   r   r   <module>   s   