o
    gU/                     @   sp   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZ d dlmZ eeZG dd deZdS )	    )	getLogger)TupleUnionN)Fusion)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                       s   e Zd ZdZdededef fddZdeded	eeef fd
dZ	dedededededededede
de
d	eedf fddZdd Z  ZS )FusionAttentionVaezI
    Fuse Attention subgraph of Vae Decoder into one Attention node.
    modelhidden_size	num_headsc                    s.   t  |ddg || _|| _d| _d| _d S )N	AttentionSoftmaxT)super__init__r   r   num_heads_warninghidden_size_warning)selfr   r   r   	__class__ d/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_attention_vae.pyr      s
   
zFusionAttentionVae.__init__	reshape_qadd_qreturnc           	      C   s4  | j |d}|du st|jdkr| j| jfS | j |jd }|dur0t|tj	r0|j
dks6| j| jfS t|}|dkrD| j| jfS | j |\}}|du s[t|tj	r[|jdkra| j| jfS |jd }| jdkr~|| jkr~| jr~td|| j d| _| jdkr|| jkr| jrtd|| j d| _||fS )	zDetect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
            add_q (NodeProto): add node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
           N      r   z?Detected number of attention heads is %d. Ignore --num_heads %dFz3Detected hidden size is %d. Ignore --hidden_size %d)r   
get_parentleninputr   r   get_constant_value
isinstancenpndarraysizeintget_constant_inputndimshaper   loggerwarningr   )	r   r   r   concatvaluer   _biasr   r   r   r   get_num_heads_and_hidden_size   s0   

z0FusionAttentionVae.get_num_heads_and_hidden_sizeq_matmulq_addk_matmulk_addv_matmulv_add
input_nameoutput_nameNc           %      C   s:  |j d |	ks|j d |	ks|j d |	kr(td|j d |j d |j d  dS |dkr;|| dkr;td|| dS | j|j d }| j|j d }| j|j d }|r\|r\|s^dS | j|j d po| j|j d }| j|j d p| j|j d }| j|j d p| j|j d }t|}t|}t|}t|j	}t|j	}t|j	}|j
dkrtd dS t|}t|}t|}|j	|j	ks|j	|j	krdS |j	d }|j	d }|j	d }||kr||ksJ |dkr||krtd| d	| d
t|j	dd }tj|||fdd}dt| }| jd} ||  kr9|ks<J  J d}!tj|||fdd}"d| }!| j| d tj||g|d tjd|gtjd}"d| }!| j| d tj|!g|"d |	| d | d g}#tjd|#|
g| d}$d|$_|$jtd|g | d |$S )at  Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input_name (str): input name
            output_name (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   zRFor self attention, input hidden state for q and k/v shall be same. Got %s, %s, %sNz9input hidden size %d is not a multiple of num of heads %dr   
   zBweights are in fp16. Please run fp16 conversion after optimizationzInput hidden size (z,) is not same as weight dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   r   _qkv_weight)name	data_typedimsvals)dtype	_qkv_bias)inputsoutputsr?   zcom.microsoftr   zAttention (self attention))r"   r,   debugr   get_initializerr	   to_arrayr%   prodr+   r@   
ValueErrorstackr(   create_node_nameadd_initializerr   FLOATzerosfloat32r   	make_nodedomain	attributeextendmake_attributeincrease_counter)%r   r3   r4   r5   r6   r7   r8   r   r   r9   r:   q_weight_tensork_weight_tensorv_weight_tensorq_bias_tensork_bias_tensorv_bias_tensorq_biask_biasv_biasq_bias_shapek_bias_shapev_bias_shapeq_weightk_weightv_weight
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameqkv_bias_dimqkv_biasattention_inputsattention_noder   r   r   create_attention_nodeG   s   *$$$










 
z(FusionAttentionVae.create_attention_nodec                  C   sf  | j j|d|dd}|d u rd S | j j|d|dd}|d u r d S | j j|d|dd}|d u r0d S | j j|d|dd}|d u r@d S | j j|d|dd}|d u rPd S | j j|d|dd}	|	d u r`d S | j j|	d|dd}
|
d u rpd S | j |g dg d}|d u rtd	 d S |\}}}}}| j |g d
g d}|d ur|\}}}}ntd d S | j |g dg d}|d u rtd d S |\}}}}}| j |g dg d}|d u rtd d S |\}}}}}}|}| ||\}}|dkrtd d S | |||||||||jd |jd 
}|d u rd S | j	
| | j| j|j< | j||g d| _d S )NMatMulF)	recursiveReshape	TransposeAdd)ru   rv   ru   rw   rs   )r   r   r   r   Nz&fuse_attention: failed to match v path)r   rw   Mulrs   )r   r   r   r   z'fuse_attention: failed to match qk path)r   r   r   r   Nz&fuse_attention: failed to match q path)rv   ru   rv   ru   rw   rs   )r   r   r   r   r   Nz&fuse_attention: failed to match k pathr   z*fuse_attention: failed to detect num_headsT)r   find_first_child_by_typematch_parent_pathr,   rG   r2   rr   r"   outputnodes_to_addappendthis_graph_namenode_name_to_graph_namer?   nodes_to_removerU   prune_graph) r   softmax_nodeinput_name_to_nodesoutput_name_to_node
matmul_qkvreshape_qkvtranspose_qkvreshape_out
matmul_outadd_outtranspose_outv_nodesr0   add_vmatmul_vqk_nodes_softmax_qk	_add_zero_mul_qk	matmul_qkq_nodes_transpose_qr   r   matmul_qk_nodesadd_kmatmul_kattention_last_nodeq_num_headsq_hidden_sizenew_noder   r   r   fuse   s   






zFusionAttentionVae.fuse)__name__
__module____qualname____doc__r
   r(   r   r   r   r2   strr   rr   r   __classcell__r   r   r   r   r      s:    	)	


 r   )loggingr   typingr   r   numpyr%   fusion_baser   onnxr   r   r   r	   
onnx_modelr
   r   r,   r   r   r   r   r   <module>   s   