o
    g}p                     @   sZ   d dl Z d dlZd dlmZmZ d dlmZmZ d dl	m
Z
 e eZG dd deZdS )    N)AttentionMaskFusionAttention)TensorProtohelper)	OnnxModelc                       sJ   e Zd ZdZdedededef fddZdd	 Zd
d Z	dd Z
  ZS )FusionBartAttentionz?
    Fuse Bart Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s   t  |||| d S )N)super__init__)selfr   r	   r
   r   	__class__ e/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_bart_attention.pyr      s   zFusionBartAttention.__init__c                 C   s  | j |dgdg}|d u rdS |d }| j |g dg d}	| j |g dg d}
|	d u s4|
d u r6dS |	\}}}|
\}}}|jd |ksN|jd |krPdS | j |g dg d}| j |g dg d	}|d u sp|d u rrdS |d
 j|jks|d
 j|jkrdS | j |g dg d}| j |g dg d}| j |g dg d}|d u s|d u s|d u rdS |d
 }|d
 }|d
 }|jd }|jd |ks|jd |ks|jd |krdS dS )NConcat   Fr   	UnsqueezeGatherShaper   r   r   )r   r   r   )r   r   r   )r      r   )r   r   MulT)r   match_parent_pathinputnameoutput)r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2
root_inputconcat_qkv_2_pathconcat_qkv_2reshape_qkv_2_path_1reshape_qkv_2_path_2_gather_1shape_1gather_2shape_2reshape_qkv_1_path_1reshape_qkv_1_path_2reshape_q_2_pathreshape_k_2_pathreshape_v_2_pathmul_qmul_kmul_vgather_1_outr   r   r   check_runtime_shape_path   s<   	

 
*z,FusionBartAttention.check_runtime_shape_pathc                 C   s  | j |g dg d}|d u rdS |d jd |jd kr dS | j |g dg d}| j |g dg d}|d u s@|d u rBdS |d }	|d }
|	jd	 |
jd	 krVdS |d jd |jd krp|d jd |jd krpdS |d ur| j |d
gd	g}|d u rdS | j |d g dg d}| j |d g dg d}|d u r|d u rdS |\}}}}|\}}}|jd |jd krdS |d jd |jd kr|d jd |jd krdS dS )N)r   Slicer   r   r   r   r   r   Fr   r   )r   PowCastDivr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r:   )r:   r   r   r   )r   r   r   r   r   )r   r   r   T)r   r   r   r    )r   r!   
matmul_qkvadd_qk	matmul_qkadd_qreshape_qkv_2_pathmatmul_qk_path_1matmul_qk_path_2mul_1mul_2add_qk_pathslice_q_path_1slice_q_path_2r+   unsqueeze_1unsqueeze_2r   r   r   check_runtime_shape_path_openaiM   sL   0
0z3FusionBartAttention.check_runtime_shape_path_openaic           a      C   s*  d}| j |g dg d}| j |g dg d}|d ur'|\}}}	}
}}n|d ur7|}|\}}}	}
}d}nd S g }|jD ]}||vrEq>||d jd krOq>|| q>t|dkr]d S |d }	 || }|jd	krs| j |d }|jD ]}|s{qv|| }d
d |D }|ddkr|} nqvt	dd | j 
 jD }t	dd | j 
 jD }| j |g dg d}| j |g dg d}| j |g dg d}| j |dgdg}| j |g dg d}d\}}d\}}|d ur|\}}} }}!|jd }n|d urf|}|\}} }}!| j j|ddg| gd}"| j |g d}#|"d ur9|"\}$}%|%jd |v r9|%jd }|#d ure|#\}&}$}'|'jd |v rP|'jd }| j |&ddgddg}(|(\}$})|)jd }n|d ur|\}}&}} }}!|}|&jd }|&jd }n|d ur|d jd |v r|}|d jd }|d jd }||vrttdd | j  | }*t|*dkr|*d jd nd }nJ|d ur|d jd |v r|}|d jd }|d jd }||vrttd!d | j  | }*t|*dkr|*d jd nd }ntd" d S ||v r|nd }||v r|nd }| j |d#dgddg}+| j |g d$g d%},| j |g d&g d'}-d }.|+d urQ|+\}$}/|+}0n|,d ur`|,\}$}$}.}$}/|,}0n|-d urm|-\}$}.}/|-}0nd S | j |/g d(g d)}1| j |/g d*g d+}2d }3|1d ur|1\}3}4}5}6}7}8n|2d ur|2}1|1\}6}4}5}7}8nd S | j |/g d,g d-}9| j |/g d.g d}:| j |/g d/g d0};| j |/g d1g d2}<| j |/ddgddg}=| j |/g d3g d0}>d\}?}@d4\}A}B}C|9d ur
|9\}$}A}D}B}E}C|9}Fn |:d urt|:\}G}D}B}C|:}F|Cjd }@| j j|Cddg|Bgd}"| j |Cg d}#|"d urG|"\}$}H|Hjd |v rG|Hjd }@|#d urs|#\}I}$}J|Jjd |v r^|Jjd }@| j |Iddgddg}(|(\}$}K|Kjd }?n|;d ur|;\}$}A}D}B}C|;}F|Djd }@n|<d ur|<\}$}A}I}$}B}C|<}F|Ijd }?|Ijd }@n|=d ur|=d jd |v r|=}F|Fd jd }?|Fd jd }@|@|vrttd5d | j  |? }Lt|Ldkr|Ld jd nd }@nE|>d ur(|>d jd |v r(|>}F|Fd jd }?|Fd jd }@|@|vr'ttd6d | j  |? }Lt|Ldkr%|Ld jd nd }@nd S |?|v r1|?nd }?|@|v r:|@nd }@|F|:|;|<fv r| j |jd jd }Md7}N| j |N}O|Od u rq| j|Ntj|Mgtjd8g|M tjd9d: | j d	}Ptd	|N|Cjd g|Bjg|P}E|r|?s| |	||.|/|7sd S |s|?s| |	||3|A||sd S |?o|o|Cd u od;t v}Q|Q o|Cjd |ko|8jd |ko|!jd |k}R|Q o|8jd |ko|Cjd |!jd ko|Cjd |8jd k}S|Ro|0|+k}T|Ro|0|,|-fv }U|s|Tn|Uo|?o|}V|So|0|+k}W|Qo!|0|+k}Xd }Y|UrV| j |.d<gdg}Z| j |.g d=g d}[|[d urJ|[d jd }Yn|Zd urV|Zd jd }Y|Tse|Use|Vse|Wse|Xr|	}\|  |5\}]}^|]dks|^dks|^|] dkrtd> d S d }_|Vs|Ws|Xr| j!r| j"|8|Ws|Vr|Cn|?|Ws|Vr|!n||7|Ws|Vr|End |Ws|Vr|nd |]|^|\jd |Vr|?nd |Vr|nd |@||Vd?nd }_n&| j!}`d| _!| j#d |8|C|!|7|E||]|^||\jd |Ur|Ynd |?||@|d@}_|`| _!|_d u rd S | j$|_ | j%| j&|_j< | j'(|\|
|g | j'(|0 |Vs*|Ws*|Xr||1d jdkr6|1)  |Fd jdkrB|F)  |d jdkrN|)  | j*r||WsX|Xr||1d jd	krd|1)  |Fd jd	krp|F)  |d jd	kr||)  | j'(|1 | j'(|F | j'(| d| _+d S d S )ANF)AddMatMulReshape	TransposerQ   rP   r?   )rO   rP   rQ   rR   rP   )r   r   r   r   r   Tr   r   rO   c                 S      g | ]}|j qS r   op_type).0childr   r   r   
<listcomp>       z,FusionBartAttention.fuse.<locals>.<listcomp>rP   c                 S   rS   r   r   rV   noder   r   r   rX      rY   c                 S   rS   r   rZ   r[   r   r   r   rX      rY   )rQ   rR   rQ   rO   rP   )r   r   r   r   N)rR   rQ   rO   rP   )r   r   r   N)rQ   r   rR   rQ   rO   rP   )r   r   r   r   r   NrQ   )rR   rQ   rQ   rR   r;   ) r]   )NNrR   )exclude)r   rQ   rR   r   c                 S   
   | j dkS NIdentityrT   r\   r   r   r   <lambda>      
 z*FusionBartAttention.fuse.<locals>.<lambda>r]   c                 S   r_   r`   rT   rb   r   r   r   rc   ,  rd   z&fuse_attention: failed to match v pathSoftmax)re   rQ   rO   rQ   rP   )r   r   r   r   r   )re   rO   rP   r   )rQ   rR   rQ   r   rO   rP   )r   r   r   r   r   r   )r   rR   rQ   rO   rP   )r   r   r   r   r   )rR   rQ   rR   rQ   rO   rP   )r   r   r   r   r   r   )r   rR   rQ   rP   )rR   rQ   rR   rQ   rP   )r   r   r   r   r   )rR   rQ   r   rR   rQ   rP   )r   r   r   r   r   r   )r   rR   rQ   rQ   rR   )NNNc                 S   r_   r`   rT   rb   r   r   r   rc     rd   c                 S   r_   r`   rT   rb   r   r   r   rc     rd   
empty_biasg        )dtype)dimsvalsmatmul_vWhere)Expandr   r   rk   z9fuse_attention: failed to detect num_heads or hidden_size)past_kpast_v	present_k	present_v
packed_qkv)
add_qk_strrm   rn   ro   rp   ),r   r   r   r    appendlenrU   get_childrencountsetgraphmatch_child_pathlistfilterinput_name_to_nodesloggerdebugget_initializerrh   add_initializerr   FLOATnparrayfloat32create_node_namer   	make_noder   rN   r9   localsget_num_heads_and_hidden_sizeuse_multi_head_attentioncreate_multihead_attention_nodecreate_attention_nodenodes_to_addthis_graph_namenode_name_to_graph_namenodes_to_removeextendpop!disable_multi_head_attention_biasprune_graph)ar   normalize_noder|   output_name_to_nodemodel_impl_openai	qkv_nodesqkv_nodes_openaiadd_out
matmul_outr!   transpose_qkvr"   r@   other_inputsr   r&   skip_layernormr    childrenchildren_typesgraph_input_namesgraph_output_namesv_nodesv_nodes_openaiv_nodes_with_past_self_attnv_nodes_with_past_cross_attn#v_nodes_with_past_cross_attn_openairn   rp   r%   add_vtranspose_vreshape_v_1rj   reshape_pathconcat_pathr+   transpose_add_vconcat_vtranspose_concat_vconcat_nodestranspose_concat_v_inidentity_node_v
qk_nodes_1
qk_nodes_2qk_nodes_2_openairA   rB   qk_nodesq_nodesq_nodes_openair#   transpose_qreshape_q_1r5   rC   matmul_qk_nodes_with_biask_nodes_with_bias_openaik_nodes_no_bias#k_nodes_no_bias_with_past_self_attn$k_nodes_no_bias_with_past_cross_attn+k_nodes_no_bias_with_past_cross_attn_openairm   ro   r$   reshape_k_1matmul_ktranspose_k_1add_kk_nodesr6   transpose_matmul_kconcat_ktranspose_concat_ktranspose_concat_k_inidentity_node_kbias_dimempty_bias_nameempty_tensoradd_namethree_root_inputsone_root_inputtwo_root_inputsencoder_attentiondecoder_attentiondecoder_attention_with_pastdecoder_cross_attention!decoder_cross_attention_with_past
mask_indexmask_nodes_bartmask_nodes_whisperattention_last_noder
   r	   new_node%use_multi_head_attention_ground_truthr   r   r   fuse   sP  




	








 

 





















 

 
 

"


zFusionBartAttention.fuse)__name__
__module____qualname____doc__r   intr   r   r9   rN   r   __classcell__r   r   r   r   r      s    	05r   )loggingnumpyr   fusion_attentionr   r   onnxr   r   
onnx_modelr   	getLoggerr   r}   r   r   r   r   r   <module>   s   
