o
    gU                     @   sp   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ eeZG dd deZG d	d
 d
eZdS )    )	getLoggerN)Fusion)FusionUtils)helper)	OnnxModelc                       sJ   e Zd ZdZdedef fddZdd Zdd	 Zd
d Z	dd Z
  ZS )FusionGptAttentionPastBasez3Base class for GPT Attention Fusion with past statemodel	num_headsc                    s6   t  |dddgd || _t|| _i | _d | _d S )N	AttentionLayerNormalizationSkipLayerNormalizationz	with past)super__init__r	   r   utilscasted_attention_maskmask_filter_valueselfr   r	   	__class__ d/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_gpt_attention.pyr      s
   

z#FusionGptAttentionPastBase.__init__c           
      C   s   | j |d|}|d u s|jdkrtd d S | j |ddkr(td d S |jd }| j |d|}|r?|jdkr?|}n| j |ddgddg}|d u rVtd d S |d }| j |ddkrjtd	 d S |jd }	||	krztd
 d S |S )Nr   Gatherz,match_past_pattern_1: expect Gather for past   z9match_past_pattern_1: expect indices=1 for Gather of past	Transposez7match_past_pattern_1: failed match Transpose and Gatherz;match_past_pattern_1: expect indices=0 for Gather k of pastz,match_past_pattern_1: expect past to be same)r   
get_parentop_typeloggerdebugfind_constant_inputinputmatch_parent_path)
r   concat_kconcat_voutput_name_to_nodegatherpastparentgather_past_kpast_k_nodespast_kr   r   r   match_past_pattern_1   s0   






z/FusionGptAttentionPastBase.match_past_pattern_1c           
      C   st  | j |d|}|d u s|jdkrtd d S | j |d|}|d u s)|jdkr0td d S | j  }|dk rYt|ddgsHtd d S t|d	d
d
gsXtd d S n!| j	|d
dgsitd d S | j	|d
d
d
gsztd d S tj|ddddstd d S |j
d }| j |ddgddg}|d u rtd d S |d j
d }	||	krtd d S |S )Nr   Squeezez:match_past_pattern_2: expect Squeeze as parent of concat_vSplitz0match_past_pattern_2: expect Split for past path   axesz:match_past_pattern_2: axes != [0] for Squeeze in past pathsplitr   z<match_past_pattern_2: split != [1, 1] for Split in past pathaxis)default_valuezKmatch_past_pattern_2: attribute axis of Split are not expected in past pathz7match_past_pattern_2: failed to match past_k_nodes pathr   z,match_past_pattern_2: expect past to be same)r   r   r   r   r   get_opset_versionr   check_node_attributer   check_node_input_valuer!   r"   info)
r   r#   r$   r%   squeezer1   opset_versionr'   r*   r+   r   r   r   match_past_pattern_2K   sH   










z/FusionGptAttentionPastBase.match_past_pattern_2c                 C   sZ   | j j|d|dd}|std d S | j j|d|dd}|s&td d S |jd }|S )N	UnsqueezeF)	recursivezexpect unsqueeze for presentConcatzexpect concat for presentr   )r   find_first_child_by_typer   r7   output)r   r$   input_name_to_nodesunsqueeze_present_vconcat_presentpresentr   r   r   match_present   s   


z(FusionGptAttentionPastBase.match_presentc                 C   s`   || j v r| j | }|S | j|r!| j|\}}|| j |< |S | j|\}}|| j |< |S N)r   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32)r   
input_nameattention_mask_input_namecasted	cast_noder   r   r   cast_attention_mask   s   



z.FusionGptAttentionPastBase.cast_attention_mask)__name__
__module____qualname____doc__r   intr   r,   r:   rD   rM   __classcell__r   r   r   r   r      s    1Cr   c                       s:   e Zd ZdZdedef fddZdd Zdd	 Z  Z	S )
FusionGptAttentionzP
    Fuse GPT-2 Attention with past state subgraph into one Attention node.
    r   r	   c                    s   t  || d S rE   )r   r   r   r   r   r   r      s   zFusionGptAttention.__init__c
                 C   s  | j d}
tjd|||||g|
d |g|
d}d|_|jtd| jtd|	r+dnd	g | j	d urC|jtd
t
| j	g tjd|
d |jd g|
d g|
d d}tjd|
d |jd g|g|
d d}| j|||g | j| j|j< | j| j|j< | j| j|j< d S )NGptAttentionr
   _output)inputsoutputsnamezcom.microsoftr	   unidirectionalr   r   r   MatMul_matmul_output_matmulAdd   _add)r   create_node_namer   	make_nodedomain	attributeextendmake_attributer	   r   floatr!   nodes_to_addthis_graph_namenode_name_to_graph_namerY   )r   	fc_weightfc_biasgemm_qkvr'   rC   r!   r?   maskis_unidirectionalattention_node_nameattention_nodematmul_nodeadd_noder   r   r   create_attention_node   s>   

z(FusionGptAttention.create_attention_nodec           8      C   s  d }d }g }|j dk}d }|s| jj|g dg d||d}n| jj|g dg d||d}|d u r4d S d }	|sK|\}
}}}}}}|
jd|d   }	n|\}}}}}}| j|g d	g d
}|d u rjtd d S |\}}}}| j|g dg d|}|d u r| j|g dg d|}|d u r| j|g dg d|}|d u r| j|g dg d|}|d u rtd d S |d jd }| j|d \}}|d j| }n|d jd }|d jd }|d }|	d ur|	|jvrtd d S d}d }d }d }| j|g dg d} | d ure| \}!}"}#}$}%| j|"g dg d}&|&d u r.td d S |&d }'|&d }|$|'krBtd d S t|&dkrd|&d j dkrd| j|&d \}}(|(dkrd|( | _n| j	|g d g d!fg d"g d#fg|\}} }| d u rtd$ d S | d% })| d& }$| d }%|dkr| d }*| j	|*g d'g d(fg d)g d*fg d+g d,fg|\}}}|d u rtd- d S t|dkr|d j dkr| j|d \}}(|(dkr|(| _| j	|)g d.g d/fg d0g d1fg|\}}&}|&d u rtd2 d S |&|dkrdnd }| j
|&d d|}+|+j d3kr;|+}'|$|'kr:td d S n|+j d4krD|+}ntd2 | j|jd },t|,tjrvt|,jd5krv|,jd d d6krv|,jd |,jd ks}td7 d S t|,t|,rd8}nt|,tt|,std9 d S | j|%g d:g d;}-|-d u rtd< d S |-\}.}/}0||0krtd= d S | j|%g d	g d
}1|1d u r| j|%g d>g d?}1|1d u rtd@ d S |1\}}2}3}4}5n|1\}2}3}4}5||5kr
tdA d S |r|2|krtdB d S dC}6|d ur,|d jd }7| |7}6| |2||p:| |2||}|d u rGtdD d S | j|sStdE | ||}|d u retdF d S | j|sstdG d S | ||||||jd |jd |6|	 d| _d S )HNr   )r^   ReshapeGemmru   ru   r   r[   )r   Nr   r   r   r   r   )r%   return_indice)ru   rv   ru   ru   r   r[   )Nr   r   r   r   r   r   r   )r=   r   ru   r.   )r   r   r   r   z&fuse_attention: failed to match v path)ru   rv   ru   r   )r   r   r   r   )ru   rv   ru   r   )r^   r[   r   )r   Nr   )r^   r[   r   z'fuse_attention: failed to match fc pathr_   r   zCUpstream Add and (Skip)LayerNormalization shall have one same inputT)SoftmaxSubMulDivr[   )r   r   r   r   r   )
rz   ry   Slicer|   r;   ry   r-   r|   Shaper{   )
r   r   r   r   r   r   r   r   r   r   z8fuse_attention: failed to match unidirectional mask path   z-fuse_attention: skip since div_qk != div_maskrz   i)rx   Wherer{   r[   )r   r   r   r   )rx   r^   r   r{   r[   )r   r   Nr   r   z(fuse_attention: failed to match qk nodes)rz   ry   Castr;   r;   ru   )Nr   r   r   r   r   )rz   ry   r;   r;   ru   )Nr   r   r   r   )rz   ry   r;   r;   )Nr   r   r   z9fuse_attention: failed to match input attention mask path)r   r|   r|   r;   ry   r-   r|   r}   )r   r   r   r   r   r   r   r   )r|   r|   r;   ry   r-   r|   r}   )r   r   r   r   r   r   r   z)fuse_attention: failed to match mask pathr{   r=      )r   r   z4fuse_attention: skip since mask shape is not 1x1xWxWFzDfuse_attention: skip since mask is neither lower triangular nor ones)r   ru   r.   )r   r   r   z&fuse_attention: failed to match q pathz.fuse_attention: skip since split_fc != split_q)r   r=   r   ru   r.   )r   r   r   r   r   z&fuse_attention: failed to match k pathz.fuse_attention: skip since split_fc != split_kz8fuse_attention: skip since concat_k != concat_k_to_match z)fuse_attention: failed to match past pathzpast is not graph input.z,fuse_attention: failed to match present pathz!expect present to be graph output)r   r   r"   r!   r   r   get_constant_inputlenr   match_parent_pathsr   get_constant_value
isinstancenpndarrayshapeallclose	ones_liketrilrM   r,   r:   r7   rF   rD   find_graph_outputrt   r?   prune_graph)8r   normalize_noder@   r%   r'   rC   rw   is_normalize_node_skiplayernorm	qkv_nodesanother_inputadd_qkvreshape_qkvrm   	reshape_1	reshape_2transpose_qkv
matmul_qkvv_nodesr$   transpose_v	reshape_vsplit_fcfc_nodesrk   i_rl   layernorm_before_attentionro   
slice_maskinput_mask_nodesconcat_k_to_matchqk_nodes
softmax_qksub_qkmul_qkdiv_qk	matmul_qk
mask_nodesdiv_maskmul_valwhere_qkadd_qkdiv_or_concat	mask_dataq_nodestranspose_q	reshape_qsplit_qk_nodesr#   transpose_k	reshape_ksplit_krJ   rI   r   r   r   fuse   s  
		













































zFusionGptAttention.fuse)
rN   rO   rP   rQ   r   rR   r   rt   r   rS   r   r   r   r   rT      s
    0rT   )loggingr   numpyr   fusion_baser   fusion_utilsr   onnxr   
onnx_modelr   rN   r   r   rT   r   r   r   r   <module>   s    