o
    gm                     @   s   d dl Z d dlmZmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ e eZG d
d deZG dd de
ZG dd deZdS )    N)OptionalUnion)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                       s   e Zd ZdZdedededef fddZ	d$d	ed
e	de	de	dededededede
e dee	df fddZdededed	edededededededededee	df fddZdd Zd d! Zd"d# Z  ZS )%FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s&   t  j||||dddgd d| _d S )NF SkipSimplifiedLayerNormalizationAdd)use_multi_head_attentionsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__ ]/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_t5.pyr      s   
zFusionT5Attention.__init__N
mask_indexq_matmulk_matmulv_matmulinputoutput
add_qk_strscalereturnc                 C   sR  |dksJ |dkr|| dkrt d| d|  dS | j|jd }| j|jd }| j|jd }|du rHt|jd  d dS t|}t|}t|}|j|jks_J |jd }|jd }|jd }||  kry|ks|J  J |dkr||krt 	d| d| d	 t
|jdd }t
j|||fdd
}d| }| jd}tj|d tj||g| dd}| j|| j ||d dg}|dur|| n|d |	dur|d ||	 tjd||g|d}d|_|jtd|g |
dur|jtd|
g | jdur'|jtdt| jg |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   	Attention_qkv_weightTname	data_typedimsvalsraw inputsoutputsr1   com.microsoftr   r(   mask_filter_value)loggerdebugr   get_initializerr%   printr	   to_arrayshapewarningnpprodstackcreate_node_namer   make_tensorr   FLOATtobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attributer;   float)r   r!   r"   r#   r$   r   r   r%   r&   r'   r(   q_weightk_weightv_weightqwkwvw
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameweightattention_inputsattention_noder   r   r    create_attention_node*   sv   









z'FusionT5Attention.create_attention_nodequerykeyvalueres_pos_biaspast_key
past_valuepresent_keypresent_valuec                 C   st  |dksJ |dkr|| dkrt d| d|  d S | jd}||d u r*dn||d u r1dn|dg}|d ur?|| n|d |d urN|| n|d |d urg|d us]J || || |g}|	d ur~|
d ustJ ||	 ||
 tjd|||d}d|_|j	t
d|g |j	t
d	d
g | jd ur|j	t
dt| jg | d |S )Nr   r*   r+   MultiHeadAttentionr6   r7   r:   r   r(         ?r;   )r<   r=   r   rF   rL   r   rM   rN   rO   rP   rQ   r;   rR   increase_counter)r   rd   re   rf   r!   rg   rh   ri   r&   rj   rk   r   r   r_   ra   attention_outputsrb   r   r   r    create_mha_node   sL   







z!FusionT5Attention.create_mha_nodec                 C   s    |  ||| | ||| d S N)fuse_t5_encoderfuse_t5_decoder)r   normalize_nodeinput_name_to_nodesoutput_name_to_noder   r   r    fuse   s   zFusionT5Attention.fusec           "      C   s  |j dkr|j dkrd S | j|g dg d}|d u rd S |\}}}}| j|g dg d}	|	d u r6d S |	d }
| j|g dg d}|d u rLd S |\}}}| j|g d	g d
}|d u rcd S |\}}}d }| j|g dg d}|d u r|d S |d }|d j dkrd S | j|\}}|dkr|| _| j|d jd }d }| j|ddgddg}|d u rd S |d }|jd }| j|g dg d}|d u rd S |\}}}| j|g dg d
}|d u rd S |\}}}|jd |
jd krd S | |\}} | 	|||||| |
jd |j
d |d
}!|!d u rd S | j|! | j| j|!j< | j|dd   | j| | j|d d  |d urR| j|d d  | j|d d  d| _d S )Nr   r   MatMulReshape	Transposery   r   r   r   r   Concat	UnsqueezeGatherShaper{   rz   ry   r   r   r   Softmaxr   ry   r   r   r   r   MulSubCastr   r   r   r   r   r   r   r   r   r   r   RelativePositionBiasrm   T)op_typer   match_parent_pathget_constant_inputr;   r   process_maskr%   get_num_heads_and_hidden_sizerc   r&   nodes_to_addrL   rK   node_name_to_graph_namer1   nodes_to_removerP   prune_graph)"r   rt   ru   rv   	qkv_nodes_reshape_qkvtranspose_qkv
matmul_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qkr!   
mask_nodesmul_nodemul_valrg   	rpb_nodesrpb_add_nodek_nodes	reshape_kmatmul_kq_nodestranspose_q	reshape_qmatmul_qq_num_headsq_hidden_sizenew_noder   r   r    rr      s   







z!FusionT5Attention.fuse_t5_encoderc           /      C   s  |j dkr|j dkrd S | j|g dg d}|d u rd S |\}}}}| j|g dg d}	|	d u r6d S |	d }
d }d }d }| j|g dg d}|d u r| j|g d	g d
}|d ur|\}}}|jd }|jd }d|vrud S |jd |
jd krd| _nCd| _n?|jd }||v rd S d|vrd S d| _n*|\}}}}|jd }||v rd S d|vrd S |jd }d|vrd S |jd }d| _| j|g dg d}|d u rd S |\}}}d }d }| jdkr#| j|g dg d}|d u rd S |d }|d j dkrd S | j|\}}|dkr|| _| j	|d jd }n/| j|ddgddg}|d ur:|jd }n| j|ddgddg}|d u rMd S |jd }d }d }d }| jdkr| j|g d	g d
}|d ur|\} }!}|!jd }||!jd  }"|"D ]}#| j
|#jd }$|$d ur|$j} nq|d u rd S d|vrd S n| j|dgdg}|d u rd S |d } | jd }||v rd S d|vrd S n| j|g dg dfg dg dfg|\}%}}d }&d }"|d ura|d |d  }'}!|!jd }|%dkr||'jd  }&|&jd }n|'jd }||v r d S d!|vr'd S |%dkrM||'jd  }"|"D ]}#| j
|#jd }$|$d urJ|$j} nq5n|'jd }|d u rYd S d"|vr`d S nK| j|g d	g d
}|d u rtd S |\}}!}|!jd }||!jd  }"|"D ]}#| j
|#jd }$|$d ur|$j} nq|d u rd S d"|vrd S | j|g d	g d}(|(d u rd S |(\})}*}+|+jd |
jd krd S | |*\},}-| jdkr|d ur|}|}d }d }| |+jd |||||||jd |||,|-}.|.d u rd S | j|. | j| j|.j< | j|dd   | j| | j|d d  |d ur>| j|d d  | j|(d d  d#| _d S )$Nr   r   rx   r|   r}   r   )r~   r{   rz   ry   )r   r   r   r   r   r   r   rk   r   past_value_crosspast_value_selfpresent_value_selfr   r   r   r   r   r   Slicer   present_key_crossr{   past_key_cross)r{   r~   rz   ry   )r   r   r   r   )r{   r~   r{   rz   ry   )r   r   r   r   r   past_key_selfpresent_key_selfT)r   r   r   r%   r&   r   r   r;   r   r   find_graph_outputr1   match_parent_pathsr   rp   r   rL   rK   r   r   rP   r   )/r   rt   ru   rv   r   r   r   r   r   r   r   rf   ri   rk   r   transpose_vr   r   concat_vr   r   r   r!   rg   r   r   r   r   re   rh   rj   r   transpose_kr   present_key_transpose_nodespresent_key_transpose_nodepresent_key_candidateidxpast_key_transpose_nodeconcat_kr   r   r   r   r   r   r   r   r   r    rs   X  s  












































z!FusionT5Attention.fuse_t5_decoderrq   )__name__
__module____qualname____doc__r   intr   r   strr
   r   rR   r   rc   rp   rw   rr   rs   __classcell__r   r   r   r    r      s    	


m	


A|r   c                       s.   e Zd Zdedef fddZdd Z  ZS )FusionRelativePositionBiasBlockr   max_distancec                    s$   t  |dddg || _d| _d S )Nr   r   r   F)r   r   r   is_bidirectional)r   r   r   r   r   r    r   ]  s   
z(FusionRelativePositionBiasBlock.__init__c                 C   s.  |j dkr|j dkrd S | j|g dg d}|d u r.| j|g dg d}|d u r.d S |d }|d }|d	 }| j|g d
g d}|d u rLd S |d }	| j|	g dg d}
|
d u ru| j|	g dg d}
d| _|
d u rud S |
d }| j| | j| | j|
 | jrdnd}| j|jd	 }t	|}t
|}tj| jjd|dtjt
|d	 t
|d g| dd}| j|| j |j|jd |jd g}|jd	 g}tjd||| jjd|dd}d|_|jtd| jg |jtd| jg | j| | j| j|j< d S )Nr   r   )r   r{   r   Where)r   r   r   r   )r   r{   r   r   r   )r   r   r   r   r      r   r   )	MinConstantOfShaper   r   r   r   DivLogr   )	r   r   r   r   r   r   r   r   r   )r   Negr   r   r   r   r   Range)r   r   r   r   r   r   r   r   )r   Absr   r   r   )r   r   r   r   r   Tencoderdecoderbias_table_weight)name_prefixr   r0   r   r7   r:   r   r   )r   r   r   r   r   rP   r>   r%   r	   r@   rC   	transposer   rG   rF   r   rH   rA   rI   rJ   rK   r1   r&   rM   rN   rO   rQ   r   r   rL   r   )r   noderu   rv   compute_bias_nodesgatherwhere	unsqueezecompute_buckets_nodesdivrange_nodes
range_nodenode_name_prefixtable_weight_itable_weighttable_weight_t
bias_tabler8   r9   rpb_noder   r   r    rw   c  s~   

z$FusionRelativePositionBiasBlock.fuse)r   r   r   r   r   r   rw   r   r   r   r   r    r   \  s    r   c                       sT   e Zd Z fddZdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
  ZS )T5OnnxModelc                    sT   t  ||| t| | _t| | j| j| j| _t| | _	t
| | _t| d| _d S )N   )r   r   r   r   r   r   r   attention_fusionr   layer_norm_fusionr   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r   r   r    r     s   


zT5OnnxModel.__init__c                 C      | j   d S rq   )r   applyr   r   r   r    fuse_attention     zT5OnnxModel.fuse_attentionc                 C   r   rq   )r   r   r   r   r   r    fuse_layer_norm  r   zT5OnnxModel.fuse_layer_normc                 C   r   rq   )r   r   r   r   r   r    fuse_skip_layer_norm  r   z T5OnnxModel.fuse_skip_layer_normc                 C   s   g }|   D ]@}|jdkrF| |g dg d}|d u rq| |dgdg}|d u r+q|d }|jd |jd< || || | | qd S )Nr   )r   r   r   r   r   LessOrEqualTiler~   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   nodesr   r   r&   rP   rL   remove_nodesr   r   r   extended_mask_nodesr   r   r   r   r    !remove_extended_mask_decoder_init  s(   



z-T5OnnxModel.remove_extended_mask_decoder_initc                 C   s   g }|   D ]B}|jdkrH| |g dg d}|d u rq| |ddgddg}|d u r-q|d }|jd |jd< || || | | qd S )Nr   )r   r   r   r   r~   r   r   r   r~   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r    remove_extended_mask_decoder  s(   



z(T5OnnxModel.remove_extended_mask_decoderc                 C   s   |    | j  d S rq   )adjust_reshape_and_expandr   r   r   r   r   r    
preprocess  s   zT5OnnxModel.preprocessc                 C   s   |    |   |   d S rq   )r  r  r   r   r   r   r    postprocess  s   zT5OnnxModel.postprocess)r   r   r   r   r   r   r   r  r  r  r  r   r   r   r   r    r     s    
#$r   )loggingtypingr   r   numpyrC   fusion_attentionr   r   fusion_baser   fusion_simplified_layernormr   r   fusion_utilsr	   onnxr
   r   r   
onnx_modelr   onnx_model_bertr   	getLoggerr   r<   r   r   r   r   r   r   r    <module>   s$   
    LV