o
    gL                     @   st  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z; ee<Z=G dd de;Z>dS )    )	getLogger)ListOptional)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                       s:  e Zd ZdHdededef fddZdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdIddZd d! Zd"d# Zd$d% Zd&ed'ee d(efd)d*Zd(efd+d,Zd-d. ZdJd1d2Zd3d4 Zd5d6 Zd7d8 Zd9d: ZdKd=e e! d>efd?d@Z"dAdB Z#dLdCdDZ$dMdEefdFdGZ%  Z&S )NBertOnnxModelr   model	num_headshidden_sizec                    s   |dkr|dks|dkr|| dksJ t  | || _|| _t| | _t| | j| j| j| _t| | j| j| j| _	t
| | _dS )aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)super__init__r&   r'   r   attention_maskr   attention_fusionr   qordered_attention_fusionr   utils)selfr%   r&   r'   	__class__ _/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_bert.pyr)   &   s   (
zBertOnnxModel.__init__c                 C   s   | j   | j  d S N)r+   applyr,   r.   r1   r1   r2   fuse_attention;   s   
zBertOnnxModel.fuse_attentionc                 C   sD   t | }|  t| }|  t| }|  t| }|  d S r3   )r   r4   r   r   r   r.   fusionr1   r1   r2   	fuse_gelu@   s   zBertOnnxModel.fuse_geluc                 C      t | |}|  d S r3   )r	   r4   )r.   is_fastgelur8   r1   r1   r2   fuse_bias_geluK      
zBertOnnxModel.fuse_bias_geluc                 C      t | }|  d S r3   )r   r4   r7   r1   r1   r2   gelu_approximationO      z BertOnnxModel.gelu_approximationc                 C   r>   r3   )r   r4   r7   r1   r1   r2   fuse_gemm_fast_geluS   r@   z!BertOnnxModel.fuse_gemm_fast_geluc                 C   r>   r3   )r   r4   r7   r1   r1   r2   fuse_add_bias_skip_layer_normW   r@   z+BertOnnxModel.fuse_add_bias_skip_layer_normc                 C   r>   r3   )r   r4   r7   r1   r1   r2   fuse_reshape[   r@   zBertOnnxModel.fuse_reshapec                 C   r>   r3   )r   r4   r7   r1   r1   r2   
fuse_shape_   r@   zBertOnnxModel.fuse_shapec                 C   r:   r3   )r
   r4   )r.   use_mask_indexr8   r1   r1   r2   fuse_embed_layerc   r=   zBertOnnxModel.fuse_embed_layerc                 C   s4   t | }|  t| }|  t| }|  d S r3   )r   r4   r   r   r7   r1   r1   r2   fuse_layer_normg   s   zBertOnnxModel.fuse_layer_normc                 C   r>   r3   )r   r4   r7   r1   r1   r2   fuse_simplified_layer_normr   r@   z(BertOnnxModel.fuse_simplified_layer_normTc                 C   s   t | |d}|  d S )N)shape_infer)r   r4   )r.   rI   r8   r1   r1   r2   fuse_skip_layer_normv   s   z"BertOnnxModel.fuse_skip_layer_normc                 C   r>   r3   )r   r4   r7   r1   r1   r2   fuse_skip_simplified_layer_normz   r@   z-BertOnnxModel.fuse_skip_simplified_layer_normc                 C   s   t | }|  ttdd | jjj}ttdd |}d}|t	| jj
k rM| jj
| }d|jv r?|j|vr?| jj
| n|d7 }|t	| jj
k s'd S d S )Nc                 S   s   | j dko	| jdkS )NRotaryEmbeddingcom.microsoft)op_typedomainnoder1   r1   r2   <lambda>   s    z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>c                 S   s   | j S r3   )rO   rP   r1   r1   r2   rR      s    r   rL      )r   r4   listfilterr%   graphrQ   setmaplen	functionsnamerO   remove)r.   r8   rot_emb_nodesnon_ms_domains_to_keepifnr1   r1   r2   fuse_rotary_embeddings~   s    z$BertOnnxModel.fuse_rotary_embeddingsc                 C   r>   r3   )r   r4   r7   r1   r1   r2   fuse_qordered_mamtul   r@   z"BertOnnxModel.fuse_qordered_mamtulrN   input_indicescastedc           
         s   g }|   }| |}|D ]>  fdd|D }|D ]0}| |r)|s(|| q||v rJ|| }	|	jdkrJ| |	jd durJ|rJ||	jd  qq|S )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        c                    s$   g | ]}|t  jk r j| qS r1   )rY   input).0r_   rP   r1   r2   
<listcomp>   s   $ zABertOnnxModel.get_graph_inputs_from_node_type.<locals>.<listcomp>Castr   N)output_name_to_nodeget_nodes_by_op_typefind_graph_inputappendrN   re   )
r.   rN   rc   rd   graph_inputsri   nodesbert_inputs
bert_inputparentr1   rP   r2   get_graph_inputs_from_node_type   s$   


	z-BertOnnxModel.get_graph_inputs_from_node_typec                 C   s*   |  dg d|}||  ddg|7 }|S )NEmbedLayerNormalization)r   rS      	Attention   )rr   )r.   rd   inputsr1   r1   r2   !get_graph_inputs_from_fused_nodes   s   z/BertOnnxModel.get_graph_inputs_from_fused_nodesc                 C   sb   |   }d}d}|jD ]}| |tj\}}|r|d7 }|t|7 }qtd| d| d dS )zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   rS   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rV   re   change_graph_input_typer!   INT32rY   loggerinfo)r.   rV   add_cast_countremove_cast_countgraph_inputnew_noderemoved_nodesr1   r1   r2   change_graph_inputs_to_int32   s   
z*BertOnnxModel.change_graph_inputs_to_int32
batch_sizemax_seq_lenc                 C   s   | j dd| j dd }| jjjD ]!}|j|v r2|jjjjd }||_	|dur2|jjjjd }||_	q| jjj
D ]}|jjjjd }||_	q8dS )zD
        Update input and output shape to use dynamic axes.
        T)rd   Fr   NrS   )rx   r%   rV   re   r[   typetensor_typeshapedim	dim_paramoutput)r.   dynamic_batch_dimdynamic_seq_lenbert_graph_inputsre   	dim_protor   r1   r1   r2   use_dynamic_axes   s"   

zBertOnnxModel.use_dynamic_axesc                 C   s   |    d S r3   )adjust_reshape_and_expandr5   r1   r1   r2   
preprocess   s   zBertOnnxModel.preprocessc           
      C   s2  g }|   D ]}}|jdkr| |jd }|d ur1|jdkr1||g | |jd |jd  q| |g dg d| 	 }|d ur|d }| |jd }|d }| |jd }|d }	|d ur|d urt
|d	krt
|dkr|d |d kr|	jd |jd< q|r| | td
t
|  d S d S )NReshaperS   r   )Expandr   r   Slice)r   r   r   r      z"Removed Reshape and Expand count: )rn   rN   get_constant_valuere   sizeextendreplace_input_of_all_nodesr   match_parent_pathri   rY   remove_nodesr{   r|   )
r.   nodes_to_removerQ   reshape_shapereshape_pathexpand_nodeexpand_shape_valuereshape_before_expandshape_value
slice_noder1   r1   r2   r      s>   

z'BertOnnxModel.adjust_reshape_and_expandc                 C   sd  |   }g }|  D ]}dddd}|j|v rQ||j }| |g d|dddddg|}|d urQ|\}}}	}
}}|jd |  jd jkrQ|jd |jd< |   }|jdkr| |g dg d|}|d ur|d	 jd |  jd jkrtj	d|jdt
|jd  |j|jd
 d}d|_|jtd| jg | || |j || q
| | d S )NrS   r   rv   )rs   	ReduceSumru   )rh   ConstantOfShapeConcat	UnsqueezeGatherShaperu   )r   rh   r   r   )rv   r   r   r   r   _remove_mask)rw   outputsr[   rM   r&   )ri   rn   rN   r   re   rV   r[   r   r"   	make_noderY   rO   	attributer   make_attributer&   add_nodeget_graph_by_noderl   r   )r.   ri   r   rQ   op_input_idr_   parent_nodescastconstantOfShapeconcat	unsqueezegatherr   attention_noder1   r1   r2   clean_graph   sZ   	



zBertOnnxModel.clean_graphc                 C   s   |    |   d S r3   )r   prune_graphr5   r1   r1   r2   postprocessB  r@   zBertOnnxModel.postprocessNFoptionsadd_dynamic_axesc                 C   s  |d ur|j s|   | j  | j  |d u s|jr$|   |   |d u s+|jr/| 	  | 
  |   |d u s>|jrH| |j  |   |d u sO|jrS|   |d urt| j|j |jrtt| jtstt| | j| j| j|j| _|d u s{|jr|   |d u s|jr|   |   |d u s|j r|jt!j"k}| #| | j$  | %  |d u s|j&r| j'dd | j'dd |d u s|j(r| )  |d ur|j*r| +  |d ur|j,r| -  | .  |r| /  t01d| 2   d S )NT)r;   Fzopset version: )3enable_shape_inferencedisable_shape_inferencer-   remove_identity_nodesremove_useless_cast_nodesenable_layer_normrG   rH   enable_gelur9   r   rC   enable_skip_layer_normrJ   rK   enable_rotary_embeddingsra   r*   set_mask_formatattention_mask_formatuse_multi_head_attention
isinstancer+   r   r   r'   r&   enable_attentionr6   enable_qordered_matmulrb   rD   enable_embed_layer_normr   MaskIndexEndrF   remove_useless_reshape_nodesr   enable_bias_gelur<   enable_bias_skip_layer_normrB   enable_gelu_approximationr?   enable_gemm_fast_gelurA   remove_unused_constantr   r{   r|   get_opset_version)r.   r   r   rE   r1   r1   r2   optimizeF  sb   



zBertOnnxModel.optimizec                 C   sL   i }g d}g d}|| D ]}|  |}t|||< qtd|  |S )z8
        Returns node count of fused operators.
        )rs   ru   MultiHeadAttentionGeluFastGeluBiasGeluGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationrL   )QOrderedAttentionQOrderedGeluQOrderedLayerNormalizationQOrderedMatMulzOptimized operators: )rj   rY   r{   r|   )r.   op_countopsq_opsoprn   r1   r1   r2   get_fused_operator_statistics  s   
z+BertOnnxModel.get_fused_operator_statisticsc           	         s   du r|    dtf fdd}|d}|d|d |d }|d	|d
 |d }|d|d }|d|d }|dkoT|dkoT||koT|d| kpT|d| k}|dkr^td |dkrgtd |dkrptd |dkrytd |dkrtd |S )zA
        Returns True when the model is fully optimized.
        Nop_namec                    s     | pdS )Nr   )get)r   fused_op_countr1   r2   r     s   z2BertOnnxModel.is_fully_optimized.<locals>.op_countrs   ru   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r   strr{   debugwarning)	r.   r   r   embed	attentiongelu
layer_normsimple_layer_norm
is_perfectr1   r   r2   is_fully_optimized  s4   




z BertOnnxModel.is_fully_optimizeduse_symbolic_shape_inferc                 C   s   t | }|| d S r3   )r   convert)r.   r   packing_moder1   r1   r2   convert_to_packing_mode  s   z%BertOnnxModel.convert_to_packing_mode)r   r   )T)r   r   )NFr3   )F)'__name__
__module____qualname__r    intr)   r6   r9   r<   r?   rA   rB   rC   rD   rF   rG   rH   rJ   rK   ra   rb   r   r   boolrr   rx   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r1   r1   r/   r2   r$   %   s:    

)BQ
 (r$   N)?loggingr   typingr   r   r   r   fusion_attentionr   r   fusion_bart_attentionr   fusion_biasgelur	   fusion_embedlayerr
   fusion_fastgelur   fusion_gelur   fusion_gelu_approximationr   fusion_gemmfastgelur   fusion_layernormr   r   fusion_optionsr   r   fusion_qordered_attentionr   fusion_qordered_gelur   fusion_qordered_layernormr   fusion_qordered_matmulr   fusion_quickgelur   fusion_reshaper   fusion_rotary_attentionr   fusion_shaper   fusion_simplified_layernormr   r   fusion_skiplayernormr   r   fusion_utilsr   onnxr    r!   r"   
onnx_modelr#   r   r{   r$   r1   r1   r1   r2   <module>   s8   