import logging
from typing import Union

from fusion_attention import AttentionMask, FusionAttention
from fusion_utils import NumpyHelper
from onnx import NodeProto, helper
from onnx_model import OnnxModel
from onnx_model_bert import BertOnnxModel

logger = logging.getLogger(__name__)


class FusionTnlrAttention(FusionAttention):
    """
    Fuse TNLR Attention subgraph into one Attention node.
    TNLR Attention has extra addition after qk nodes and adopts [S, B, NH] as I/O shape.
    modelhidden_size	num_headsattention_maskc                    s   t  |||| d S N)super__init__)selfr   r   r   r   	__class__ _/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_tnlr.pyr      s   zFusionTnlrAttention.__init__
mask_indexmatmuladdinputoutput
add_qk_strreturnNc	                 C   s  |dksJ |dkr|| dkrt d| d|  d S | j|jd }	| j|jd p7| j|jd }
|	d u s@|
d u rBd S t|	}t|
}| jd}|	j}t	
|}t	j|d ||d| g|| dd	}	| j|	| j t	j|d
 |d| g|| dd	}
| j|
| j ||d |d
 g}|d ur|| n|d |d ur|d || t	jd||g|d}d|_|jt	d|g |S )Nr   zinput hidden size z# is not a multiple of num of heads    	Attention_qkv_weight   T)name	data_typedimsvalsraw	_qkv_bias )inputsoutputsr"   zcom.microsoftr   )loggerdebugr   get_initializerr   r   to_arraycreate_node_namer#   r   tensor_dtype_to_np_dtypemake_tensorastypetobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attribute)r   r   r   r   r   r   r   r   r   weightbias
qkv_weightqkv_biasattention_node_nametensor_dtypenp_typeattention_inputsattention_noder   r   r   create_attention_node    s`   $






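
    # Note: the fused com.microsoft::Attention node built above receives its inputs in the
    # order [input, qkv_weight, qkv_bias, mask_index ("" when absent), past (""), add_qk],
    # with the Q/K/V projections packed into a single (hidden_size, 3 * hidden_size) weight
    # and a single (3 * hidden_size,) bias initializer.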

    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
        start_node = normalize_node
        if normalize_node.op_type != "SkipLayerNormalization":
            return

        # Trace from SkipLayerNormalization back through the output projection to the
        # (attention probabilities x V) MatMul.
        qkv_nodes = self.model.match_parent_path(
            start_node,
            ["Where", "Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [1, 1, 1, 0, 0, 0],
        )
        if qkv_nodes is not None:
            (_, _, matmul_below, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes
        else:
            return

        # The other input of SkipLayerNormalization is the root input of the attention block.
        other_inputs = []
        for _i, input in enumerate(start_node.input):
            if input not in output_name_to_node:
                continue
            if input == qkv_nodes[0].output[0]:
                continue
            other_inputs.append(input)
        if len(other_inputs) != 1:
            return
        root_input = other_inputs[0]

        v_nodes = self.model.match_parent_path(
            matmul_qkv,
            ["Transpose", "Reshape", "Slice", "Add", "MatMul"],
            [1, 0, 0, 0, 1],
        )
        if v_nodes is None:
            return
        (_, _, _, add, matmul) = v_nodes

        upper_nodes = self.model.match_parent_path(matmul, ["Transpose"], [0])
        transpose = upper_nodes[0]

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Add", "MatMul"], [0, 0, 0])
        if qk_nodes is None:
            return
        (_, add_qk, matmul_qk) = qk_nodes

        q_nodes = self.model.match_parent_path(
            matmul_qk,
            ["Mul", "Transpose", "Reshape", "Slice", "Add", "MatMul"],
            [0, 0, 0, 0, 0, 1],
        )
        if q_nodes is None:
            return
        add = q_nodes[-2]
        matmul = q_nodes[-1]

        k_nodes = self.model.match_parent_path(
            matmul_qk,
            ["Transpose", "Reshape", "Slice", "Add", "MatMul"],
            [1, 0, 0, 0, 1],
        )
        if k_nodes is None:
            return
        add = k_nodes[-2]
        matmul = k_nodes[-1]

        relative_position_bias_nodes = self.model.match_parent_path(add_qk, ["Reshape", "Where"], [1, 0])
        if relative_position_bias_nodes is None:
            return

        if matmul.input[0] == root_input:
            mask_index = None
            attention_last_node = reshape_qkv

            new_node = self.create_attention_node(
                mask_index,
                matmul,
                add,
                self.num_heads,
                self.hidden_size,
                root_input,
                attention_last_node.output[0],
                relative_position_bias_nodes[0].input[0],
            )
            if new_node is None:
                return

            self.nodes_to_add.append(new_node)
            self.node_name_to_graph_name[new_node.name] = self.this_graph_name

            # Route the Attention output through an extra Transpose (perm=[1, 0, 2]) so
            # downstream consumers keep seeing the original layout.
            back_transpose = helper.make_node(
                "Transpose",
                ["back_transpose_in_" + new_node.name],
                [new_node.output[0]],
                "back_transpose_" + new_node.name,
                perm=[1, 0, 2],
            )
            self.model.add_node(back_transpose, self.this_graph_name)
            new_node.input[0] = transpose.input[0]
            new_node.output[0] = "back_transpose_in_" + new_node.name

            self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv])
            self.nodes_to_remove.extend(qk_nodes)
            self.nodes_to_remove.extend(q_nodes)
            self.nodes_to_remove.extend(k_nodes)
            self.nodes_to_remove.extend(v_nodes)

            # Mask nodes are shared by all attention nodes, so remove them via prune_graph.
            self.prune_graph = True


class TnlrOnnxModel(BertOnnxModel):
    def __init__(self, model, num_heads, hidden_size):
        super().__init__(model, num_heads, hidden_size)
        self.attention_mask = AttentionMask(self)
        self.attention_fusion = FusionTnlrAttention(self, self.hidden_size, self.num_heads, self.attention_mask)
zTnlrOnnxModel.__init__c                 C   s   | j   d S r   )r   apply)r   r   r   r   fuse_attention   s   zTnlrOnnxModel.fuse_attention)rx   ry   rz   r   r   r~   r   r   r   r   r      s    r   )loggingtypingr   fusion_attentionr   r   fusion_utilsr   onnxr   r   
onnx_modelr   onnx_model_bertr	   	getLoggerrx   r+   r
   r   r   r   r   r   <module>   s   
 L
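

# The guarded block below is not part of the original module: it is a minimal, hypothetical
# sketch of how TnlrOnnxModel could be driven directly. The file paths and the values
# num_heads=12 and hidden_size=768 are assumptions for illustration only.
if __name__ == "__main__":
    import onnx

    # Load an exported TNLR ONNX model (placeholder path).
    tnlr_model = onnx.load("tnlr.onnx")

    # Wrap it and run the attention fusion defined above.
    optimizer = TnlrOnnxModel(tnlr_model, num_heads=12, hidden_size=768)
    optimizer.fuse_attention()

    # Save the graph with the fused com.microsoft::Attention nodes.
    onnx.save(optimizer.model, "tnlr_fused.onnx")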