o
    g(                     @   sh   d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ eeZG dd deZd	S )
    )	getLogger)Tuple)AttentionMaskFusionAttention)AttentionMaskFormat)	NodeProto)	OnnxModelc                       sP   e Zd ZdZdededef fddZdedeeef fd	d
Z	dd Z
  ZS )FusionAttentionClipzB
    Fuse Attention subgraph of Clip into one Attention node.
    modelhidden_size	num_headsc                    s.   t |}tj|_t j||||ddgd d S )NFSkipLayerNormalization)use_multi_head_attentionsearch_op_types)r   r   NoMaskmask_formatsuper__init__)selfr
   r   r   attention_mask	__class__ e/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_attention_clip.pyr      s   
zFusionAttentionClip.__init__	reshape_qreturnc                 C   sZ  | j |dd}|du st|jdkr| j| jfS | j |jd }|du r,| j| jfS t|dks8|d dkr>| j| jfS |d }| j |jd }|du rU| j| jfS t|dksa|d dkrg| j| jfS |d }|| }| jdkr|| jkr| jrt	d| j d	| d
 d| _| jdkr|| jkr| j
rt	d| j d	| d
 d| _
||fS )zDetect num_heads and hidden_size for ONNX model from MiDaS
        Args:
            reshape_q (NodeProto): reshape node for q
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        Concat   N      r      z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r
   match_parentleninputr   r   get_constant_valuenum_heads_warningloggerwarninghidden_size_warning)r   r   concatnum_head_valuer   head_size_value	head_sizer   r   r   r   get_num_heads_and_hidden_size'   s6   z1FusionAttentionClip.get_num_heads_and_hidden_sizec           -      C   s  d }d }dD ]}| j |d|}|d ur|}|}qd }|d ur%|jd }nDdD ];}d }| j |d|}	| j |d|}
|	d urB|	}n|
d urH|
}|d u rMq'| j |d|d}|d u r[q'|jd }|} |d u rid S | j |g dd	| d d dddg}|d u rd S |\}}}}}}| j |g d
g d}|d u rtd d S |\}}}}}d }g }d }| j j|g dg d|d}| j |ddgddg}|d ur|}t|d	ksJ d	|d  }|\}}}}}n|d ur|}|\}}ntd d S | j |g dg d}|d u rtd d S |\}}}} }!}"| j |g dg d}#|#d u r-td d S |#\}$}%}}}&}'|"jd |ksM|'jd |ksM|jd |krTtd d S | 	|\}(})|(dkse|)dkrltd d S |}*|d ur| j |g d|dddddg}+|+d u r| j |g d|ddddg}+|+d u rtd d S | j
d |"|'||!|&||(|)||*jd d d |d ud},|,d u rd S | j|, | j| j|,j< | j|*|g d| _d S ) N)r   r   r   r   )r   r   AddLayerNormalizationF)r.   MatMulReshape	Transposer1   r0   r   )r1   r2   r1   r.   r0   )r   r   r   r   Nz&fuse_attention: failed to match v path)Softmaxr1   r.   r1   r0   )r   r   r   Nr   )return_indicer3   r0   z'fuse_attention: failed to match qk path)r1   r2   r1   Mulr.   r0   )r   r   r   r   NNz&fuse_attention: failed to match q path)r2   r1   r2   r1   r.   r0   )r   r   r   r   r   Nz&fuse_attention: failed to match k pathz>fuse_attention: expect to have same input to q, k and v matmulz9fuse_attention: failed to detect num_heads or hidden_size)r   Expand	Unsqueezer7   WhereLess)r6   r7   r7   r8   r9   z4fuse_attention: failed to match causal mask subgraph)
mask_indexq_matmulk_matmulv_matmulq_addk_addv_addr   r   r#   output
add_qk_strscalecausalT)r
   r!   rA   find_first_child_by_typematch_parent_pathr&   debugr"   r#   r-   create_attention_nodenodes_to_addappendthis_graph_namenode_name_to_graph_namenamenodes_to_removeextendprune_graph)-r   normalize_nodeinput_name_to_nodesoutput_name_to_nodeskip_input_indexnode_before_layer_normiparent
root_inputnode_before_layer_norm_1node_before_layer_norm_2child	qkv_nodes_reshape_qkvtranspose_qkv
matmul_qkvv_nodes	reshape_vadd_vmatmul_vadd_maskadd_mask_indicesqk_nodes
qk_nodes_1
qk_nodes_2causal_mask_input_index_softmax_qk	matmul_qkq_nodes_transpose_qr   mul_qadd_qmatmul_qk_nodes_transpose_k
_reshape_kadd_kmatmul_kr   r   attention_last_nodecausal_mask_nodesnew_noder   r   r   fuseU   s  









0







zFusionAttentionClip.fuse)__name__
__module____qualname____doc__r   intr   r   r   r-   rz   __classcell__r   r   r   r   r	      s    .r	   N)loggingr   typingr   fusion_attentionr   r   fusion_optionsr   onnxr   
onnx_modelr   r{   r&   r	   r   r   r   r   <module>   s   