o
    g0                     @   s   d dl mZ d dlmZmZ d dlZd dlmZmZ d dlm	Z	m
Z
mZmZ d dlmZ d dlmZ eeZG dd	 d	ZG d
d dZdS )    )	getLogger)OptionalTupleN)array_equalndarray)	NodeProtoTensorProtohelpernumpy_helper)onnx_pb)	OnnxModelc                
   @   s  e Zd ZdefddZdedeeef fddZd/defd	d
Z				d0dede
dee dee fddZdefddZdefddZedd Zed1defddZed2defddZedejfddZed3d!edefd"d#Zd$e
fd%d&Zd'd( Zd)d* Zd+d, Zd-d. ZdS )4FusionUtilsmodelc                 C   s
   || _ d S N)r   )selfr    r   \/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_utils.py__init__   s   
zFusionUtils.__init__
input_namereturnc                 C   sn   | j |}|d ur&|jjjtjkr&| |\}}t	d| d d|fS t	d| d|d u  d|fS )NzCasted graph input z	 to int32TzDid not cast graph input z to int32: found F)
r   find_graph_inputtypetensor_type	elem_typer   INT32cast_input_to_int32loggerdebug)r   r   graph_inputcast_output	cast_noder   r   r   cast_graph_input_to_int32   s   z%FusionUtils.cast_graph_input_to_int32int32c                 C   sf   |d | }|dkrt tj}n|dkrt tj}n|dkr$t tj}ntd| |||}||fS )N_r"   float32float16z"Invalid target_type: {target_type})intr   r   FLOATFLOAT16
ValueErroradd_cast_node)r   r   target_typeoutput_nameto_typer    r   r   r   
cast_input   s   zFusionUtils.cast_inputNr-   r,   
graph_namec           	      C   s   |d u r|d|  }|g}|d u r| j  }||v r,|| }|r,|jdkr,|jd g}tjd||gd}|jtd|g | j j	||d |S )N	_cast_to_Castr   )inputsoutputsto)r/   )
r   output_name_to_nodeop_typeinputr	   	make_node	attributeextendmake_attributeadd_node)	r   r   r-   r,   r5   r/   r2   parent_noder    r   r   r   r*   /   s   
zFusionUtils.add_cast_nodec                 C   s   |  |dS )Nr"   )r.   )r   r   r   r   r   r   J   s   zFusionUtils.cast_input_to_int32c                 C   s   | j  }|| }|D ]4}|jdkr?d}|jD ]}|jdkr*|jttjkr*d} nq|r?|j	d }| j 
| | j || qd S )Nr1   Fr4   Tr   )r   input_name_to_nodesr6   r9   nameir&   r   r   outputremove_nodereplace_input_of_all_nodes)r   r   r>   nodesnodeis_int32attr,   r   r   r   remove_cast_int32M   s    



zFusionUtils.remove_cast_int32c                 C   s|   d}| j | |v r%| || j |  v r%|| j |  |  t|| j |  }|| j |< ||v r7|| |  |S | g||< |S )Nr   )r7   removelenappend)rE   r@   new_input_namer>   old_input_referencer   r   r   update_node_input\   s    

zFusionUtils.update_node_inputr   c           
      C   s<   |j | }|j | }t||||}|dko| | }	|	S )a  
        Before:
              (input)-->parent-->node-->(output)
        After:
              (input)-->parent-->
                |
                +----->node-->(output)

        This function returns a flag whether the parent node can be removed.
        r   )r7   r   rN   find_graph_output)
r   rE   r=   r>   node_input_indexparent_input_indexold_input_namerL   rM   parent_can_be_removedr   r   r   skip_parentl   s
   

zFusionUtils.skip_parentattribute_namec                 C   sR   |}| j D ]}|j|krt|}qt|tr%t|ttfo$t||ddS ||kS )a  Verify that a node has expected value for an attribute.

        Args:
            node (NodeProto): a node to check
            attribute_name (str): name of attribute
            expected_value (Any): expected value of the attribute
            default_value (Any, optional): default value if the attribute does not exist. Defaults to None.

        Returns:
            bool: whether the check is passed or not
        F	equal_nan)r9   r?   r	   get_attribute_value
isinstancelistr   r   )rE   rU   expected_valuedefault_valuevalueattrr   r   r   check_node_attribute   s   



z FusionUtils.check_node_attributetensorc                 C   s   t | tjstdt|  t| jdks| jtjjkr!td| j	r@t
t
j| j	dd| j}t
|ddg}| | _	| S td)	zTranspose a 2-D INT8 TensorProto
        Args:
            tensor (TensorProto): tensor to be transposed
        Returns:
            tensor (TensorProto): transposed tensor
        z3Expected input type is an ONNX TensorProto but got    z'Only INT8 2-D tensors can be transposedint8)dtype   r   zonly raw buffer supported)rY   
onnx_protor   r)   r   rJ   dims	data_typeINT8raw_datanumpyreshape
frombuffer	transposetobytes)r`   
int32_dataint32_transposed_datar   r   r   transpose_2d_int8_tensor   s   
z$FusionUtils.transpose_2d_int8_tensorTrE   c                 C   s   | j dvrtd| j   || jd }|du rdS |jdkp,|jdko,|jd dk}|r3|s3dS t| jdkr<dS || jd }|j|jkrLdS |du rRdS t	|dkS )	a  Verify if a provided QuantizeLinear (Q) / DequantizeLinear (DQ) node is a good candidate for fusion.
           It is a good candidate for fusion if:
           (1) The Q/DQ node is for per-tensor quantization if allow_per_tensor_quantization_only is `True`
           (2) The Q/DQ node should have constant scale
           (3) The Q/DQ node should have a zero point of 0
        Args:
            node (NodeProto): a Q/DQ node to check
        Returns:
            bool: whether the check is passed or not
        >   QuantizeLinearDequantizeLinearz+Provided node is not a Q/DQ node. Op Type: rd   NFr   ra   T)
r6   r   r   get_constant_valuer7   ndimshaperJ   rj   all)rE   r   "allow_per_tensor_quantization_onlyscalescale_has_single_element
zero_pointr   r   r   check_qdq_node_for_fusion   s    
"z%FusionUtils.check_qdq_node_for_fusioninput_indexc                 C   sR   t |j|ks	J | j|j| }t|tr%t|ttfo$t||ddS ||kS )a7  Verify that a node has expected input value

        Args:
            node (NodeProto): a node to check
            input_index (int): index of its input to be verified
            expected_value (Any): expected value of the input

        Returns:
            bool: whether the check is passed or not
        FrV   )rJ   r7   r   rt   rY   rZ   r   r   )r   rE   r}   r[   r]   r   r   r   check_node_input_value   s
   
z"FusionUtils.check_node_input_valuec                 C   s   g }| j  }| j  D ] }|jdkr,|jd |vr,| j |jd |jd  || q|rB| j | t	
dt| d dS dS )z>Remove Identity nodes, except those right before graph output.Identityr   zRemoved z Identity nodesN)r   get_graphs_output_namesrD   r6   rA   rC   r7   rK   remove_nodesr   inforJ   )r   nodes_to_removegraph_output_namesrE   r   r   r   remove_identity_nodes   s   


z!FusionUtils.remove_identity_nodesc                 C      | j   d S r   )r   remove_cascaded_cast_nodesr   r   r   r   r         z&FusionUtils.remove_cascaded_cast_nodesc                 C   r   r   )r   remove_useless_cast_nodesr   r   r   r   r     r   z%FusionUtils.remove_useless_cast_nodesc                 C   s@  | j jdd}|du rdS g }| j  D ]0}|jdkrD||jd }||jd }|rD|rD||krDtd|j	 d|  |
| q|rt| j  }t| j  }|D ]F}tt|j|@ rtt|j|@ st| j  |jd  dkr| j |jd |jd  nqW| j |jd |jd  | j | qWdS dS )	ziRemove reshape node that is not needed based on symbolic shape inference: input and output has same shapeT)updateNReshaper   zRemove reshape node z* since its input shape is same as output: rd   )r   infer_runtime_shaperD   r6   get_edge_shaper7   rA   r   r   r?   rK   setget_graphs_input_namesr   boolrJ   r>   replace_output_of_all_nodesrC   rB   )r   shape_inferr   rE   input_shapeoutput_shapegraph_input_namesr   r   r   r   remove_useless_reshape_nodes  s8   

z(FusionUtils.remove_useless_reshape_nodes)r"   )NNN)r   r   r   )T)__name__
__module____qualname__r   r   strr   r   r!   r.   r&   r   r*   r   rH   staticmethodrN   rT   r_   re   r   rq   r   r|   r~   r   r   r   r   r   r   r   r   r      sB    


*r   c                   @   s(   e Zd ZeddededefddZdS )	NumpyHelperFr`   
fill_zerosr   c                 C   s0   |rddl m} t| j|j| j dS t| S )Nr   )mapping)rv   rc   )onnxr   r   rf   TENSOR_TYPE_TO_NP_TYPErg   r
   to_array)r`   r   r   r   r   r   r   '  s   

zNumpyHelper.to_arrayN)F)r   r   r   r   r   r   r   r   r   r   r   r   r   &  s    r   )loggingr   typingr   r   rj   r   r   r   r   r   r	   r
   r   re   
onnx_modelr   r   r   r   r   r   r   r   r   <module>   s     