o
    gj                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlZzd dlm	Z	 W n e
y+   dZ	Y nw ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZ G d	d
 d
ZG dd dZdS )    N)AnyDict)to_array_extended   )
TensorData)	ONNXModel)
ONNX_TYPE_TO_NP_TYPETENSOR_NAME_QUANT_SUFFIXfind_by_namemodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   @   sL   e Zd Zdeeef fddZdddZdd Zd	d
 Z	dd Z
dd ZdS )QuantizationParamsdatac                 K   s   i | _ | D ]d\}}t|tstdt| d|d|dkr7t|tttjfs7tdt| d|d|dkrNt|tsN|d urNtdt| d|dkrf|j	tj
tjfvrftd|j	 d||| j |< qd S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarraydtypefloat32float16
ValueError)selfr   kv r'   ^/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/quantization/base_quantizer.py__init__%   s   
zQuantizationParams.__init__Nc                 C   s   | j ||S N)r   get)r$   keydefault_valuer'   r'   r(   r+   2      zQuantizationParams.getc                 c   s    | j E d H  d S r*   r   r$   r'   r'   r(   __iter__5   s   zQuantizationParams.__iter__c                 C   s
   | j | S r*   r/   )r$   r,   r'   r'   r(   __getitem__8      
zQuantizationParams.__getitem__c                 C   s   || j |< d S r*   r/   )r$   r,   valuer'   r'   r(   __setitem__;   r.   zQuantizationParams.__setitem__c                 C   s
   t | jS r*   )lenr   r0   r'   r'   r(   __len__>   r3   zQuantizationParams.__len__r*   )__name__
__module____qualname__r   r   r   r)   r+   r1   r2   r5   r7   r'   r'   r'   r(   r   $   s    
r   c                   @   s   e Zd Z	dddZdejjdefddZdd	 Z	d
d Z
dd Zdd Zdd Zdd Zd ddZd!ddZ		d"ddZdd ZdS )#BaseQuantizerNc                 C   s  t |st|}dd |jjD | _| jdd |jjD  | jdd |jjD  t|| _	|| _
|| _|
r;|
ni | _d| jv oG| jd | _d | _d| jv oU| jd | _| jdd | _| jdd	| _| jd
| _t|d|| _t|d|| _	 |d urttdd | rtdtdd | D  d|| _|| _|| _|	| _|   | _!t"| jdi | _#dd | j	$ D | _%| j#&| j%| j' |\}}|st(|| j#) | _*d S )Nc                 S      i | ]}|j |qS r'   name).0vir'   r'   r(   
<dictcomp>R       z*BaseQuantizer.__init__.<locals>.<dictcomp>c                 S   r<   r'   r=   )r?   otr'   r'   r(   rA   S   rB   c                 S   r<   r'   r=   )r?   itr'   r'   r(   rA   T   rB   EnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec                 S   s   t | t S r*   )r   r   )tr'   r'   r(   <lambda>w   s    z(BaseQuantizer.__init__.<locals>.<lambda>z(tensors_range contains unexpected types c                 s   s    | ]}t |V  qd S r*   )r   )r?   r&   r'   r'   r(   	<genexpr>y   s    z)BaseQuantizer.__init__.<locals>.<genexpr>z, not TensorData.TensorQuantOverridesc                 S   r<   r'   r=   )r?   initzerr'   r'   r(   rA      rB   )+r   r   graph
value_infovalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr+   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanymapvaluesr   settensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerinitializersis_validkeysr#   get_quant_typestensor_quant_override_qtypes)r$   rV   rW   rX   rb   ra   rg   rh   ri   rj   rY   overrides_validoverrides_errr'   r'   r(   r)   C   sJ   

zBaseQuantizer.__init__weight_quant_typereturnc                 C   s0   | j d ur| j S |tjjtjjtjjtjjfv S r*   )r]   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r$   rv   r'   r'   r(   is_weight_symmetric   s   
z!BaseQuantizer.is_weight_symmetricc                 C   s   t r*   )NotImplementedErrorr0   r'   r'   r(   quantize_model   s   zBaseQuantizer.quantize_modelc                 C   s   t || j }|d uS r*   )r
   rV   rn   )r$   
input_namern   r'   r'   r(   is_input_a_initializer   s   z$BaseQuantizer.is_input_a_initializerc                 C   s   | j S r*   )rW   r0   r'   r'   r(   is_per_channel   s   zBaseQuantizer.is_per_channelc                 C   sN   t || j }|d ur|jtjjtjjfv S | jr| j	d u r!dS | j	
|S )NF)r
   rV   rn   	data_typerx   ry   FLOATFLOAT16rZ   r[   is_valid_quantize_weight)r$   weight_nameweightr'   r'   r(   r      s   z&BaseQuantizer.is_valid_quantize_weightc                 C   sV   | j d urt| j dkr|j| j vrdS |j| jvrdS | jd ur)|j| jv r)dS dS )Nr   FT)rh   r6   r>   op_typerj   ri   )r$   noder'   r'   r(   should_quantize_node   s   
z"BaseQuantizer.should_quantize_nodec                 C   s  dd | j j jD }t|dkrtd|d j}|dkr(td| d dS |dk rOtd| d	 | j j j|d  | j j jt	j
d
dg d}|dk r| jt	jjkrtd| d | j j j|d  | j j jt	j
d
dg d| j j _d}|S )Nc                 S   s    g | ]}|j r|j d kr|qS )zai.onnx)domain)r?   opsetr'   r'   r(   
<listcomp>   s    z5BaseQuantizer.check_opset_version.<locals>.<listcomp>r   z$Failed to find proper ai.onnx domainr   
   z$The original model opset version is ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       z, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   )rV   opset_importr6   r#   versionloggingwarningremoveextendrx   helpermake_opsetidrb   ry   r}   
ir_version)r$   ai_onnx_domainrl   r'   r'   r(   rk      s6   




z!BaseQuantizer.check_opset_version      ?c                 C   s  t || j }t|}|t }| jtjjkr`t	
|}|jt	jkr'tjj}	n|jt	jkr2tjj}	n	td|j d|t	j}
t	jdg|
jd}|d}tj|
|}| j|g d}nz|| | }t	j
|t	jdt	j
|t	jd }
|
 }
t	t	t	jj}t	t	t	jj}t	|
|k st	|
|krtd| d t	 |
||t	j}
t	j
|
t	jd|j!}tj||}| j|g t	j
||jdd}d	}| j}	|d
 }tj||}| j|g | jtjjkr| j}ntjj"}|d }| jtjjkrtj#$|| jdgdg}n$|j%dkr.t	j&|j't	jdd}tj||}n
tj#$||g dg}| j|g ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r    CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r
   rV   rn   r   r	   rb   rx   ry   r}   r   asarrayr    r"   r   r!   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxrc   r   r   clipdimsINT32r   make_tensorsizezerosshape)r$   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrJ   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datar'   r'   r(   quantize_bias_static_impl   sj   



 
z'BaseQuantizer.quantize_bias_static_implFc                 C   s~  |j t }|j d }|j d }t|}| jj|j i d}	d|	v r%|	d j}d|	v r{d|	v r{tj|	d t| d}
t|	d }t	||
 ||
}t|
tjsWJ dt|
 |
jtjkrc|
jtjkskJ d	|
j t|tjszJ dt| na|| jkr| |n| j}t|
 ||	d
||	d| jo|| j|	d|	dd\}
}}t|
tjsJ dt|
 |
jtjkr|
jtjksJ d	|
j t|tjsJ dt| |j}tj||g |d }tj||g |
d }| j||g |s| jtj j!krkt  }| j|_|j"#|j" ||_ |
 $ % |_&t'durjt'|}|j(|j(ksB|% |% krjt)d|j( d|% dd  d|% dd  d|j( dt*|dd  dnH|tj j+tj j,fv r|jtj-tj.fvrt)d| dt/t0|% }tjj|||j"|dd}ntj1|tj2|d|j"}tj34||}| j|g |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricrX   rminrmaxrX   r_   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5r>   r	   r   rm   get_per_tensor_overridesrJ   r   r   r   r   flattenr   r   r   r    r!   r"   rb   r~   r^   r   r+   rX   r_   r   rx   r   r   r   tolistrV   r   ry   r}   r   r   copytobytesraw_datar   r   RuntimeErrorr   rz   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r$   r   qTyperX   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datar'   r'   r(   quantize_initializer_impl1  s   
	



 



 


z'BaseQuantizer.quantize_initializer_implTc           &      C   s  t || j }|d u rtd|t|}t|j}t||\}	}
|	s0td| d| d| |
}|j| }| jj	|d|igd}t|}|dkrY||krYtd| d	| d
t|d d |\}}|rj||kr|td| d| d|d d  dd|d v r|d d j
}|d d| |}|d d| jo|}g }g }g }t|j}t|}d||< t|D ]}|||}||k r|nd}|| }d|v r5d|v r5tj|d t| d}t|d }t|| ||}t|tjsJ dt| |jtjkr|jtjksJ d|j t|tjs$J dt| t|tjs4J dt| n]t| |||| j|d|dd\}}}t|tjs\J dt| |jtjkrj|jtjksrJ d|j t|tjsJ dt| t|tjsJ dt| || || |t| | qt!||}|t" }|d }|d } |j#| g}!t$j%&| |j'|!t(|) }"t$j%&|||!t(|) }#| j*|"|#g |s=|t$j+j,t$j+j-fv r |jtj.tj/fvrt0d| dt1t2|3 }$t$j%j&||||$dd }%| j*|%g ntj|t$j%4|d |j#}t$j56||}%| j*|%g ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   rX   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r
   rV   rn   r#   r   r6   r   r   rm   get_per_channel_overridesrJ   r+   r~   rX   listrangetaker   r   r   r   r   r   r   r   r    r!   r"   r   r_   appendr   r   concatenater	   r   rx   r   r   r   hstackr   r   ry   rz   r   r   r   r   r   r   r   r   r   r   )&r$   r   rb   channel_axisrX   r   rn   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr   r   r   zero_scale_shaper   r   r   r   r'   r'   r(    quantize_weight_per_channel_impl  s   	






  

 





z.BaseQuantizer.quantize_weight_per_channel_implc                 C   s   | j d u rd S | j D ]p}|jdv r_| |sqt| j |jd  dkr(q|jd | j vs8|jd | j vr9q| j |jd  }t	|t
sVtdt| d|jd d|| j |jd < q|jdkr|| |sjqt
tdtd	d
| j |jd < qd S )N)ClipRelur   r   r   z for r   Softmaxr   r   )lowesthighest)rg   rV   nodesr   r   r6   input_name_to_nodesrU   rT   r   r   r   r   r   r!   )r$   r   tdr'   r'   r(   adjust_tensor_ranges  s(   


 
 

$z"BaseQuantizer.adjust_tensor_rangesr*   )r   )FF)TF)r8   r9   r:   r)   rx   ry   DataTypeboolr~   r   r   r   r   r   rk   r   r   r  r#  r'   r'   r'   r(   r;   B   s"    
K

#
T`
 r;   )r   typingr   r   numpyr   rx   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr   r	   r
   r   r   r   r   r   r   r   rm   r   r   r;   r'   r'   r'   r(   <module>   s    0