o
    g                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlmZmZmZmZ d dlZddlmZmZmZ dejd	ejd
ejfddZ		 d0dejd	ejdee ded
ejf
ddZG dd dZ G dd dZ!G dd deZ"G dd de j#dZ$G dd dZ%G dd de%Z&G dd de%Z'G d d! d!e'Z(G d"d# d#e'Z)G d$d% d%e'Z*G d&d' d'e j#dZ+G d(d) d)e+Z,dd*e"j-d+i fd,ee.ef d-eee.  fd.d/Z/dS )1    N)Enum)Path)DictOptionalSequenceTupleUnion)
ModelProtoTensorProtohelpernumpy_helper   )
apply_plotload_model_with_shape_infersmooth_distributionpkqkreturnc                 C   s|   t j| j| jd}| dd t | dd |dd   |dd< | dk|dk@ }d||< | dk|dk@ }t j|| < |S )z
    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.rel_entr.html#scipy.special.rel_entr.
    Python implementation.
    dtypeNr   )npemptyshaper   loginf)r   r   resc2c1 r   Y/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/quantization/calibrate.pyrel_entr   s   2r    baseaxisc                 C   s   |du s|dksJ d|dusJ dt | t j} d|  t j| |dd } t |t j}t | |\} }d| t j||dd }t| |}t j||d}|dur]|t | }|| jS )	z
    Simplifeied version of entropy.
    Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html.
    This avoids taking a dependency on scipy just for this function.
    Nr   z0base={base} must be a positive number or `None`.z
qk is None      ?T)r"   keepdimsr"   )	r   asarrayastypefloat32sumbroadcast_arraysr    r   r   )r   r   r!   r"   vecsr   r   r   entropy'   s   
r-   c                   @   sL   e Zd Zeg dZeg dZdd Zedd Zedd Z	d	d
 Z
dS )
TensorData)avgstdlowesthighesthist
hist_edgesbins)r/   r0   r1   r2   r4   c                 K   s   t | | _| D ]E\}}|tjvr td|dtj d|tjv rJt|ds6tdt	| d||j
tjtjfvrJtd|j
 d|t| || qd S )NzUnexpected value z not in .r   Unexpected type z for k=zUnexpected dtype )listkeys_attrsitemsr.   _allowed
ValueError_floatshasattrtyper   r   float16r(   setattr)selfkwargskvr   r   r   __init__G   s   


zTensorData.__init__c                 C   4   t | dr
t | dstdt|  d| j| jfS )Nr1   r2   z0Attributes 'lowest' and/or 'highest' missing in r6   )r?   AttributeErrordirr1   r2   rC   r   r   r   range_valueS      zTensorData.range_valuec                 C   rH   )Nr/   r0   z)Attributes 'avg' and/or 'std' missing in r6   )r?   rI   rJ   r/   r0   rK   r   r   r   avg_stdY   rM   zTensorData.avg_stdc                    s$    fdd j D } jj|d< |S )Nc                    s   i | ]}|t  |qS r   )getattr).0rE   rK   r   r   
<dictcomp>a   s    z&TensorData.to_dict.<locals>.<dictcomp>CLS)r:   	__class____name__rC   datar   rK   r   to_dict_   s   zTensorData.to_dictN)rT   
__module____qualname__	frozensetr<   r>   rG   propertyrL   rN   rW   r   r   r   r   r.   C   s    

r.   c                   @   sj   e Zd Zdeeeeef f fddZdd Z	dd Z
dd	 Zd
d Zdd Zdd Zdd Zdd ZdS )TensorsDatarV   c              	   C   s   || _ i | _| D ]p\}}t|tstdt| dt|trf|tj	kr;t
|dkr;t|d |d d| j|< q
t
|dkrUt|d |d |d |d d	| j|< q
td
|ddt
| d| dt|tsutdt| d|| j|< q
d S )NzKeys must be strings not r6      r   r   r1   r2         )r1   r2   r3   r5   zUnexpected tuple for rz	, it has z elements: zValues must be TensorData not )calibration_methodrV   r;   
isinstancestr	TypeErrorr@   tupleCalibrationMethodMinMaxlenr.   )rC   rb   rV   rE   rF   r   r   r   rG   g   s"   

&"
zTensorsData.__init__c                 c   s    | j E d H  d S NrV   rK   r   r   r   __iter__y   s   zTensorsData.__iter__c                 C   s
   || j v S rj   rk   rC   keyr   r   r   __contains__|      
zTensorsData.__contains__c                 C   s
   | j | S rj   rk   rm   r   r   r   __getitem__   rp   zTensorsData.__getitem__c                 C   s(   || j vrtd|d|| j |< d S )Nz)Only an existing tensor can be modified, z is not.)rV   RuntimeError)rC   rn   valuer   r   r   __setitem__   s   
zTensorsData.__setitem__c                 C   
   | j  S rj   )rV   r9   rK   r   r   r   r9      rp   zTensorsData.keysc                 C   ru   rj   )rV   valuesrK   r   r   r   rv      rp   zTensorsData.valuesc                 C   ru   rj   )rV   r;   rK   r   r   r   r;      rp   zTensorsData.itemsc                 C   s   | j j| j| jd}|S )N)rR   rV   rb   )rS   rT   rV   rb   rU   r   r   r   rW      s
   zTensorsData.to_dictN)rT   rX   rY   r   rd   r   r.   r   rG   rl   ro   rq   rt   r9   rv   r;   rW   r   r   r   r   r\   f   s    r\   c                   @   s   e Zd ZdZdZdZdZdS )rg   r   r   r]   r`   N)rT   rX   rY   rh   Entropy
PercentileDistributionr   r   r   r   rg      s
    rg   c                   @   sV   e Zd Zedd ZejdefddZdd Z	dd	 Z
d
d ZdedefddZdS )CalibrationDataReaderc                 C   s   t |dr
t|jptS )Nget_next)r?   callabler{   NotImplemented)clssubclassr   r   r   __subclasshook__   s   z&CalibrationDataReader.__subclasshook__r   c                 C      t )z9generate the input data dict for ONNXinferenceSession runNotImplementedErrorrK   r   r   r   r{      s   zCalibrationDataReader.get_nextc                 C   s   | S rj   r   rK   r   r   r   rl         zCalibrationDataReader.__iter__c                 C   s   |   }|d u r
t|S rj   )r{   StopIteration)rC   resultr   r   r   __next__   s   zCalibrationDataReader.__next__c                 C   r   rj   r   rK   r   r   r   __len__   r   zCalibrationDataReader.__len__start_index	end_indexc                 C   r   rj   r   )rC   r   r   r   r   r   	set_range   r   zCalibrationDataReader.set_rangeN)rT   rX   rY   classmethodr   abcabstractmethoddictr{   rl   r   r   intr   r   r   r   r   rz      s    
rz   )	metaclassc                   @   s   e Zd Z					ddeeef deee  fddZdgfd	d
Z	dd Z
defddZdd Zdd ZdefddZdefddZdS )CalibraterBaseNaugmented_model.onnxF
model_pathop_types_to_calibratec                 C   sn   t |trtt|| _nt |trt|| _ntd|| _|| _|| _|| _	|| _
d| _d| _dg| _dS )a  
        :param model_path: ONNX model to calibrate. It should be a model file path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb.
        :param per_channel: whether to compute ranges per each channel.
        z model_path should be model path.NCPUExecutionProvider)rc   rd   r   r   modelr=   r   augmented_model_path	symmetricuse_external_data_formatper_channelaugment_modelinfer_sessionexecution_providers)rC   r   r   r   r   r   r   r   r   r   rG      s   

zCalibraterBase.__init__r   c                 C   s   || _ |   dS )zz
        reset the execution providers to execute the collect_data. It triggers to re-creating inference session.
        N)r   create_inference_session)rC   r   r   r   r   set_execution_providers   s   z&CalibraterBase.set_execution_providersc                 C   s,   t  }t jj|_t j| j|| jd| _dS )z9
        create an OnnxRuntime InferenceSession.
        )sess_options	providersN)	onnxruntimeSessionOptionsGraphOptimizationLevelORT_DISABLE_ALLgraph_optimization_levelInferenceSessionr   r   r   )rC   r   r   r   r   r      s   
z'CalibraterBase.create_inference_sessionr   c           	      C   s   dd |j jD }|dd |j jD  |dd |j jD  dd |j jD }t }tjtj	h}|j j
D ]4}| jrB|j| jv rkt|j|jD ] }||v rj|| }|jdrj|jjj|v rj||vrj|| qJq7||fS )z
        select input/output tensors of candidate nodes to calibrate.
        returns:
            tensors (set): set of tensor name.
            value_infos (dict): tensor name to value info.
        c                 S      i | ]}|j |qS r   namerP   vir   r   r   rQ          z>CalibraterBase.select_tensors_to_calibrate.<locals>.<dictcomp>c                 S   r   r   r   )rP   otr   r   r   rQ      r   c                 S   r   r   r   )rP   itr   r   r   rQ      r   c                 S      h | ]}|j qS r   r   )rP   initr   r   r   	<setcomp>       z=CalibraterBase.select_tensors_to_calibrate.<locals>.<setcomp>tensor_type)graph
value_infoupdateoutputinputinitializersetr
   FLOATFLOAT16noder   op_type	itertoolschainr@   HasFieldr   	elem_typeadd)	rC   r   value_infosr   tensors_to_calibratetensor_type_to_calibrater   tensor_namer   r   r   r   select_tensors_to_calibrate   s$   

z*CalibraterBase.select_tensors_to_calibratec                 C      | j S )zP
        return: augmented onnx model. Call after calling augment_graph
        )r   rK   r   r   r   get_augment_model  s   z CalibraterBase.get_augment_modelc                 C   r   )z
        abstract method: augment the input model to prepare for collecting data. It will:
            1. augment the model to be able to collect desired statistics data
            2. save augmented model to augmented_model_paths
        r   rK   r   r   r   augment_graph  s   zCalibraterBase.augment_graphdata_readerc                 C   r   )z
        abstract method: collect the tensors that will be used for range computation. It can be called multiple times.
        r   )rC   r   r   r   r   collect_data     zCalibraterBase.collect_datar   c                 C   r   )ze
        abstract method: compute data based on the calibration method stored in TensorsData
        r   rK   r   r   r   compute_data"  r   zCalibraterBase.compute_data)Nr   FFF)rT   rX   rY   r   rd   r   r   r   rG   r   r   r	   r   r   r   rz   r   r\   r   r   r   r   r   r      s$    


"r   c                       s|   e Zd Z								ddeeef deee  f fddZd	d
 Z	dd Z
defddZdd ZdefddZ  ZS )MinMaxCalibraterNr   F{Gz?r   r   c
           
         s|   t  j||||||	d g | _d| _t| jjj| _dd | jjjD | _	|| _
|r6|dk s2|dkr6td|| _|| _dS )aw  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
        :param averaging_constant: constant smoothing factor to use when computing the moving average.
        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
        :param per_channel: whether to compute ranges per each channel.
        )r   r   r   r   r   Nc                 S   r   r   r   rP   r   r   r   r   r   L  r   z,MinMaxCalibrater.__init__.<locals>.<setcomp>r   r   z;Invalid averaging constant, which should not be < 0 or > 1.)superrG   intermediate_outputscalibrate_tensors_rangeri   r   r   r   num_model_outputsmodel_original_outputsmoving_averager=   averaging_constantmax_intermediate_outputs)
rC   r   r   r   r   r   r   r   r   r   rS   r   r   rG   *  s"   
zMinMaxCalibrater.__init__c                    s    j\}}tt ttjdgtj	d}jj
j| dd   fdd}|D ]}||d ||d q1tjjjjd	 d
S )z
        Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
        model and ensures their outputs are stored as part of the graph output
        :return: augmented ONNX model
        r   c                 S   s6   |j D ]}tj| |jr|j  S qtd|  d)Nz&Model does not contain a version for 'z'.)opset_importonnxdefshasdomainversionrr   )r   r   r   r   r   r   get_op_version^  s
   

z6MinMaxCalibrater.augment_graph.<locals>.get_op_versionc                    s  d}| d | }|d }t jj|| g|g||d}t jjd|g|g|d}dd jjjD }|d	d jjjD  |d
d jjjD  | |v rV||  j	j
j}ntd| djrt||  j	j
jj}	dgtd|	}
 |jdk r|jtd|
 n tt }ttj|
tjd|}|j| jjj| jjj||g jjjt ||d g d S )Nr   __Reshape)r$   r   Reshape)inputsoutputsr   c                 S   r   r   r   r   r   r   r   rQ   w  r   zNMinMaxCalibrater.augment_graph.<locals>.add_reduce_min_max.<locals>.<dictcomp>c                 S   r   r   r   )rP   or   r   r   rQ   x  r   c                 S   r   r   r   rP   ir   r   r   rQ   y  r   z'Unable to guess tensor type for tensor zE, running shape inference before quantization may resolve this issue.r   r]      axesr   )!r   r   	make_noder   r   r   r   r   r   r@   r   r   r=   r   ri   r   dimrange	attributeappendmake_attributerd   uuiduuid4r   
from_arrayr   arrayint64r   r   extendmake_tensor_value_info)r   reduce_op_namer$   reduce_outputintermediate_outputreduce_nodereshape_noder   	onnx_typetensor_rankreduced_axesreduce_axes_namereduce_axesr   reshape_shape_namerC   r   r   add_reduce_min_maxd  s>   
 z:MinMaxCalibrater.augment_graph.<locals>.add_reduce_min_max	ReduceMin	ReduceMaxsave_as_external_dataN)r   r   rd   r   r   r   r   r   r   r   r   r   r   r   saver   r   )rC   tensorsr   reshape_shaper	  tensorr   r  r   r   S  s   .

zMinMaxCalibrater.augment_graphc                 C   
   g | _ d S rj   r   rK   r   r   r   clear_collected_data  rp   z%MinMaxCalibrater.clear_collected_datar   c                 C   s   	 |  }|sn| j| jd | | jd ur$t| j| jkr$|   qt| jdkr5| jd u r5t	d| 
 }t|tsHtdt| d|   d S )NTr   No data is collected.z+compute_data must return a TensorsData not r6   )r{   r   r   r   runr   ri   r  r   r=   r   rc   r\   re   r@   )rC   r   r   tr   r   r   r     s   

zMinMaxCalibrater.collect_datac                 C   s   |s|S |  D ]o\}}t|tr|jd }|jd }n|\}}t|| tr6|| jd }|| jd }n|| \}}| jrR|| j||   }	|| j||   }
n
t||}	t||}
t|tsht|| trqt|	|
d||< q|	|
f||< q|S )Nr   r   r^   )r;   rc   r.   rL   r   r   minmax)rC   	old_range	new_rangern   rs   old_minold_maxnew_minnew_max	min_value	max_valuer   r   r   merge_range  s(   



zMinMaxCalibrater.merge_ranger   c                    s  t jdkr
jS fddtt jd D fddjD }i |D ]}| D ]\}}|g | q.q(jd   fddtdt  dD }fdd	D }g }tdt  dD ]\}jrt	j
| |  dd
}	t	j
| |d   dd
}
nt	j| |  dd
}	t	j| |d   dd
}
jrt	jt	|	t	|
gdd
}|t| |g qg|t|	|
g qgttjtt||}jr݈j|_jS |_jS )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
        r   c                    s   g | ]
} j  | jqS r   )r   get_outputsr   r   rK   r   r   
<listcomp>  s    z1MinMaxCalibrater.compute_data.<locals>.<listcomp>c                       g | ]	}t t |qS r   r   ziprP   r   output_namesr   r   r$        Nc                    s   g | ]} |  d d qS )r   r   )
rpartitionr   )added_output_namesr   r   r$    s    r]   c                    s    i | ]}|j vr| | qS r   )r   r   )merged_output_dictrC   r   r   rQ     s    z1MinMaxCalibrater.compute_data.<locals>.<dictcomp>r%   r   )ri   r   r   r   r;   
setdefaultr   r   r   r   meanr  r  r   absrf   r\   rg   rh   r   r'  r"  )rC   output_dicts_listdrE   rF   calibrate_tensor_namesmerged_added_output_dictpairsr   min_value_arraymax_value_arraymax_absolute_valuenew_calibrate_tensors_ranger   )r-  r.  r*  rC   r   r     sF    

zMinMaxCalibrater.compute_data)Nr   FFFr   NF)rT   rX   rY   r   rd   r   r   r   rG   r   r  rz   r   r"  r\   r   __classcell__r   r   r   r   r   )  s&    

)I!r   c                       sv   e Zd Z									dd	eeef d
eee  f fddZdd Z	dd Z
defddZdefddZ  ZS )HistogramCalibraterNr   F
percentile      -X@samer   r   c                    sv   t  j|||||d g | _d| _t| jjj| _dd | jjjD | _	d| _
|| _|| _|| _|	| _d| _|
| _dS )a=  
        :param model_path: ONNX model to calibrate. It is a model path.
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        :param scenario: see :class:`DistributionCalibrater`
        )r   r   r   r   Nc                 S   r   r   r   r   r   r   r   r   +  r   z/HistogramCalibrater.__init__.<locals>.<setcomp>)r   rG   r   r   ri   r   r   r   r   r   	collectormethodnum_binsnum_quantized_binsr=  r   scenario)rC   r   r   r   r   rC  r   rD  rE  r=  rF  r   r   r   rG     s$   
zHistogramCalibrater.__init__c                 C   sV   |  | j\| _}| jD ]}|| jvr| jjj||  qtj| j| j	| j
d dS )z
        make all quantization_candidates op type nodes as part of the graph output.
        :return: augmented ONNX model
        r  N)r   r   r   r   r   r   r   r   r  r   r   )rC   r   r  r   r   r   r   4  s   


z!HistogramCalibrater.augment_graphc                 C   r  rj   r  rK   r   r   r   r  D  rp   z(HistogramCalibrater.clear_collected_datar   c                    sB  dd j  D }dd j  D 	 | }|sn-j d|}g }t|D ]\}}| |v r<|t| q)|| q)j| qt	jdkrTt
dfd	djD }i  |D ]}	|	 D ]\}
} |
g | qhqb fd
d D }jstjjjjjjd_j|   dS )zy
        Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
        c                 S   r   r   r   rP   node_argr   r   r   r   K  r   z3HistogramCalibrater.collect_data.<locals>.<setcomp>c                 S   s   g | ]}|j qS r   r   rG  r   r   r   r$  L  r   z4HistogramCalibrater.collect_data.<locals>.<listcomp>TNr   r  c                    r%  r   r&  r(  r)  r   r   r$  b  r+  c                    s    i | ]}|j v r| | qS r   )r   r   )merged_dictrC   r   r   rQ   k  s     z4HistogramCalibrater.collect_data.<locals>.<dictcomp>)rC  r   rD  rE  r=  rF  )r   
get_inputsr#  r{   r  	enumerater   copyr   ri   r=   r;   r/  rB  HistogramCollectorrC  r   rD  rE  r=  rF  collectr  )rC   r   input_names_setr   r   fixed_outputsoutput_indexr   r2  r3  rE   rF   clean_merged_dictr   )rI  r*  rC   r   r   G  sH   
z HistogramCalibrater.collect_datar   c                 C   sh   | j stdt| trtj}nt| trtj}nt| tr"tj	}n
t
dt|  dt|| j  S )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {tensor name: (min value, max value)}
        z9No collector created and can't generate calibration data.zUnknown calibrater z". This method must be overwritten.)rB  r=   rc   EntropyCalibraterrg   rw   PercentileCalibraterrx   DistributionCalibraterry   re   r@   r\   compute_collection_result)rC   calr   r   r   r   z  s   


z HistogramCalibrater.compute_data)	Nr   Fr=  Fr>  r?  r@  rA  )rT   rX   rY   r   rd   r   r   r   rG   r   r  rz   r   r\   r   r;  r   r   r   r   r<    s&    

,3r<  c                       sF   e Zd Z							d
deeef deee  f fdd	Z  Z	S )rS  Nr   Fr-   r>  r   r   c	           	   
          t  j||||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        )rC  r   rD  rE  Nr   rG   )	rC   r   r   r   r   rC  r   rD  rE  r   r   r   rG        
zEntropyCalibrater.__init__)Nr   Fr-   Fr>  r>  
rT   rX   rY   r   rd   r   r   r   rG   r;  r   r   r   r   rS        

rS  c                       sF   e Zd Z							ddeeef deee  f fd	d
Z  Z	S )rT  Nr   Fr=  r?  r@  r   r   c	           	   
      rX  )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        )rC  r   rD  r=  NrY  )	rC   r   r   r   r   rC  r   rD  r=  r   r   r   rG     rZ  zPercentileCalibrater.__init__)Nr   Fr=  Fr?  r@  r[  r   r   r   r   rT    r\  rT  c                       sD   e Zd Z						ddeeef deee  f fd	d
Z  Z	S )rU  Nr   Fdistributionr>  rA  r   r   c              	      s   t  j|||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param scenario: for float 8 only, if `scenario="same"`,
            the algorithm weights and float 8 follow the same distribution,
            if `scenario="p3"`, it assumes the weights follow
            a gaussian law and float 8 ~ X^3 where X is a gaussian law
        )rC  rD  rF  NrY  )rC   r   r   r   r   rC  rD  rF  r   r   r   rG     s   
zDistributionCalibrater.__init__)Nr   Fr]  r>  rA  r[  r   r   r   r   rU    s    

rU  c                   @   s,   e Zd ZdZejdd Zejdd ZdS )CalibrationDataCollectorzL
    Base class for collecting data for calibration-based quantization.
    c                 C   r   )z
        Generate informative data based on given data.
            name_to_arr : dict
                tensor name to NDArray data
        r   rC   name_to_arrr   r   r   rN    s   z CalibrationDataCollector.collectc                 C   r   )z?
        Get the optimal result among collection data.
        r   rK   r   r   r   rV    s   z2CalibrationDataCollector.compute_collection_resultN)rT   rX   rY   __doc__r   r   rN  rV  r   r   r   r   r^    s    
r^  c                   @   sv   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd ZedddZdd Zdd ZdS )rM  a`  
    Collecting histogram for each tensor. Percentile and Entropy method are supported.

    ref: https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
                 pytorch_quantization/calib/histogram.html
    c                 C   s.   i | _ || _|| _|| _|| _|| _|| _d S rj   )histogram_dictrC  r   rD  rE  r=  rF  )rC   rC  r   rD  rE  r=  rF  r   r   r   rG     s   
zHistogramCollector.__init__c                 C   r   rj   )rb  rK   r   r   r   get_histogram_dict  s   z%HistogramCollector.get_histogram_dictc                 C   sH   t d | jdv r| |S | jdkr | jr| |S | |S td)Nz/Collecting tensor data and making histogram ...>   r-   r]  r=  DOnly 'entropy', 'percentile' or 'distribution' methods are supported)printrC  collect_valuer   collect_absolute_valuer=   r_  r   r   r   rN    s   




zHistogramCollector.collectc                 C   sv  |  D ]3\}}t|trD|D ]}t|tjs$J dt| d|qtdd |D }t|dks>J d| d|t|}nt|tjsVt	dt| d||}|
 }|jdkrlt|}t|}ntjd|jd}tjd|jd}t|}|| jvrtj|| jd	\}	}
|
|j}
|jtjksJ d
|	|
||f| j|< q| j| }|d }|d }t|dsJ dt| t|dsJ dt| |d }|d }t|}||d kr|d |d  }t|d | || |}t||f}tj||d	\}	}
|
|j}
|	dt|  |7  < |jtjks)J d
|	|
t||t||f| j|< qdS )z5
        Collect histogram on absolute value
        r7   z for tensor=c                 s   s    | ]}|j V  qd S rj   r   )rP   ar   r   r   	<genexpr>6  s    z<HistogramCollector.collect_absolute_value.<locals>.<genexpr>r   z6The calibration expects only one element type but got r   r   )r5   zMonly float32 or float16 is supported, every constant must be explicitly typedr]   r`   r   z'old_min should be a numpy array but is r   N)r;   rc   r8   r   ndarrayr@   r   ri   r&   r=   flattensizer  r  r   r   absoluterb  	histogramrD  r'   float64r?   arangehstack)rC   r`  r  data_arrarrdtypesdata_arr_npr   r!  r3   r4   old_histogramr  r  old_histold_hist_edges	temp_amaxwidthnew_bin_edgesr   r   r   rg  .  s\   
&





 z)HistogramCollector.collect_absolute_valuec           
      C   s   |  D ]k\}}t|}| }|jdkr!t|}t|}ntjd|jd}tjd|jd}tjtt	|t	||jd}|| j
v rW| j
| }| |||||| j
|< qtj|| j| |fd\}}	||	|||f| j
|< qdS )z1
        Collect histogram on real value
        r   r   r   N)r;   r   r&   rk  rl  r  r  r   r   r1  rb  merge_histogramrn  rD  )
rC   r`  r  rr  r   r!  	thresholdrv  r3   r4   r   r   r   rf  h  s,   





z HistogramCollector.collect_valuec                 C   s  |\}}}}	}
||
kr)t j|t||
 |
fd\}}|| |t||t|	||
fS |
dkrAt j|t|| |fd\}}||7 }n9t|}d|
 | }t||
 | d }|d|  }|| |
 }t j||| |fd\}}||||   |7  < ||t||t|	||fS )Nr|  r   r]   r   )r   rn  ri   r  r  r   )rC   rv  rr  r  r  new_thresholdrw  rx  r  r  old_thresholdnew_histr   r3   r4   old_num_bins
old_stridehalf_increased_binsnew_num_binsr   r   r   r}    s2   
z"HistogramCollector.merge_histogramc                 C   sl   | j r
t| j dkrtdtd| jd | jdkr |  S | jdkr)|  S | jdkr2|  S td)	Nr   z=Histogram has not been collected. Please run collect() first.z0Finding optimal threshold for each tensor using z algorithm ...r-   r=  r]  rd  )rb  ri   r=   re  rC  compute_entropycompute_percentilecompute_distributionrK   r   r   r   rV    s   


z,HistogramCollector.compute_collection_resultc                 C   s  | j dk s
| j dkrtd| j}| j }i }tdt|  td| j  tdd|  d| d	 | D ]\}}|d }|d
 }| }t	|| }	| j
rrt|	|d }
tj||
 |jd tj||
 |jdf||< n*d| d }t|	d| }
t|	|}tj|| |jdtj||
 |jdf||< |d }|d }|| d |k r||| d
 f||< || d
 |kr|| d |f||< g || |d d R ||< tjdddv rt|| q8|S )Nr   d   z<Invalid percentile. Must be in range 0 <= percentile <= 100.Number of tensors : Number of histogram bins : zPercentile : (g      Y@,)r   r   g      i@r#   r]   r`   QUANTIZATION_DEBUGr   1)r=  r=   rb  re  ri   rD  r;   r)   r   cumsumr   searchsortedr   r   osenvirongetr   )rC   rb  r=  thresholds_dictr  rn  r3   r4   totalcdf	idx_rightpercent_to_cut_one_sideidx_leftr   r!  r   r   r   r    sF   

z%HistogramCollector.compute_percentilec                 C   s   | j }| j}i }tdt|  td| j d td| j  | D ]-\}}| ||}|||< g ||d d R ||< tj	dddv rSt
|d |d	  q&|S )
Nr  r  z: (The number may increase depends on the data it collects)zNumber of quantized bins : r]   r  r   r  r   )rb  rE  re  ri   rD  r;   get_entropy_thresholdr  r  r  r   )rC   rb  rE  r  r  rn  optimal_thresholdr   r   r   r    s   z"HistogramCollector.compute_entropyr   c                 C   s  |dkrt d| d|d d |dd   d }|dkrJ| |  |   }| |d   |   |d  d }tj||jdtj||jdfS t||krt|d dkr| ||   |   }| || | d   |   d }tj||jdtj||jdfS t|| }d|t|< d|t|< t|| | }| |  |   }| |d   |   |d  d }tj||jdtj||jdfS )	Nr   zpower=z <= 0 is invalid.r   r   g      ?r]   r   )	r=   r)   r   r   r   r   r1  isnanisinf)r3   r4   powerrv   r/   r0   factr   r   r   _avg_std  s$   $ $ $ zHistogramCollector._avg_stdc           	   	   C   s:  | j dk r	td| j}i }tdt|  td| j   td| jd | D ]n\}}|d }|d }|jtj	ks@J | jd	krP| j
||dd
\}}n| jdkr`| j
||dd
\}}ntd|jtj	kslJ |jtj	kstJ |jtj	ks|J t||||| | d||< tjdddv rt|| q,|S )Ni   z3Invalid num_bins. Must be in range 512 <= num_bins.r  r  zScenario : r  r   r   rA  )r  p3gUUUUUU?z,Invalid scenario. Must be in {'same', 'p3'}.)r/   r0   r3   r4   r1   r2   r  r  )rD  r=   rb  re  ri   rF  r;   r   r   ro  r  r.   r  r  r  r  r  r   )	rC   rb  r  r  rn  r3   r4   avg_coefstd_coefr   r   r   r    s>   





z'HistogramCollector.compute_distributionc                    s  |d }|d }|j }|d }|d }|d j t|| d } fddt|j D }	t||d dD ]}
||
 }t||
 d |}|| || f|	|
| < t||| }| }t|d| }t||d }|d  |7  < |d  |7  < |dk	tj
}tj|tj
d}|j | }t|D ]}|| }|| }t||| ||< q|d  t||| d 7  < tj|j tj
d}t|D ] }|| }|| }t||| }|dkr|| | |||< qt|}t|}|du s|du rtjtj d}n
tjt|| d}|||
| < q5t|}|	| }|d }|d	 }|d |k r2||d f}|d |kr?|d |f}t|d d
sIJ t|d d
sSJ |S )aF  Given a dataset, find the optimal threshold for quantizing it.
        The reference distribution is `q`, and the candidate distribution is `p`.
        `q` is a truncated version of the original distribution.
        Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
        r   r   r]   c                    s(   g | ]}t jd  dt jd  dfqS )r   r   )r   r   r   r   r   r   r$  E  s   ( z<HistogramCollector.get_entropy_threshold.<locals>.<listcomp>Nr   r   r`   r   )rl  r   r   zerosr   r  rL  deepcopyr)   r'   r   r   r   r   r-   argminr?   )rC   rn  rE  r3   r4   rD  zero_bin_indexnum_half_quantized_binkl_divergence
thresholdsr   r   r   sliced_distributionpleft_outliers_countright_outliers_countnonzerosquantized_binsnum_merged_binsindexstartendqnormdivmin_kl_divergence_idxr  r   r!  r   r   r   r  7  sf   

 
z(HistogramCollector.get_entropy_thresholdN)r   )rT   rX   rY   ra  rG   rc  rN  rg  rf  r}  rV  r  r  staticmethodr  r  r  r   r   r   r   rM  
  s    	:  .(rM  r   Fr   r   c                 C   sN  d }|t jkr3|dd}|dd}|dd}	|dd }
|dd}t| ||||||	|
|d	}na|t jkrV|d	d
}|dd
}|dd}t| ||||||d}n>|t jkry|d	d}|dd}|dd}t| ||||||d}n|t jkr|d	d}|dd}t	| |||||d}|r|
  |  |S td| )Nr   Fr   r   r   r   r   )r   r   r   r   r   r   rD  r>  rE  )r   r   rD  rE  r?  r=  r@  T)r   r   rD  r=  rF  rA  )r   rD  rF  zUnsupported calibration method )rg   rh   r  r   rw   rS  rx   rT  ry   rU  r   r   r=   )r   r   r   calibrate_methodr   extra_options
calibratorr   r   r   r   r   rD  rE  r=  rF  r   r   r   create_calibrator  sx   


	

	r  )Nr   )0r   rL  r   r  r   enumr   pathlibr   typingr   r   r   r   r   numpyr   r   r	   r
   r   r   r   quant_utilsr   r   r   rj  r    floatr   r-   r.   r\   rg   ABCMetarz   r   r   r<  rS  rT  rU  r^  rM  rh   rd   r  r   r   r   r   <module>   sl   
#4n _ ""#   

