o
    gO                     @   sF  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlmZmZ e	G dd dZe	G d	d
 d
Z			d'ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Z dd Z!d d! Z"d"d# Z#d$d% Z$e%d&krdZ&e$  dS dS )(    N)	dataclass)datetime)Path)Optional)generate_test_dataget_bert_inputsc                   @   sv   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr r   r   ^/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/bert_perf_test.pyr   "   s   
 r   c                   @   sV   e Zd ZU eed< eed< eed< eed< eed< ee ed< ee ed< eed< d	S )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelinput_tuning_resultsoutput_tuning_results	mask_typeN)r   r   r   r   r   r   r   r   r   r   r   r   3   s   
 r      c                 C   sL  dd l }|| |rd| vrtd |rI|dkr ddg}n,|dkr)ddg}n#|d	kr2g d
}n|dkr;ddg}n|dkrDg d}nddg}ndg}| }	||	_|jj|	_|d u rb|j	j
|	_n+|dkrl|j	j|	_n!|dkrv|j	j|	_n|dkr|j	j|	_n|dkr|j	j
|	_n||	_|d ur||	_|j| |	|d}
|r|dkrd|
 v sJ nV|dkrd|
 v sJ nI|d	krd|
 v sJ d|
 v sJ n4|dkrd|
 v sJ n'|dkrd|
 v sJ d|
 v sJ nd|
 v sJ n	d|
 v sJ |d ur$t|}|
t| W d    |
S 1 sw   Y  |
S )Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr.   r,   cudatensorrt)TensorrtExecutionProviderr)   r,      r(   c   )	providersr0   r3   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r    r   r   r   rB   r   tuning_results_pathr7   execution_providerssess_optionssessionfr   r   r   create_session?   st   	










rQ   c                 C   s,   t jtjt jtjt jtjt jtji}||  S )N)torchfloat32npfloat16int32int64longlong)
torch_typetype_mapr   r   r   
numpy_type   s   r[   c                    s4    fdd|   D } fdd|  D }||fS )Nc                    "   i | ]\}}|t | qS r   rR   
from_numpyto.0namearraydevicer   r   
<dictcomp>      " z/create_input_output_tensors.<locals>.<dictcomp>c                    r\   r   r]   r`   rd   r   r   rf      rg   )items)inputsoutputsre   input_tensorsoutput_tensorsr   rd   r   create_input_output_tensors   s   rm   c              
   C   sx   |   }| D ]\}}|||jjdt|j|j|  q| D ]\}}|	||jjdt|j|j|  q#|S Nr   )

io_bindingrh   
bind_inputre   typer[   dtypeshapedata_ptrbind_output)sessrk   rl   ro   rb   tensorr   r   r   create_io_binding   s(   rx   c                 C   s   g }g }|j r	dnd}t|D ]I\}}| ||}	||	 i }
tt|D ]
}|	| |
|| < q&t||
|\}}t| ||}| | t	
 }| | t	
 | }|| q||fS )Nr1   cpu)r   	enumeraterunappendrangelenrm   rx   run_with_iobindingtimeitdefault_timer)rO   
all_inputsoutput_namestest_settingresultslatency_listre   _test_case_idri   resultrj   irk   rl   ro   
start_timelatencyr   r   r   %onnxruntime_inference_with_io_binding   s"   


r   c           
      C   st   t |dkr| |t| g }g }t|D ]\}}t }| ||}t | }	|| ||	 q||fS rn   )r~   r{   randomchoicerz   r   r   r|   )
rO   r   r   r   r   r   ri   r   r   r   r   r   r   onnxruntime_inference   s   
r   c                 C   s   |  }dtj|  d}|d|j d|j ddd7 }|d|j d|j d7 }|d	|j	 d
|j
 d7 }|d|j d|j d7 }|d|j d7 }|d|j 7 }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerB   r   replacer	   r
   r   r   r   r   r   r   )r    rO   r   rN   optionr   r   r   	to_string   s   r   c              	   C   s   t | j|j|j|| j|j| jd}dd | D }t| j||}||v r,t	d| d S t	d| g }|j
rMt|jD ]}	t||||\}
}|| q;nt|jD ]}	t|||\}
}|| qRt|d }t|}t|d}t|d}t|d	}t|d
}t|d}|jd|  }|||||||f||< t	dt|dt|d | jrtj| j}tj|r|}|ddd  dt   d}t	d|d|d |  }t!|d}t"#|| W d    n1 sw   Y  t	d| d S d S )N)r   rL   c                 S   s   g | ]}|j qS r   )rb   )ra   outputr   r   r   
<listcomp>  s    z run_one_test.<locals>.<listcomp>zskip duplicated test:zRunning test:  2   K   Z   _   r5   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr4   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$rQ   r    r   r   r$   r   r%   get_outputsr   r:   r   r}   r   r   extendr   rT   rc   
statisticsmean
percentiler	   formatr&   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrH   rJ   dump)model_settingr   perf_resultsr   r   rO   r   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrP   r   r   r   run_one_test   sr   	



$r   c                 C   s,   t jt| ||||fd}|  |  d S )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processr   r   r   launch_test<  s   
r   c           	      C   s   |j d urt| ||||j  d S tjdd}tjdd}t||h}tdtd|D ]}||vr5|| q*|jdd |D ]
}t| |||| q>d S )NF)logicalTr4      )reverse)	r   r   psutil	cpu_countlistr}   minr|   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   r   r   r   run_perf_testsK  s(   

r   c                 C   s|   t | j| j| j| j\}}}td|j d|j d|j  t	|j|j|j|j
|j||||j|j| jd}t| ||| d S )NzGenerating z samples for batch_size=z sequence_length=)r'   )r   r    r!   r"   r#   r:   r   r	   r
   r   r   r   r   r   r'   r   )r   r   r   	input_idssegment_ids
input_maskr   r   r   r   run_performancec  s.   
r   c                  C   s  t  } | jddtdd | jdddtddd	 | jd
ddtdd | jddtddd | jdddtddd | jddtg dddd | jddtddd | jdddd d! | jdd" | jd#dtd$g d%d&d' | jd(ddd)d! | jdd* | jd+ddd,d! | jdd- | jd.dtd d/d | jd0d1dtd d2d | jd3dtd d4d | jd5dtd d6d | jd7dtd d8d | jd9d td:d; | jd<d td=d; | jd>d?d@tdAd; | jdBdCdddDd! | jddE | jdFdtd$dGd |  }|S )HNz--modelTzbert onnx model path)requiredrq   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rq   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rq   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r4   r(   r5   r5   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rq   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr(   )r   r4   r(   r      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rq   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rq   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   r   r   r   parse_arguments  s  					r   c                  C   s  t  } | jdkrtdtd| j | _| jdkr| j| _t }|	 }t
| j}t|dkr5t|dks9tdt| j| j| j| j| j| j| j| j}|D ])}t|| j| j| j| j| j| j| j| j| j| j| j| j}t d| t!||| qNt"|# ddd	 d
}t$j%&t'| jj(d)| jrdndd&dd t"t*|D | jt+, -d}t.|dddK}	t/j0|	ddd}
d }|D ]6\}}|1d}|d u rg d}|2dd |D  |
3| dd |D }|2dd |D  |
3| qW d    n	1 sw   Y  t d| d S )Nr   r4   r      z batch_size not in range [1, 128]ztest settingFc                 S   s   | d S )Nr4   r   )xr   r   r   <lambda>R  s    zmain.<locals>.<lambda>)r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-c                 S   s   g | ]}t |qS r   )r   ra   r   r   r   r   r   X  s    zmain.<locals>.<listcomp>z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)c                 S      g | ]	}| d d qS )=r   splitr   r   r   r   r   l      c                 S   s   g | ]}t |d qS )r   )r   r   r   r   r   r   o  s    c                 S   r   )r   r4   r   r   r   r   r   r   p  r   zTest summary is saved to)4r   r   maxr   samplesr   r
   r   Managerdictsetr	   r   	Exceptionr   modelr!   r"   r#   r$   r%   r&   r'   r   r   r   r   r   r   r   r   r   r:   r   sortedrh   r   r   r   r   parentr   r   r   r   strftimerH   csvwriterr   r   writerow)r   managerr   batch_size_setr   r	   r   sorted_resultssummary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluesr   r   r   main"  s~   




	
	
r  __main__)Nr(   N)'r   r  rJ   r   r   r   r   r   dataclassesr   r   pathlibr   typingr   numpyrT   r   rR   bert_test_datar   r   r   r   rQ   r[   rm   rx   r   r   r   r   r   r   r   r   r  r   __spec__r   r   r   r   <module>   sR   
Y
E #T
