o
    gFB                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlmZmZ eeee
ee f f ZeeZG dd dZG dd dZG d	d
 d
ZG dd deZG dd dZdS )    N)OrderedDict)AnyDictListMappingOptionalTupleUnion)InferenceSession
RunOptionsc                   @   s   e Zd ZedededefddZededefddZedefd	d
ZedefddZ	ede
jfddZedejfddZededeee
jf fddZdS )
TypeHelperort_sessionnamereturnc                 C   :   t |  D ]\}}|j|kr|j  S qtd| d)Nzinput name 
 not found)	enumerate
get_inputsr   type
ValueError)r   r   _iinput r   a/var/www/visachat/venv/lib/python3.10/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_type   s
   

zTypeHelper.get_input_typec                 C   r   )Nzoutput name r   )r   get_outputsr   r   r   )r   r   r   outputr   r   r   get_output_type   s
   

zTypeHelper.get_output_typeort_typec                 C   s6   t jt jt jt jtd}| |vrt|  d||  S N)ztensor(int64)ztensor(int32)ztensor(float)ztensor(float16)ztensor(bool) not found in map)numpylonglongintcfloat32float16boolr   )r   ort_type_to_numpy_type_mapr   r   r   ort_type_to_numpy_type!   s   z!TypeHelper.ort_type_to_numpy_typec                 C   s8   t jt jt jt jt jd}| |vrt|  d||  S r   )torchint64int32r$   r%   r&   r   )r   ort_type_to_torch_type_mapr   r   r   ort_type_to_torch_type/   s   z!TypeHelper.ort_type_to_torch_type
numpy_typec                 C   sP   t jtjt jtjt jtjt jtjt jtjttji}| |vr$t	|  d||  S Nr    )
r!   r"   r)   r*   r#   r+   r$   r%   r&   r   )r.   numpy_type_to_torch_type_mapr   r   r   numpy_type_to_torch_type=   s   z#TypeHelper.numpy_type_to_torch_type
torch_typec              
   C   sH   t jtjt jtjt jtjt jtjt jti}| |vr t	|  d||  S r/   )
r)   r*   r!   r"   r+   r#   r$   r%   r&   r   )r2   torch_type_to_numpy_type_mapr   r   r   torch_type_to_numpy_typeL   s   z#TypeHelper.torch_type_to_numpy_typec                 C   sH   i }|   D ]}t|j||j< q|  D ]}t|j||j< q|S )z:Create a mapping from input/output name to numpy data type)r   r   r(   r   r   r   )r   name_to_numpy_typer   r   r   r   r   get_io_numpy_type_mapZ   s   z TypeHelper.get_io_numpy_type_mapN)__name__
__module____qualname__staticmethodr
   strr   r   r(   r-   r!   dtyper1   r)   r4   r   r6   r   r   r   r   r      s    "r   c                   @   sZ   e Zd ZedefddZe	ddejdejdejdeej fd	d
Z	edddZ
dS )IOBindingHelperr   c                 C   sJ   i }|  D ]\}}t| |}t|}tjt|||d||< q|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)r<   device)itemsr   r   r-   r)   emptyr!   prod)r   output_shapesr>   output_buffersr   shaper   r2   r   r   r   get_output_buffersg   s   
z"IOBindingHelper.get_output_buffersN	input_idsposition_idsattention_maskpastc              
   C   s  |du r	t | }|  }| sJ |d|jjd|d t| |	  |dur^t
|D ].\}	}
|
 s9J |
	 }|dkrE|	 }|d|	 |
jjd|d|	  t|
 | q/|dur|| shJ |d|jjd|d t| |	  |dur| sJ |d|jjd|d t| |	  |  D ].}|j}|| }t| d|jj dt|   |||jjd|| || |	  q|S )	z)Returnas IO binding object for a session.NrF   r   past_rH   rG   z device type=z shape=)r   r6   
io_bindingis_contiguous
bind_inputr>   r   listsizedata_ptrr   r   r   loggerdebugbind_output)r   rF   rG   rH   rI   rC   rB   name_to_np_typerK   ipast_irP   r   output_nameoutput_bufferr   r   r   prepare_io_bindingq   sv   

	
	
	

&	z"IOBindingHelper.prepare_io_bindingTc           
      C   sn   g }|   D ].}|j}|| }|| }|dt| |  }	|r/||	   q||	 q|S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r   r!   rA   reshapeclonedetachappendcpu)
r   rC   rB   return_numpyort_outputsr   rW   bufferrD   copy_tensorr   r   r   "get_outputs_from_io_binding_buffer   s    z2IOBindingHelper.get_outputs_from_io_binding_bufferN)T)r7   r8   r9   r:   r
   rE   r)   Tensorr   rY   rc   r   r   r   r   r=   f   s     		Ur=   c                   @   s   e Zd ZdZd"dedejfddZdedefd	d
Z	dd Z
dedejfddZdefddZd#deeejf dedefddZed$dedededeeef fd d!ZdS )%CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerFr   r>   c                 C   sr   || _ dd | j  D | _dd | j  D | _t| j | _| j  | _|| _	t
 | _t
 | _|| _i | _d S )Nc                 S      g | ]}|j qS r   r   ).0r   r   r   r   
<listcomp>       z(CudaSession.__init__.<locals>.<listcomp>c                 S   rg   r   rh   )ri   r   r   r   r   rj      rk   )r   r   input_namesr   output_namesr   r6   io_name_to_numpy_typerK   enable_cuda_graphr   input_tensorsoutput_tensorsr>   buffer_sharing)selfr   r>   ro   r   r   r   __init__   s   
zCudaSession.__init__
input_namerW   c                 C   s4   || j v sJ || jv sJ || j|< || j|< d S rd   )rl   rm   rr   )rs   ru   rW   r   r   r   set_buffer_sharing   s   
zCudaSession.set_buffer_sharingc                 C   s   | ` | `| `d S rd   )rp   rq   rK   )rs   r   r   r   __del__   s   zCudaSession.__del__r   tensorc              	   C   s   |j jd ur
|j jnd}t|jdkrdgnt|j}| j||j j|| j| ||	  || j
v rQ| j| j
| |j j|| j| ||	  || j| j
| < d S d S )Nr      )r>   indexlenrD   rN   rK   rM   r   rn   rP   rr   rS   rq   )rs   r   rx   	device_idtensor_shaper   r   r   bind_input_and_buffer_sharing   s*   
	z)CudaSession.bind_input_and_buffer_sharing
shape_dictc              
   C   sP  | j rH| D ]@\}}|| jv rG|| jv r&t| j| jt|kr"qtd| j| }tj	t|t
|dj| jd}|| j|< | || q| D ]Y\}}|| jv r|| jv rgt| j| jt|krgqL|| jv rmqL| j| }tj	t|t
|dj| jd}|| j|< | j||jj|jjdur|jjnd|t| |  qLdS )z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)r<   )r>   Nr   )ro   r?   rl   rp   tuplerD   RuntimeErrorrn   r)   r@   r   r1   tor>   r~   rm   rq   rr   rK   rS   r   rz   rN   rO   rP   )rs   r   r   rD   numpy_dtyperx   r   r   r   allocate_buffers  sF   




"



zCudaSession.allocate_buffersNT	feed_dictrun_optionssynchronizec                 C   s   |  D ]G\}}t|tjr| sJ || jv rK| jrE| j|  | ks)J | j| j	|j	ks4J |j
jdks<J | j| | q| || q|rc| j  | j| j| | j  | jS | j| j| | jS )z$Bind input tensors and run inferencecuda)r?   
isinstancer)   re   rL   rl   ro   rp   nelementr<   r>   r   copy_r~   rK   synchronize_inputsr   run_with_iobindingsynchronize_outputsrq   )rs   r   r   r   r   rx   r   r   r   infer6  s"   


zCudaSession.inferr   r|   ro   streamr   c                 C   s$   | d|d}|dkrt ||d< |S )NkSameAsRequested)r|   arena_extend_strategyro   r   user_compute_stream)r;   )r|   ro   r   optionsr   r   r   get_cuda_provider_optionsL  s   z%CudaSession.get_cuda_provider_optionsF)NT)r   )r7   r8   r9   __doc__r
   r)   r>   rt   r;   rv   rw   re   r~   	ShapeDictr   r   r   r&   r   r:   intr   r   r   r   r   r   rf      s    "**rf   c                       s   e Zd Z				ddedejdeded	ed
ede	e
eef  f fddZddedefddZdde
eejf def fddZ  ZS )
GpuBindingFr   Nr   r>   r   enable_gpu_graphgpu_graph_idr   rr   c           
         sf   t  ||| |r| D ]
\}}	| ||	 q| | || _|r(t|nd | _|| _	d | _
d S rd   )superrt   r?   rv   r   r   copydeepcopyr   r   last_run_gpu_graph_id)
rs   r   r>   r   r   r   r   rr   ru   rW   	__class__r   r   rt   \  s   


zGpuBinding.__init__disable_cuda_graph_in_runr   c                 C   s.   t  }|rdn| j}|dt| || _|S )Nr   r   )r   r   add_run_config_entryr;   r   )rs   r   r   r   r   r   r   get_run_optionss  s
   zGpuBinding.get_run_optionsr   c                    s*   |  |}| jr|dd t ||S )N'disable_synchronize_execution_providers1)r   r   r   r   r   )rs   r   r   r   r   r   r   r   ~  s   
zGpuBinding.infer)Fr   r   Nr   )r7   r8   r9   r
   r)   r>   r   r&   r   r   r   r;   rt   r   r   re   r   __classcell__r   r   r   r   r   [  s,    *r   c                
   @   sZ   e Zd ZdZddedejdedefdd	Z	
	dde	de
deeeef  defddZdS )GpuBindingManagerzA manager for I/O bindings that support multiple CUDA Graphs.
    One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
    r   ry   r   r>   r   max_cuda_graphsc                 C   s(   || _ || _g | _d | _|| _|| _d S rd   )r   r>   graph_bindingsno_graph_bindingr   r   )rs   r   r>   r   r   r   r   r   rt     s   
zGpuBindingManager.__init__FNr   use_cuda_graphrr   r   c              	   C   s   | j D ]}|j|kr|  S qt| j | jks|s7| jd u r.t| j| j|| j|d| _| jS | j	| | jS t| j| j|dt| j | j|d}| j 
| |S )N)r   rr   T)r   r   r   rr   )r   r   r{   r   r   r   r   r>   r   r   r]   )rs   r   r   rr   gpu_graph_bindingr   r   r   get_binding  s.   


	zGpuBindingManager.get_binding)r   ry   )FN)r7   r8   r9   r   r
   r)   r>   r   rt   r   r&   r   r   r;   r   r   r   r   r   r   r     s    r   )r   loggingcollectionsr   typingr   r   r   r   r   r   r	   r!   r)   onnxruntimer
   r   r;   r   r   	getLoggerr7   rQ   r   r=   rf   r   r   r   r   r   r   <module>   s    $
Uq ,