"""Gradients for operators defined in nn_ops.py."""

import functools
import itertools
import operator

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import array_ops_stack
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops


@ops.RegisterGradient("Conv2DBackpropInput")
def _Conv2DBackpropInputGrad(op: ops.Operation, grad):
  """The derivatives for deconvolution.

  Args:
    op: the Deconvolution op.
    grad: the tensor representing the gradient w.r.t. the output

  Returns:
    the gradients w.r.t. the input and the filter
  """
  # We call the gen_nn_ops backprop functions instead of nn_ops backprop
  # functions for performance reasons in graph mode.
  return [
      None,
      gen_nn_ops.conv2d_backprop_filter(
          grad,
          array_ops.shape(op.inputs[1]),
          op.inputs[2],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
          data_format=op.get_attr("data_format").decode()),
      gen_nn_ops.conv2d(
          grad,
          op.inputs[1],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
          data_format=op.get_attr("data_format").decode())
  ]


@ops.RegisterGradient("Conv2DBackpropFilter")
def _Conv2DBackpropFilterGrad(op: ops.Operation, grad):
  # We call the gen_nn_ops backprop functions instead of nn_ops backprop
  # functions for performance reasons in graph mode.
  return [
      gen_nn_ops.conv2d_backprop_input(
          array_ops.shape(op.inputs[0]),
          grad,
          op.inputs[2],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
          data_format=op.get_attr("data_format").decode()),
      None,
      gen_nn_ops.conv2d(
          op.inputs[0],
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
          data_format=op.get_attr("data_format").decode())
  ]
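
# --- Illustrative sketch (not part of the original module) -----------------
# The two registrations above exploit the duality between Conv2D and its
# backprop ops: differentiating conv2d_backprop_input w.r.t. its arguments
# lands back on conv2d_backprop_filter and conv2d. The hypothetical helper
# below makes that pairing concrete for a plain forward conv2d under default
# dilations; it is a sketch, not TensorFlow API.
def _manual_conv2d_grads(x, filters, out_backprop, strides, padding):
  """Returns (dx, dfilters) for y = conv2d(x, filters) given dy = out_backprop."""
  dx = gen_nn_ops.conv2d_backprop_input(
      array_ops.shape(x), filters, out_backprop,
      strides=strides, padding=padding)
  dfilters = gen_nn_ops.conv2d_backprop_filter(
      x, array_ops.shape(filters), out_backprop,
      strides=strides, padding=padding)
  return dx, dfilters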

@ops.RegisterGradient("DepthwiseConv2dNativeBackpropInput")
def _DepthwiseConv2dNativeBackpropInputGrad(op: ops.Operation, grad):
  return [
      None,
      gen_nn_ops.depthwise_conv2d_native_backprop_filter(
          grad,
          array_ops.shape(op.inputs[1]),
          op.inputs[2],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          data_format=op.get_attr("data_format")),
      gen_nn_ops.depthwise_conv2d_native(
          grad,
          op.inputs[1],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          data_format=op.get_attr("data_format"))
  ]


@ops.RegisterGradient("DepthwiseConv2dNativeBackpropFilter")
def _DepthwiseConv2dNativeBackpropFilterGrad(op: ops.Operation, grad):
  return [
      gen_nn_ops.depthwise_conv2d_native_backprop_input(
          array_ops.shape(op.inputs[0]),
          grad,
          op.inputs[2],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          data_format=op.get_attr("data_format")),
      None,
      gen_nn_ops.depthwise_conv2d_native(
          op.inputs[0],
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          data_format=op.get_attr("data_format"))
  ]
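
# Note (added commentary, not from the original source): the depthwise pair
# above mirrors the Conv2D pair, with the depthwise backprop kernels swapped
# in. Depthwise filters have shape
# [filter_height, filter_width, in_channels, channel_multiplier], so the
# filter gradient is computed against that shape rather than a dense
# [h, w, in_channels, out_channels] kernel.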

@ops.RegisterGradient("Conv3D")
def _Conv3DGrad(op: ops.Operation, grad):
  data_format = op.get_attr("data_format").decode()
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
  return [
      gen_nn_ops.conv3d_backprop_input_v2(
          shape_0,
          op.inputs[1],
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          data_format=data_format),
      gen_nn_ops.conv3d_backprop_filter_v2(
          op.inputs[0],
          shape_1,
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          data_format=data_format)
  ]


@ops.RegisterGradient("Conv3DBackpropInputV2")
def _Conv3DBackpropInputGrad(op: ops.Operation, grad):
  data_format = op.get_attr("data_format").decode()
  return [
      None,
      gen_nn_ops.conv3d_backprop_filter_v2(
          grad,
          array_ops.shape(op.inputs[1]),
          op.inputs[2],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          data_format=data_format),
      gen_nn_ops.conv3d(
          grad,
          op.inputs[1],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          data_format=data_format)
  ]


@ops.RegisterGradient("Conv3DBackpropFilterV2")
def _Conv3DBackpropFilterGrad(op: ops.Operation, grad):
  data_format = op.get_attr("data_format").decode()
  return [
      gen_nn_ops.conv3d_backprop_input_v2(
          array_ops.shape(op.inputs[0]),
          grad,
          op.inputs[2],
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          data_format=data_format),
      None,
      gen_nn_ops.conv3d(
          op.inputs[0],
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          data_format=data_format)
  ]
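
# Illustrative sketch (not part of the original module): average pooling
# spreads each output gradient uniformly over its pooling window, which is
# why _AvgPool3DGrad below needs only the *shape* of the original input and
# never its values. The hypothetical helper shows the 1-D case with a
# non-overlapping window (stride == window).
def _avg_pool_grad_1d_reference(input_len, window, grad):
  """Gives every input position an equal 1/window share of its window's grad."""
  scaled = grad / window
  return array_ops.repeat(scaled, window)[:input_len]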

@ops.RegisterGradient("AvgPool3D")
def _AvgPool3DGrad(op: ops.Operation, grad):
  return gen_nn_ops.avg_pool3d_grad(
      array_ops.shape(op.inputs[0]),
      grad,
      ksize=op.get_attr("ksize"),
      strides=op.get_attr("strides"),
      padding=op.get_attr("padding"),
      data_format=op.get_attr("data_format").decode())


@ops.RegisterGradient("AvgPool3DGrad")
def _AvgPool3DGradGrad(op: ops.Operation, grad):
  return (array_ops.stop_gradient(op.inputs[0]),
          gen_nn_ops.avg_pool3d(
              grad,
              op.get_attr("ksize"),
              op.get_attr("strides"),
              op.get_attr("padding"),
              data_format=op.get_attr("data_format").decode()))


@ops.RegisterGradient("MaxPool3D")
def _MaxPool3DGrad(op: ops.Operation, grad):
  return gen_nn_ops.max_pool3d_grad(
      op.inputs[0],
      op.outputs[0],
      grad,
      ksize=op.get_attr("ksize"),
      strides=op.get_attr("strides"),
      padding=op.get_attr("padding"),
      data_format=op.get_attr("data_format").decode())


@ops.RegisterGradient("MaxPool3DGrad")
def _MaxPool3DGradGrad(op: ops.Operation, grad):
  return (array_ops.zeros_like(op.inputs[0]),
          array_ops.zeros_like(op.inputs[1]),
          gen_nn_ops.max_pool3d_grad_grad(
              op.inputs[0],
              op.inputs[1],
              grad,
              op.get_attr("ksize"),
              op.get_attr("strides"),
              padding=op.get_attr("padding"),
              data_format=op.get_attr("data_format").decode()))


@ops.RegisterGradient("MaxPool3DGradGrad")
def _MaxPool3DGradGradGrad(op: ops.Operation, grad):
  return (array_ops.zeros_like(op.inputs[0]),
          array_ops.zeros_like(op.inputs[1]),
          gen_nn_ops.max_pool3d_grad(
              op.inputs[0],
              op.inputs[1],
              grad,
              op.get_attr("ksize"),
              op.get_attr("strides"),
              padding=op.get_attr("padding"),
              data_format=op.get_attr("data_format").decode()))


@ops.RegisterGradient("Softmax")
def _SoftmaxGrad(op: ops.Operation, grad_softmax):
  """The derivative of the softmax nonlinearity.

  We assume that probs is of shape [batch_size * dim]
  The formula for dsoftmax / dx = (diag(softmax) - softmax * softmax').
  This matrix is diagonal minus a rank one matrix, so it is easy to implement
  as follows:

    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax

  Args:
     op: the Softmax op.
     grad_softmax:  the tensor representing the gradient w.r.t. the softmax
       output.

  Returns:
     gradient w.r.t the input to the softmax

  """
  softmax = op.outputs[0]
  sum_channels = math_ops.reduce_sum(grad_softmax * softmax, -1, keepdims=True)
  return (grad_softmax - sum_channels) * softmax


@ops.RegisterGradient("LogSoftmax")
def _LogSoftmaxGrad(op: ops.Operation, grad):
  """The gradient for log_softmax.

      log_softmax = input - log(sum(exp(input))
      dlog_softmax/dinput = diag - softmax(input)

  Args:
    op: The log softmax op.
    grad: The tensor representing the gradient w.r.t. the output.

  Returns:
    The gradients w.r.t. the input.
  """
  softmax = math_ops.exp(op.outputs[0])
  return grad - math_ops.reduce_sum(grad, -1, keepdims=True) * softmax


@ops.RegisterGradient("BiasAdd")
def _BiasAddGrad(op: ops.Operation, received_grad):
  """Return the gradients for the 2 inputs of bias_op.

  The first input of unused_bias_op is the tensor t, and its gradient is
  just the gradient the unused_bias_op received.

  The second input of unused_bias_op is the bias vector which has one fewer
  dimension than "received_grad" (the batch dimension.)  Its gradient is the
  received gradient Summed on the batch dimension, which is the first dimension.

  Args:
    op: The BiasOp for which we need to generate gradients.
    received_grad: Tensor.  The gradients passed to the BiasOp.

  Returns:
    Two tensors, the first one for the "tensor" input of the BiasOp,
    the second one for the "bias" input of the BiasOp.
  """
  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None
  return (received_grad,
          gen_nn_ops.bias_add_grad(
              out_backprop=received_grad, data_format=data_format))


@ops.RegisterGradient("BiasAddGrad")
def _BiasAddGradGrad(op: ops.Operation, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """
  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None

  shape = array_ops.shape(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat([
        array_ops.ones_like(shape[:1]), bias_shape,
        array_ops.ones_like(shape[2:])
    ], 0)
    tile_mults = array_ops.concat([shape[:1], [1], shape[2:]], 0)
  else:
    expanded_shape = array_ops.concat(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)


@ops.RegisterGradient("BiasAddV1")
def _BiasAddGradV1(unused_bias_op: ops.Operation, received_grad):
  """Return the gradients for the 2 inputs of bias_op.

  The first input of unused_bias_op is the tensor t, and its gradient is
  just the gradient the unused_bias_op received.

  The second input of unused_bias_op is the bias vector which has one fewer
  dimension than "received_grad" (the batch dimension.)  Its gradient is the
  received gradient Summed on the batch dimension, which is the first dimension.

  Args:
    unused_bias_op: The BiasOp for which we need to generate gradients.
    received_grad: Tensor.  The gradients passed to the BiasOp.

  Returns:
    Two tensors, the first one for the "tensor" input of the BiasOp,
    the second one for the "bias" input of the BiasOp.
  """
  reduction_dim_tensor = math_ops.range(array_ops.rank(received_grad) - 1)
  return (received_grad,
          math_ops.reduce_sum(received_grad, reduction_dim_tensor))


@ops.RegisterGradient("Relu")
def _ReluGrad(op: ops.Operation, grad):
  return gen_nn_ops.relu_grad(grad, op.outputs[0])


@ops.RegisterGradient("EluGrad")
def _EluGradGrad(op: ops.Operation, grad):
  elu_x = op.inputs[1]
  return (gen_nn_ops.elu_grad(grad, elu_x),
          array_ops.where(
              elu_x < 0, grad * op.inputs[0], array_ops.zeros_like(elu_x)))


@ops.RegisterGradient("SeluGrad")
def _SeluGradGrad(op: ops.Operation, grad):
  selu_x = op.inputs[1]
  return (gen_nn_ops.selu_grad(grad, selu_x),
          array_ops.where(
              selu_x < 0., grad * op.inputs[0], array_ops.zeros_like(selu_x)))


@ops.RegisterGradient("Relu6")
def _Relu6Grad(op: ops.Operation, grad):
  return gen_nn_ops.relu6_grad(grad, op.outputs[0])


@ops.RegisterGradient("Relu6Grad")
def _Relu6GradGrad(op: ops.Operation, grad):
  x = op.inputs[1]
  return (gen_nn_ops.relu6_grad(grad, x), array_ops.zeros_like(x))


@ops.RegisterGradient("LeakyRelu")
def _LeakyReluGrad(op: ops.Operation, grad):
  x = op.inputs[0]
  alpha = op.get_attr("alpha")
  return gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha)


@ops.RegisterGradient("LeakyReluGrad")
def _LeakyReluGradGrad(op: ops.Operation, grad):
  x = op.inputs[1]
  alpha = op.get_attr("alpha")
  return (gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha),
          array_ops.zeros_like(x))


@ops.RegisterGradient("Elu")
def _EluGrad(op: ops.Operation, grad):
  return gen_nn_ops.elu_grad(grad, op.outputs[0])


@ops.RegisterGradient("Selu")
def _SeluGrad(op: ops.Operation, grad):
  return gen_nn_ops.selu_grad(grad, op.outputs[0])


@ops.RegisterGradient("Softplus")
def _SoftplusGrad(op: ops.Operation, grad):
  return grad * math_ops.sigmoid(op.inputs[0])


@ops.RegisterGradient("SoftplusGrad")
def _SoftplusGradGrad(op: ops.Operation, grad):
  # Let:
  #   y = tf.nn.softplus(x)
  #   dx = gen_nn_ops.softplus_grad(dy, x) = dy / (1 + exp(-x))
  # This op computes (ddy, d2x) from op.inputs == [dy, x] and grad == ddx.
  dy, x = op.inputs
  with ops.control_dependencies([grad]):
    ddy = gen_nn_ops.softplus_grad(grad, x)
    d2x = grad * dy / (math_ops.exp(-x) + 2.0 + math_ops.exp(x))
    return (ddy, d2x)


@ops.RegisterGradient("Softsign")
def _SoftsignGrad(op: ops.Operation, grad):
  return gen_nn_ops.softsign_grad(grad, op.inputs[0])


@ops.RegisterGradient("ReluGrad")
def _ReluGradGrad(op: ops.Operation, grad):
  x = op.inputs[1]
  return (gen_nn_ops.relu_grad(grad, x), array_ops.zeros_like(x))


def _BroadcastMul(vec, mat):
  """Multiply after broadcasting vec to match dimensions of mat.

  Args:
    vec: A 1-D tensor of dimension [D0]
    mat: A 2-D tensor of dimension [D0, D1]

  Returns:
    A tensor of dimension [D0, D1], the result of vec * mat
  """
  # Reshape vec to [D0, 1].
  vec = array_ops.expand_dims(vec, -1)
  return vec * mat


@ops.RegisterGradient("SoftmaxCrossEntropyWithLogits")
def _SoftmaxCrossEntropyWithLogitsGrad(op: ops.Operation, grad_loss,
                                       grad_grad):
  """Gradient function for SoftmaxCrossEntropyWithLogits."""
  # grad_loss is the backprop for cost, and we multiply it with the gradients
  # (which is output[1]). grad_grad is the backprop for the softmax gradient.
  # The second derivative is just the softmax derivative w.r.t. logits.
  softmax_grad = op.outputs[1]
  grad = _BroadcastMul(grad_loss, softmax_grad)

  logits = op.inputs[0]
  if (grad_grad is not None and
      not getattr(grad_grad, "_is_zeros_tensor", False)):
    softmax = gen_nn_ops.softmax(logits)

    grad += ((grad_grad - array_ops.squeeze(
        math_ops.matmul(
            array_ops.expand_dims(grad_grad, 1),
            array_ops.expand_dims(softmax, 2)),
        axis=1)) * softmax)

  return grad, _BroadcastMul(grad_loss, -gen_nn_ops.log_softmax(logits))


@ops.RegisterGradient("SparseSoftmaxCrossEntropyWithLogits")
def _SparseSoftmaxCrossEntropyWithLogitsGrad(op: ops.Operation, grad_loss,
                                             grad_grad):
  """Gradient function for SparseSoftmaxCrossEntropyWithLogits."""
  # grad_loss is the backprop for cost, and we multiply it with the gradients
  # (which is output[1]). grad_grad is the backprop for the softmax gradient.
  # There is no gradient for the labels.
  softmax_grad = op.outputs[1]
  grad = _BroadcastMul(grad_loss, softmax_grad)

  logits = op.inputs[0]
  if (grad_grad is not None and
      not getattr(grad_grad, "_is_zeros_tensor", False)):
    softmax = gen_nn_ops.softmax(logits)

    grad += ((grad_grad - array_ops.squeeze(
        math_ops.matmul(
            array_ops.expand_dims(grad_grad, 1),
            array_ops.expand_dims(softmax, 2)),
        axis=1)) * softmax)

  return grad, None

@ops.RegisterGradient("Conv2D")
def _Conv2DGrad(op: ops.Operation, grad):
  """Gradient function for Conv2D."""
  dilations = op.get_attr("dilations")
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")
  explicit_paddings = op.get_attr("explicit_paddings")
  use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
  data_format = op.get_attr("data_format")
  shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])

  # We call the gen_nn_ops backprop functions instead of nn_ops backprop
  # functions for performance reasons in graph mode.
  return [
      gen_nn_ops.conv2d_backprop_input(
          shape_0,
          op.inputs[1],
          grad,
          dilations=dilations,
          strides=strides,
          padding=padding,
          explicit_paddings=explicit_paddings,
          use_cudnn_on_gpu=use_cudnn_on_gpu,
          data_format=data_format),
      gen_nn_ops.conv2d_backprop_filter(
          op.inputs[0],
          shape_1,
          grad,
          dilations=dilations,
          strides=strides,
          padding=padding,
          explicit_paddings=explicit_paddings,
          use_cudnn_on_gpu=use_cudnn_on_gpu,
          data_format=data_format)
  ]


@ops.RegisterGradient("DepthwiseConv2dNative")
def _DepthwiseConv2dNativeGrad(op: ops.Operation, grad):
  return [
      gen_nn_ops.depthwise_conv2d_native_backprop_input(
          array_ops.shape(op.inputs[0]),
          op.inputs[1],
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          data_format=op.get_attr("data_format")),
      gen_nn_ops.depthwise_conv2d_native_backprop_filter(
          op.inputs[0],
          array_ops.shape(op.inputs[1]),
          grad,
          dilations=op.get_attr("dilations"),
          strides=op.get_attr("strides"),
          padding=op.get_attr("padding"),
          explicit_paddings=op.get_attr("explicit_paddings"),
          data_format=op.get_attr("data_format"))
  ]

@ops.RegisterGradient("Dilation2D")
def _Dilation2DGrad(op: ops.Operation, grad):
  return [
      gen_nn_ops.dilation2d_backprop_input(op.inputs[0], op.inputs[1], grad,
                                           op.get_attr("strides"),
                                           op.get_attr("rates"),
                                           op.get_attr("padding")),
      gen_nn_ops.dilation2d_backprop_filter(op.inputs[0], op.inputs[1], grad,
                                            op.get_attr("strides"),
                                            op.get_attr("rates"),
                                            op.get_attr("padding"))
  ]


@ops.RegisterGradient("LRN")
def _LRNGrad(op: ops.Operation, grad):
  depth_radius = op.get_attr("depth_radius")
  bias = op.get_attr("bias")
  alpha = op.get_attr("alpha")
  beta = op.get_attr("beta")
  return [
      gen_nn_ops.lrn_grad(grad, op.inputs[0], op.outputs[0], depth_radius,
                          bias, alpha, beta)
  ]


@ops.RegisterGradient("AvgPool")
def _AvgPoolGrad(op: ops.Operation, grad):
  return gen_nn_ops.avg_pool_grad(
      array_ops.shape(op.inputs[0], out_type=dtypes.int32),
      grad,
      op.get_attr("ksize"),
      op.get_attr("strides"),
      op.get_attr("padding"),
      data_format=op.get_attr("data_format"))


@ops.RegisterGradient("AvgPoolGrad")
def _AvgPoolGradGrad(op: ops.Operation, grad):
  return (array_ops.stop_gradient(op.inputs[0]),
          gen_nn_ops.avg_pool(
              grad,
              op.get_attr("ksize"),
              op.get_attr("strides"),
              op.get_attr("padding"),
              data_format=op.get_attr("data_format")))

@ops.RegisterGradient("MaxPool")
def _MaxPoolGrad(op: ops.Operation, grad):
  return gen_nn_ops.max_pool_grad(
      op.inputs[0],
      op.outputs[0],
      grad,
      op.get_attr("ksize"),
      op.get_attr("strides"),
      padding=op.get_attr("padding"),
      explicit_paddings=op.get_attr("explicit_paddings"),
      data_format=op.get_attr("data_format"))


@ops.RegisterGradient("MaxPoolV2")
def _MaxPoolGradV2(op: ops.Operation, grad):
  ksize = op.inputs[1]
  strides = op.inputs[2]
  return gen_nn_ops.max_pool_grad_v2(
      op.inputs[0],
      op.outputs[0],
      grad,
      ksize,
      strides,
      padding=op.get_attr("padding"),
      data_format=op.get_attr("data_format")), None, None


@ops.RegisterGradient("MaxPoolWithArgmax")
def _MaxPoolGradWithArgmax(op: ops.Operation, grad, unused_argmax_grad):
  del unused_argmax_grad
  return gen_nn_ops.max_pool_grad_with_argmax(
      op.inputs[0],
      grad,
      op.outputs[1],
      op.get_attr("ksize"),
      op.get_attr("strides"),
      padding=op.get_attr("padding"),
      include_batch_in_index=op.get_attr("include_batch_in_index"))


@ops.RegisterGradient("MaxPoolGrad")
def _MaxPoolGradGrad(op: ops.Operation, grad):
  return (array_ops.zeros_like(op.inputs[0]),
          array_ops.zeros_like(op.inputs[1]),
          gen_nn_ops.max_pool_grad_grad(
              op.inputs[0],
              op.inputs[1],
              grad,
              op.get_attr("ksize"),
              op.get_attr("strides"),
              padding=op.get_attr("padding"),
              data_format=op.get_attr("data_format")))


@ops.RegisterGradient("MaxPoolGradV2")
def _MaxPoolGradGradV2(op: ops.Operation, grad):
  ksize = op.inputs[3]
  strides = op.inputs[4]
  return (array_ops.zeros_like(op.inputs[0]),
          array_ops.zeros_like(op.inputs[1]),
          gen_nn_ops.max_pool_grad_grad_v2(
              op.inputs[0],
              op.inputs[1],
              grad,
              ksize,
              strides,
              padding=op.get_attr("padding"),
              data_format=op.get_attr("data_format")), None, None)


@ops.RegisterGradient("MaxPoolGradGrad")
def _MaxPoolGradGradGrad(op: ops.Operation, grad):
  return (array_ops.zeros_like(op.inputs[0]),
          array_ops.zeros_like(op.inputs[1]),
          gen_nn_ops.max_pool_grad(
              op.inputs[0],
              op.inputs[1],
              grad,
              op.get_attr("ksize"),
              op.get_attr("strides"),
              padding=op.get_attr("padding"),
              data_format=op.get_attr("data_format")))


@ops.RegisterGradient("FractionalMaxPool")
def _FractionalMaxPoolGrad(op: ops.Operation, grad_0, unused_grad_1,
                           unused_grad_2):
  """Returns gradient for FractionalMaxPool.

  Since FractionalMaxPool has three outputs, there are three gradients passed in
  for each of the outputs. Only the first one is useful, the other two gradients
  are empty.

  Args:
    op: The FractionalMaxPoolOp.
    grad_0: Gradient with respect to op.outputs[0]
    unused_grad_1: Gradient with respect to op.outputs[1]/row_seq. It is empty.
    unused_grad_2: Gradient with respect to op.outputs[2]/col_seq. It is empty.

  Returns:
    Input backprop for FractionalMaxPool op.
  """
  return gen_nn_ops.fractional_max_pool_grad(op.inputs[0], op.outputs[0],
                                             grad_0, op.outputs[1],
                                             op.outputs[2],
                                             op.get_attr("overlapping"))


@ops.RegisterGradient("FractionalAvgPool")
def _FractionalAvgPoolGrad(op: ops.Operation, grad_0, unused_grad_1,
                           unused_grad_2):
  """Returns gradient for FractionalAvgPool.

  Since FractionalAvgPool has three outputs, there are three gradients passed in
  for each of the outputs. Only the first one is useful, the other two gradients
  are empty.

  Args:
    op: The FractionalAvgPoolOp.
    grad_0: Gradient with respect to op.outputs[0]
    unused_grad_1: Gradient with respect to op.outputs[1]/row_seq. It is empty.
    unused_grad_2: Gradient with respect to op.outputs[2]/col_seq. It is empty.

  Returns:
    Input backprop for FractionalAvgPool op.
  """
  return gen_nn_ops.fractional_avg_pool_grad(op.inputs[0].get_shape(), grad_0,
                                             op.outputs[1], op.outputs[2],
                                             op.get_attr("overlapping"))


@ops.RegisterGradient("BatchNormWithGlobalNormalization")
def _BatchNormWithGlobalNormalizationGrad(op: ops.Operation, grad):
  """Return the gradients for the 5 inputs of BatchNormWithGlobalNormalization.

  We do not backprop anything for the mean and var intentionally as they are
  not being trained with backprop in the operation.

  Args:
    op: The BatchNormOp for which we need to generate gradients.
    grad: Tensor.  The gradients passed to the BatchNormOp.

  Returns:
    dx: Backprop for input, which is (grad * (g * rsqrt(v + epsilon)))
    dm: Backprop for mean, which is
        sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon))
    dv: Backprop for variance, which is
        sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2)
    db: Backprop for beta, which is grad reduced in all except the
        last dimension.
    dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon)))
  """
  dx, dm, dv, db, dg = gen_nn_ops.batch_norm_with_global_normalization_grad(
      op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[4], grad,
      op.get_attr("variance_epsilon"),
      op.get_attr("scale_after_normalization"))
  return dx, dm, dv, db, dg


def _BaseFusedBatchNormGrad(op: ops.Operation, version, *grad):
  """Return the gradients for the 3 inputs of BatchNorm.

    op: The BatchNormOp for which we need to compute gradients.
    version: Integer indicating which version to use of the fused batch
      norm gradient.
    *grad: An argument list for tensors of gradients wrt the outputs
      with grad[0] as grad_y.

  Returns:
    grad_x: gradient for x, which is scale * rsqrt(variance + epsilon) *
            [grad_y - mean(grad_y) - (x - mean(x)) *
            mean(grad_y * (x - mean(x))) / (variance + epsilon)]
            in training mode; grad_y * scale * rsqrt(pop_variance + epsilon)
            in freeze mode.

    grad_scale: gradient for scale, which is sum(grad_y * (x - mean(x)) *
                rsqrt(variance + epsilon)) in training mode;
                sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon))
                in freeze mode.

    grad_offset: gradient for offset, which is sum(grad_y) in training mode;
                 sum(grad_y) in freeze mode.
  r   r   Úepsilonr   Úis_trainingr   ræ   rç   )Ú
y_backpropr‹   ÚscaleÚreserve_space_1Úreserve_space_2r  r   r  é   Úreserve_space_3rd   )r   r   ræ   r   s   NCDHW)r   r   ræ   rç   r   )rd   s   NHWCÚNHWCÚNDHWC)r   ræ   r   r   )r   rç   r   r   ræ   Nr   )	r   r   r   Úfused_batch_norm_grad_v3Úfused_batch_norm_grad_v2Úfused_batch_norm_gradrE   r   Ú	transpose)r	   Úversionr   r‹   Úgrad_yr  r  r   r  Úgrad_funÚargsrü   ÚdscaleÚdoffsetÚ_Úpop_meanÚpop_varÚtarget_data_formatr   r   r   Ú_BaseFusedBatchNormGradW  sh   




ø


ÿø
r  ÚFusedBatchNormc                 G   ó   t | dg|¢R Ž S rv   ©r  r   r   r   r   Ú_FusedBatchNormGrad©  ry   r  ÚFusedBatchNormV2c                 G   r  r‰   r  r   r   r   r   Ú_FusedBatchNormV2Grad®  ry   r   ÚFusedBatchNormV3c                 G   r  )Nr   r  r   r   r   r   Ú_FusedBatchNormV3Grad³  ry   r"  ÚL2Lossc                 C   s   | j d | S )zÊReturn the gradients for L2Loss.

  Args:
    op: The L2LossOp for which we need to generate gradients.
    grad: Tensor containing a single number.

  Returns:
    The gradient, which is (x * grad).
  """
  return op.inputs[0] * grad


@ops.RegisterGradient("TopK")
@ops.RegisterGradient("TopKV2")
def _TopKGrad(op: ops.Operation, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  # int32 is not supported on GPU, hence the up-casting to int64.
  ind_lastdim = array_ops.gather(
      math_ops.cast(ind_shape, dtypes.int64),
      array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1],
                             array_ops_stack.stack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(
      math_ops.cast(in_shape, dtypes.int64),
      array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D).
  ind = array_ops.reshape(
      ind_2d + math_ops.cast(
          array_ops.expand_dims(
              math_ops.range(0,
                             math_ops.cast(outerdim, dtypes.int64) *
                             in_lastdim, in_lastdim), -1), dtypes.int64), [-1])

  # Substitute grad into the appropriate locations, fill the rest with zeros,
  # then reshape back to the original input shape.
  return [
      array_ops.reshape(
          array_ops.scatter_nd(
              array_ops.expand_dims(ind, -1), array_ops.reshape(grad, [-1]),
              [math_ops.reduce_prod(math_ops.cast(in_shape, dtypes.int64))]),
          in_shape),
      array_ops.zeros([], dtype=dtypes.int32)
  ]


@ops.RegisterGradient("ApproxTopK")
def _ApproxTopKGradient(op: ops.Operation, grad, _):
  """Return the gradients for ApproxTopK.

  Args:
    op: The ApproxTopK for which we need to generate gradients.
    grad: The gradients for backprop.

  Returns:
    Scattered gradient based on the top-k indices.
  """
  # Scatter the incoming gradient back through the top-k index output.
  idx_shape = op.outputs[1].shape
  lifted_idx_shape = idx_shape + [1]
  flat_shape_len = functools.reduce(operator.mul, idx_shape)
  rank = idx_shape.rank
  reduction_dim = op.get_attr("reduction_dimension")
  if reduction_dim < 0:
    reduction_dim = rank + reduction_dim

  def GetLiftedIdx(d):
    if d == reduction_dim:
      return array_ops.reshape(op.outputs[1], lifted_idx_shape)
    iota_len = idx_shape[d]
    iota_shape = list(itertools.repeat(1, rank + 1))
    iota_shape[d] = iota_len
    iota = array_ops.reshape(math_ops.range(iota_len), iota_shape)
    return array_ops.broadcast_to(iota, lifted_idx_shape)

  lifted_idx = array_ops.concat(
      list(GetLiftedIdx(d) for d in range(rank)), axis=rank)
  flat_idx = array_ops.reshape(lifted_idx, [flat_shape_len, rank])
  flat_grad = array_ops.reshape(grad, [flat_shape_len])
  return array_ops.scatter_nd(flat_idx, flat_grad, op.inputs[0].shape)


@ops.RegisterGradient("NthElement")
def _NthElementGrad(op: ops.Operation, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  input = op.inputs[0]  # pylint: disable=redefined-builtin
  output = op.outputs[0]

  # Compute the number of elements which equal to output in each reduction
  # dimension. If there are multiple elements then the gradient will be
  # divided between them.
  indicators = math_ops.cast(
      math_ops.equal(array_ops.expand_dims(output, -1), input), grad.dtype)

  grad = array_ops.expand_dims(grad, -1)
  num_selected = array_ops.expand_dims(
      math_ops.reduce_sum(indicators, -1), -1)

  return [math_ops.divide(indicators, num_selected) * grad, None]


def _MeanAggregator(inputs, segments):
  """Replaces each segment with its mean along the last axis.

  Specifically, each value in the `inputs` tensor gets replaced by the mean
  value computed from the values that belong to the same segment.

  Args:
   inputs: A 2-tensor. Aggregation is done over dimension 1.
   segments: A 2-tensor, same shape as `input`.

  Returns:
    The result, same shape and type as `inputs`.
  """
  result = []
  for inputs_i, segments_i in zip(
      array_ops.split(inputs, inputs.shape[0]),
      array_ops.split(segments, segments.shape[0])):
    # Note that we do not use tf.math.segment_mean, as it has no TPU support.
    means_i = math_ops.unsorted_segment_mean(
        inputs_i, segments_i,
        num_segments=math_ops.reduce_max(segments_i) + 1)
    result.append(
        array_ops.reshape(array_ops.gather(means_i, segments_i), [-1]))
  return array_ops_stack.stack(result, axis=0)


@ops.RegisterGradient("IsotonicRegression")
def _IsotonicRegressionGrad(op: ops.Operation, grad_output, grad_segments):
  """Gradient for the isotonic regression function.

  Args:
    op: The IsotonicRegression tensorflow op.
    grad_output: Tensor of incoming gradients with respect to the output.
    grad_segments: Tensor of incoming gradients with respect to the segments.

  Returns:
    A tensor, same size as `grad_output` with the gradient with respect to
    the input.
  """
  del grad_segments  # Discrete, non-differentiable.
  segments = op.outputs[1]
  return _MeanAggregator(grad_output, segments)
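
# Usage sketch (added commentary, not from the original source): nothing in
# this module is called directly. Importing it fills TensorFlow's gradient
# registry, and tf.GradientTape / tf.gradients then dispatch by op name. A
# minimal eager-mode check might look like:
#
#   import tensorflow as tf
#   x = tf.random.normal([2, 5])
#   with tf.GradientTape() as tape:
#     tape.watch(x)
#     y = tf.reduce_sum(tf.nn.softmax(x) ** 2)
#   dx = tape.gradient(y, x)  # looks up _SoftmaxGrad registered above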