"""Operations for clipping (gradient, weight) tensors to min/max values."""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import array_ops_stack
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.compat import collections_abc
from tensorflow.python.util.tf_export import tf_export


@tf_export("clip_by_value")
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
  """Clips tensor values to a specified min and max.

  Given a tensor `t`, this operation returns a tensor of the same type and
  shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
  Any values less than `clip_value_min` are set to `clip_value_min`. Any values
  greater than `clip_value_max` are set to `clip_value_max`.

  Note: `clip_value_min` needs to be less than or equal to `clip_value_max`
  for correct results.

  For example:

  Basic usage passes a scalar as the min and max value.

  >>> t = tf.constant([[-10., -1., 0.], [0., 2., 10.]])
  >>> t2 = tf.clip_by_value(t, clip_value_min=-1, clip_value_max=1)
  >>> t2.numpy()
  array([[-1., -1.,  0.],
         [ 0.,  1.,  1.]], dtype=float32)

  The min and max can be the same size as `t`, or broadcastable to that size.

  >>> t = tf.constant([[-1, 0., 10.], [-1, 0, 10]])
  >>> clip_min = [[2],[1]]
  >>> t3 = tf.clip_by_value(t, clip_value_min=clip_min, clip_value_max=100)
  >>> t3.numpy()
  array([[ 2.,  2., 10.],
         [ 1.,  1., 10.]], dtype=float32)

  Broadcasting fails, intentionally, if you would expand the dimensions of `t`:

  >>> t = tf.constant([[-1, 0., 10.], [-1, 0, 10]])
  >>> clip_min = [[[2, 1]]] # Has a third axis
  >>> t4 = tf.clip_by_value(t, clip_value_min=clip_min, clip_value_max=100)
  Traceback (most recent call last):
  ...
  InvalidArgumentError: Incompatible shapes: [2,3] vs. [1,1,2]

  It throws a `TypeError` if you try to clip an `int` to a `float` value
  (`tf.cast` the input to `float` first).

  >>> t = tf.constant([[1, 2], [3, 4]], dtype=tf.int32)
  >>> t5 = tf.clip_by_value(t, clip_value_min=-3.1, clip_value_max=3.1)
  Traceback (most recent call last):
  ...
  TypeError: Cannot convert ...


  Args:
    t: A `Tensor` or `IndexedSlices`.
    clip_value_min: The minimum value to clip to. A scalar `Tensor` or one that
      is broadcastable to the shape of `t`.
    clip_value_max: The maximum value to clip to. A scalar `Tensor` or one that
      is broadcastable to the shape of `t`.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor` or `IndexedSlices`.

  Raises:
    `tf.errors.InvalidArgumentError`: If the clip tensors would trigger array
      broadcasting that would make the returned tensor larger than the input.
    TypeError: If dtype of the input is `int32` and dtype of
      the `clip_value_min` or `clip_value_max` is `float32`
  """
  with ops.name_scope(name, "clip_by_value",
                      [t, clip_value_min, clip_value_max]) as name:
    values = ops.convert_to_tensor(
        t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
        name="t")

    # Clip to the max first, then to the min, asserting after each step that
    # broadcasting has not changed the shape of the input.
    t_min = math_ops.minimum(values, clip_value_max)
    values.shape.assert_is_compatible_with(t_min.shape)

    t_max = math_ops.maximum(t_min, clip_value_min, name=name)
    values.shape.assert_is_compatible_with(t_max.shape)

    if isinstance(t, indexed_slices.IndexedSlices):
      t_max = indexed_slices.IndexedSlices(t_max, t.indices, t.dense_shape)

  return t_max

@ops.RegisterGradient("ClipByValue")
def _clip_by_value_grad(op, grad):
  """Returns grad of clip_by_value."""
  x = op.inputs[0]
  y = op.inputs[1]
  z = op.inputs[2]
  gdtype = grad.dtype
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  sz = array_ops.shape(z)
  gradshape = array_ops.shape(grad)
  zeros = array_ops.zeros(gradshape, gdtype)
  xymask = math_ops.less(x, y)
  xzmask = math_ops.greater(x, z)
  _, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  _, rz = gen_array_ops.broadcast_gradient_args(sx, sz)
  # Entries clipped from below (x < y) or from above (x > z) pass no gradient
  # back to x; that gradient flows to the corresponding clip bound instead.
  xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad)
  ygrad = array_ops.where(xymask, grad, zeros)
  zgrad = array_ops.where(xzmask, grad, zeros)
  # Sum the bound gradients over the broadcast dimensions so they match the
  # (possibly lower-rank) shapes of the min and max arguments.
  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
  gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz)
  return xgrad, gy, gz


@tf_export("clip_by_norm")
@dispatch.add_dispatch_support
def clip_by_norm(t, clip_norm, axes=None, name=None):
  """Clips tensor values to a maximum L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its L2-norm is less than or equal to `clip_norm`,
  along the dimensions given in `axes`. Specifically, in the default case
  where all dimensions are used for calculation, if the L2-norm of `t` is
  already less than or equal to `clip_norm`, then `t` is not modified. If
  the L2-norm is greater than `clip_norm`, then this operation returns a
  tensor of the same type and shape as `t` with its values set to:

  `t * clip_norm / l2norm(t)`

  In this case, the L2-norm of the output tensor is `clip_norm`.

  As another example, if `t` is a matrix and `axes == [1]`, then each row
  of the output will have L2-norm less than or equal to `clip_norm`. If
  `axes == [0]` instead, each column of the output will be clipped.
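
  For example, row 1 of `t` below has L2-norm 5 and row 2 has L2-norm 10, so
  with `axes == [1]` and `clip_norm = 5.0` only the second row is rescaled:

  >>> t = tf.constant([[3.0, 4.0], [6.0, 8.0]])
  >>> tf.clip_by_norm(t, 5.0, axes=[1]).numpy()
  array([[3., 4.],
         [3., 4.]], dtype=float32)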

  Code example:

  >>> some_nums = tf.constant([[1, 2, 3, 4, 5]], dtype=tf.float32)
  >>> tf.clip_by_norm(some_nums, 2.0).numpy()
  array([[0.26967996, 0.5393599 , 0.80903983, 1.0787199 , 1.3483998 ]],
        dtype=float32)

  This operation is typically used to clip gradients before applying them with
  an optimizer.  Most gradient data is a collection of different shaped tensors
  for different parts of the model.  Thus, this is a common usage:

  ```
  # Get your gradients after training
  loss_value, grads = grad(model, features, labels)

  # Apply some clipping
  grads = [tf.clip_by_norm(g, norm)
               for g in grads]

  # Continue on with training
  optimizer.apply_gradients(grads)
  ```

  Args:
    t: A `Tensor` or `IndexedSlices`.  This must be a floating point type.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value, also
      floating point.
      Note: If a negative clip_norm is provided, it will be treated as zero.
    axes: A 1-D (vector) `Tensor` of type int32 containing the dimensions to use
      for computing the L2-norm. If `None` (the default), uses all dimensions.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor` or `IndexedSlices`.

  Raises:
    ValueError: If the clip_norm tensor is not a 0-D scalar tensor.
    TypeError: If dtype of the input is not a floating point or
      complex type.
  """
  with ops.name_scope(name, "clip_by_norm", [t, clip_norm]) as name:
    values = ops.convert_to_tensor(
        t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
        name="t")

    # Calculate the L2-norm and clip elements by the ratio of clip_norm to
    # that norm. A negative clip_norm is treated as zero.
    if np.isscalar(clip_norm):
      if clip_norm < 0:
        clip_norm = 0
    else:
      clip_norm = math_ops.cast(
          math_ops.maximum(clip_norm, 0), dtype=values.dtype)
    l2sum = math_ops.reduce_sum(values * values, axes, keepdims=True)
    pred = l2sum > 0
    # Two-step tf.where keeps the norm (and its gradient) finite when
    # l2sum == 0.
    l2sum_safe = array_ops.where(pred, l2sum, array_ops.ones_like(l2sum))
    l2norm = array_ops.where(pred, math_ops.sqrt(l2sum_safe), l2sum)
    intermediate = values * clip_norm
    # Assert that the shape is compatible with the initial shape, to prevent
    # unintentional broadcasting.
    values.shape.assert_is_compatible_with(intermediate.shape)
    values_clip = array_ops.identity(
        intermediate / math_ops.maximum(l2norm, clip_norm), name=name)

    if isinstance(t, indexed_slices.IndexedSlices):
      return indexed_slices.IndexedSlices(values_clip, t.indices,
                                          t.dense_shape)

    return values_clip


@tf_export("linalg.global_norm", v1=["linalg.global_norm", "global_norm"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("global_norm")
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.
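
  For example, two tensors holding the entries 3 and 4 give the same global
  norm as a single `[3., 4.]` tensor, namely `sqrt(3**2 + 4**2) = 5`:

  >>> tf.linalg.global_norm([tf.constant([3.0]), tf.constant([4.0])]).numpy()
  5.0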

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if not isinstance(t_list, collections_abc.Sequence) or isinstance(
      t_list, str):
    raise TypeError("`t_list` should be a sequence of tensors. "
                    f"Received {type(t_list)}.")
  t_list = list(t_list)
  with ops.name_scope(name, "global_norm", t_list) as name:
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
            name="t_%d" % i) if t is not None else t
        for i, t in enumerate(t_list)
    ]
    half_squared_norms = []
    for v in values:
      if v is not None:
        with ops.colocate_with(v):
          half_squared_norms.append(gen_nn_ops.l2_loss(v))

    half_squared_norm = math_ops.reduce_sum(
        array_ops_stack.stack(half_squared_norms))

    # l2_loss returns sum(t ** 2) / 2, so double it back under the sqrt.
    norm = math_ops.sqrt(
        half_squared_norm *
        constant_op.constant(2.0, dtype=half_squared_norm.dtype),
        name="global_norm")

  return norm


@tf_export("clip_by_global_norm")
@dispatch.add_dispatch_support
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  If `global_norm == infinity` then the entries in `t_list` are all set to `NaN`
  to signal that an error occurred.

  Any of the entries of `t_list` that are of type `None` are ignored.

  This is the correct way to perform gradient clipping (Pascanu et al., 2012).

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.
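
  For example, the two tensors below have global norm 5, so with
  `clip_norm = 2.5` every entry is scaled by the ratio 2.5 / 5 = 0.5:

  >>> t_list = [tf.constant([3.0, 4.0]), tf.constant([0.0])]
  >>> clipped, norm = tf.clip_by_global_norm(t_list, clip_norm=2.5)
  >>> norm.numpy()
  5.0
  >>> clipped[0].numpy()
  array([1.5, 2. ], dtype=float32)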

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `list_t`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.

  References:
    On the difficulty of training Recurrent Neural Networks:
      [Pascanu et al., 2012](http://proceedings.mlr.press/v28/pascanu13.html)
      ([pdf](http://proceedings.mlr.press/v28/pascanu13.pdf))
  """
  if not isinstance(t_list, collections_abc.Sequence) or isinstance(
      t_list, str):
    raise TypeError("`t_list` should be a sequence of tensors. "
                    f"Received {type(t_list)}.")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)

  with ops.name_scope(name, "clip_by_global_norm",
                      t_list + [clip_norm]) as name:
    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm.
    scale_for_finite = clip_norm * math_ops.minimum(
        1.0 / use_norm,
        constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)
    # If use_norm is any finite number, this is a no-op. For inf/-inf/NaN,
    # this will make scale NaN.
    scale = scale_for_finite + (use_norm - use_norm)

    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
            name="t_%d" % i) if t is not None else t
        for i, t in enumerate(t_list)
    ]

    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
      else:
        with ops.colocate_with(v):
          values_clipped.append(
              array_ops.identity(v * scale, name="%s_%d" % (name, i)))

    list_clipped = [
        indexed_slices.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, indexed_slices.IndexedSlices) else c_v
        for (c_v, t) in zip(values_clipped, t_list)
    ]

  return list_clipped, use_norm


@deprecation.deprecated(
    date=None,
    instructions="clip_by_average_norm is deprecated in TensorFlow 2.0. "
    "Please use clip_by_norm(t, clip_norm * tf.cast(tf.size(t), tf.float32), "
    "name) instead.")
@tf_export(v1=["clip_by_average_norm"])
@dispatch.add_dispatch_support
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.
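
  For example, `t` below has 2 elements with average L2-norm 5 / 2 = 2.5, so
  clipping to an average norm of 0.5 scales every entry by 0.5 / 2.5 = 0.2:

  >>> t = tf.constant([3.0, 4.0])
  >>> tf.compat.v1.clip_by_average_norm(t, 0.5).numpy()
  array([0.6, 0.8], dtype=float32)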

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate the L2-norm per element, then clip elements by the ratio of
    # clip_norm to that per-element norm.
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element,
            constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip