lr_func.py
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.eager import context
import logging
logger = logging.getLogger(__name__)


def get_learning_rate(batch, config):
    # Dispatch to a learning-rate schedule defined in this module by name,
    # or fall back to a fixed value given in the optimizer config.
    if 'learning_rate' in config:
        if config['learning_rate']['name'] in globals():
            logger.info('using learning rate name %s', config['learning_rate']['name'])
            return globals()[config['learning_rate']['name']](batch, config)
        else:
            raise Exception('failed to find learning rate %s in globals %s' % (config['learning_rate']['name'], globals()))
    elif 'optimizer' in config and 'learning_rate' in config['optimizer']:
        return config['optimizer']['learning_rate']
    else:
        raise Exception('failed to find learning rate function or value defined')
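

# Illustrative only (not part of the original module): a config dict shaped the
# way get_learning_rate() reads it above. The key layout is inferred from the
# lookups in this file and is an assumption, not a documented schema.
_EXAMPLE_LR_CONFIG = {
    'learning_rate': {
        'name': 'cyclic_learning_rate',   # must match a function name in globals()
        'cyclic_learning_rate': {
            'learning_rate': 1e-4,        # lower bound of the cycle
            'max_lr': 1e-2,               # upper bound of the cycle
            'step_size': 2000.,           # iterations in half a cycle
            'gamma': 0.99994,             # used only in 'exp_range' mode
            'mode': 'triangular',
        },
    },
}
# lr = get_learning_rate(batch=global_step_tensor, config=_EXAMPLE_LR_CONFIG)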


# From: https://github.com/charlesq34/pointnet
def exponential_decay(global_step, config):
    learning_rate = tf.compat.v1.train.exponential_decay(
        config['optimizer']['lr'],         # Base learning rate.
        global_step,                       # Current index into the dataset.
        config['optimizer']['step_size'],  # Decay step.
        config['optimizer']['decay'],      # Decay rate.
        staircase=True)
    # Clip the learning rate so it never decays below the configured minimum.
    learning_rate = tf.maximum(learning_rate, config['optimizer']['lr_min'])
    return learning_rate
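

# Illustrative only (assumption, not from the original module): the optimizer
# sub-config that exponential_decay() above reads, with made-up values.
_EXAMPLE_EXP_DECAY_CONFIG = {
    'optimizer': {
        'lr': 1e-3,          # base learning rate
        'step_size': 200000, # decay step
        'decay': 0.7,        # decay rate
        'lr_min': 1e-5,      # floor applied via tf.maximum
    },
}
# lr = exponential_decay(global_step, _EXAMPLE_EXP_DECAY_CONFIG)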


def cyclic_learning_rate(global_step, config):
    # taken from
    # https://github.com/mhmoodlan/cyclic-learning-rate/blob/master/clr.py
    """Applies cyclic learning rate (CLR).

    From the paper:
    Smith, Leslie N. "Cyclical learning rates for training neural networks." 2017.
    [https://arxiv.org/pdf/1506.01186.pdf]

    This method lets the learning rate cyclically vary between reasonable
    boundary values, achieving improved classification accuracy, often in
    fewer iterations.

    This code varies the learning rate linearly between the minimum
    (learning_rate) and the maximum (max_lr). It returns the cyclic learning
    rate, computed as:
    ```python
    cycle = floor( 1 + global_step / ( 2 * step_size ) )
    x = abs( global_step / step_size - 2 * cycle + 1 )
    clr = learning_rate + ( max_lr - learning_rate ) * max( 0, 1 - x )
    ```

    Policies:
      'triangular':
        Default; linearly increases then linearly decreases the
        learning rate within each cycle.
      'triangular2':
        The same as the triangular policy except that the learning
        rate difference is cut in half at the end of each cycle,
        so the amplitude shrinks after each cycle.
      'exp_range':
        The learning rate varies between the minimum and maximum
        boundaries and each boundary value declines by an exponential
        factor of gamma**global_step.

    Example: 'triangular2' mode cyclic learning rate.
    ```python
    ...
    global_step = tf.Variable(0, trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=
        clr.cyclic_learning_rate(global_step=global_step, mode='triangular2'))
    train_op = optimizer.minimize(loss_op, global_step=global_step)
    ...
    with tf.Session() as sess:
        sess.run(init)
        for step in range(1, num_steps + 1):
            assign_op = global_step.assign(step)
            sess.run(assign_op)
            ...
    ```

    Args:
      global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
        Global step to use for the cyclic computation. Must not be negative.
      learning_rate: A scalar `float32` or `float64` `Tensor` or a
        Python number. The initial learning rate, which is the lower bound
        of the cycle (default = 0.1).
      max_lr: A scalar. The maximum learning rate boundary.
      step_size: A scalar. The number of iterations in half a cycle.
        The paper suggests step_size = 2-8 x the training iterations per epoch.
      gamma: Constant used in 'exp_range' mode: gamma**global_step.
      mode: One of {'triangular', 'triangular2', 'exp_range'}.
        Default 'triangular'. Values correspond to the policies detailed above.
      name: String. Optional name of the operation. Defaults to
        'CyclicLearningRate'.

    Returns:
      A scalar `Tensor` of the same type as `learning_rate`: the cyclic
      learning rate.

    Raises:
      ValueError: if `global_step` is not supplied.

    @compatibility(eager)
    When eager execution is enabled, this function returns
    a function which in turn returns the decayed learning
    rate Tensor. This can be useful for changing the learning
    rate value across different invocations of optimizer functions.
    @end_compatibility
    """
    config = config['learning_rate']['cyclic_learning_rate']
    learning_rate = config['learning_rate']
    max_lr = config['max_lr']
    step_size = config['step_size']
    gamma = config['gamma']
    mode = config['mode']
    name = None
    if global_step is None:
        raise ValueError("global_step is required for cyclic_learning_rate.")
    with ops.name_scope(name, "CyclicLearningRate",
                        [learning_rate, global_step]) as name:
        learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
        dtype = learning_rate.dtype
        global_step = math_ops.cast(global_step, dtype)
        step_size = math_ops.cast(step_size, dtype)
        def cyclic_lr():
            """Helper to recompute learning rate; most helpful in eager-mode."""
            # computing: cycle = floor( 1 + global_step / ( 2 * step_size ) )
            double_step = math_ops.multiply(2., step_size)
            global_div_double_step = math_ops.divide(global_step, double_step)
            cycle = math_ops.floor(math_ops.add(1., global_div_double_step))
            # computing: x = abs( global_step / step_size - 2 * cycle + 1 )
            double_cycle = math_ops.multiply(2., cycle)
            global_div_step = math_ops.divide(global_step, step_size)
            tmp = math_ops.subtract(global_div_step, double_cycle)
            x = math_ops.abs(math_ops.add(1., tmp))
            # computing: clr = learning_rate + ( max_lr - learning_rate ) * max( 0, 1 - x )
            a1 = math_ops.maximum(0., math_ops.subtract(1., x))
            a2 = math_ops.subtract(max_lr, learning_rate)
            clr = math_ops.multiply(a1, a2)
            if mode == 'triangular2':
                clr = math_ops.divide(clr, math_ops.cast(math_ops.pow(2, math_ops.cast(
                    cycle - 1, tf.int32)), tf.float32))
            if mode == 'exp_range':
                clr = math_ops.multiply(math_ops.pow(gamma, global_step), clr)
            return math_ops.add(clr, learning_rate, name=name)

        # In graph mode return the tensor directly; in eager mode return the
        # callable so the schedule is recomputed on each call.
        if not context.executing_eagerly():
            cyclic_lr = cyclic_lr()
        return cyclic_lr
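

def _clr_worked_example():
    """Illustrative only (not in the original module): a pure-Python check of
    the triangular CLR formula quoted in the docstring above, with made-up
    values. Useful as a sanity check against the TF implementation."""
    import math
    learning_rate, max_lr = 1e-4, 1e-2     # cycle boundaries (assumed values)
    step_size, global_step = 2000., 1000.  # halfway up the first cycle
    cycle = math.floor(1 + global_step / (2 * step_size))   # -> 1
    x = abs(global_step / step_size - 2 * cycle + 1)        # -> 0.5
    clr = learning_rate + (max_lr - learning_rate) * max(0., 1 - x)
    return clr                                              # -> 0.00505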