From 4ccd6ee1a7fd7f019e1c7acb73f604f1731f8375 Mon Sep 17 00:00:00 2001
From: Paul Kruse
Date: Wed, 30 Oct 2019 18:31:39 +0100
Subject: [PATCH 1/9] Added OneCycleLR scheduler to delira

---
 delira/training/callbacks/__init__.py        |  2 +
 .../training/callbacks/pytorch_schedulers.py | 92 ++++++++++++++++++-
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/delira/training/callbacks/__init__.py b/delira/training/callbacks/__init__.py
index 5ab2b9e8..ddadf6fc 100644
--- a/delira/training/callbacks/__init__.py
+++ b/delira/training/callbacks/__init__.py
@@ -20,3 +20,5 @@
         ReduceLROnPlateauCallback as ReduceLROnPlateauCallbackPyTorch
     from delira.training.callbacks.pytorch_schedulers import StepLRCallback \
         as StepLRCallbackPyTorch
+    from delira.training.callbacks.pytorch_schedulers import OneCycleLRCallback \
+        as OneCycleLRCallbackPyTorch
\ No newline at end of file
diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py
index 05e1164c..1c67a08b 100644
--- a/delira/training/callbacks/pytorch_schedulers.py
+++ b/delira/training/callbacks/pytorch_schedulers.py
@@ -3,7 +3,8 @@
 if 'TORCH' in get_backends():
     from torch.optim.lr_scheduler import ReduceLROnPlateau, \
-        CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR
+        CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, OneCycleLR
+

     class DefaultPyTorchSchedulerCallback(AbstractCallback):
         """
@@ -47,6 +48,95 @@ def at_epoch_end(self, trainer, **kwargs):
             self.scheduler.step(epoch=kwargs.get("curr_epoch", None))
             return {}

+    class OneCycleLRCallback(DefaultPyTorchSchedulerCallback):
+        """
+        Wraps PyTorch's `OneCycleLR` Scheduler as Callback
+
+        """
+
+        def __init__(self, optimizer, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, pct_start=0.3,
+                     anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, max_momentum=0.95, div_factor=25.0,
+                     final_div_factor=10000.0, last_epoch=-1):
+            """
+
+            Parameters
+            ----------
+            optimizer (Optimizer): Wrapped optimizer.
+            max_lr (float or list): Upper learning rate boundaries in the cycle
+                for each parameter group.
+            total_steps (int): The total number of steps in the cycle. Note that
+                if a value is provided here, then it must be inferred by providing
+                a value for epochs and steps_per_epoch.
+                Default: None
+            epochs (int): The number of epochs to train for. This is used along
+                with steps_per_epoch in order to infer the total number of steps in the cycle
+                if a value for total_steps is not provided.
+                Default: None
+            steps_per_epoch (int): The number of steps per epoch to train for. This is
+                used along with epochs in order to infer the total number of steps in the
+                cycle if a value for total_steps is not provided.
+                Default: None
+            pct_start (float): The percentage of the cycle (in number of steps) spent
+                increasing the learning rate.
+                Default: 0.3
+            anneal_strategy (str): {'cos', 'linear'}
+                Specifies the annealing strategy.
+                Default: 'cos'
+            cycle_momentum (bool): If ``True``, momentum is cycled inversely
+                to learning rate between 'base_momentum' and 'max_momentum'.
+                Default: True
+            base_momentum (float or list): Lower momentum boundaries in the cycle
+                for each parameter group. Note that momentum is cycled inversely
+                to learning rate; at the peak of a cycle, momentum is
+                'base_momentum' and learning rate is 'max_lr'.
+                Default: 0.85
+            max_momentum (float or list): Upper momentum boundaries in the cycle
+                for each parameter group. Functionally,
+                it defines the cycle amplitude (max_momentum - base_momentum).
+                Note that momentum is cycled inversely
+                to learning rate; at the start of a cycle, momentum is 'max_momentum'
+                and learning rate is 'base_lr'
+                Default: 0.95
+            div_factor (float): Determines the initial learning rate via
+                initial_lr = max_lr/div_factor
+                Default: 25
+            final_div_factor (float): Determines the minimum learning rate via
+                min_lr = initial_lr/final_div_factor
+                Default: 1e4
+            last_epoch (int): The index of the last batch. This parameter is used when
+                resuming a training job. Since `step()` should be invoked after each
+                batch instead of after each epoch, this number represents the total
+                number of *batches* computed, not the total number of epochs computed.
+                When last_epoch=-1, the schedule is started from the beginning.
+                Default: -1
+            """
+            super().__init__()
+            self.scheduler = OneCycleLR(optimizer, max_lr, total_steps, epochs, steps_per_epoch, pct_start,
+                                        anneal_strategy, cycle_momentum, base_momentum, max_momentum, div_factor,
+                                        final_div_factor, last_epoch)
+
+        def at_iter_begin(self, trainer,
+                          **kwargs):
+            """
+            Executes a single scheduling step
+
+            Parameters
+            ----------
+            trainer : :class:`PyTorchNetworkTrainer`
+                the trainer class, which can be changed
+            kwargs :
+                additional keyword arguments
+
+            Returns
+            -------
+            :class:`PyTorchNetworkTrainer`
+                modified trainer
+
+            """
+            self.scheduler.step()
+
+            return {}
+
     class ReduceLROnPlateauCallback(DefaultPyTorchSchedulerCallback):
         """
         Wraps PyTorch's `ReduceLROnPlateau` Scheduler as Callback

From 6606aaa1eaae0ee0a7dd7ded04c7d718f520c1ca Mon Sep 17 00:00:00 2001
From: Paul Kruse
Date: Thu, 31 Oct 2019 09:18:05 +0100
Subject: [PATCH 2/9] Overwrote at_epoch_end in onecycle scheduler so that it does not get reset every epoch

---
 delira/training/callbacks/pytorch_schedulers.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py
index 1c67a08b..e126a539 100644
--- a/delira/training/callbacks/pytorch_schedulers.py
+++ b/delira/training/callbacks/pytorch_schedulers.py
@@ -137,6 +137,9 @@ def at_iter_begin(self, trainer,

             return {}

+        def at_epoch_end(self, trainer, **kwargs):
+            return {}
+
     class ReduceLROnPlateauCallback(DefaultPyTorchSchedulerCallback):
         """
         Wraps PyTorch's `ReduceLROnPlateau` Scheduler as Callback

From 8c88179e39c2779fbdde9b0b7b13937c9218cc5c Mon Sep 17 00:00:00 2001
From: Paul Kruse
Date: Thu, 31 Oct 2019 10:09:15 +0100
Subject: [PATCH 3/9] The oneylce scheduler should only make steps while training

---
 delira/training/callbacks/pytorch_schedulers.py | 5 +++--
 delira/training/predictor.py                    | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py
index e126a539..0b030d1c 100644
--- a/delira/training/callbacks/pytorch_schedulers.py
+++ b/delira/training/callbacks/pytorch_schedulers.py
@@ -115,7 +115,7 @@ def __init__(self, optimizer, max_lr, total_steps=None, epochs=None, steps_per_e
                                         anneal_strategy, cycle_momentum, base_momentum, max_momentum, div_factor,
                                         final_div_factor, last_epoch)

-        def at_iter_begin(self, trainer,
+        def at_iter_begin(self, trainer, train,
                           **kwargs):
             """
             Executes a single scheduling step
@@ -133,7 +133,8 @@ def at_iter_begin(self, trainer,
                 modified trainer

             """
-            self.scheduler.step()
+            if train:
+                self.scheduler.step()

             return {}

diff --git a/delira/training/predictor.py b/delira/training/predictor.py
index 84436cfa..da0cba2f 100644 --- a/delira/training/predictor.py +++ b/delira/training/predictor.py @@ -275,7 +275,7 @@ def predict_data_mgr( batch_list = [] for i, batch in iterable: - self._at_iter_begin(iter_num=i) + Predictor._at_iter_begin(self, iter_num=i) if not batch_list and (n_batches - i) < batchsize: batchsize = n_batches - i From 682cb389bdb36b576b6e4e5419099fd8db0fe23b Mon Sep 17 00:00:00 2001 From: Travis AutoPEP8 Fixes Date: Mon, 2 Dec 2019 17:38:55 +0000 Subject: [PATCH 4/9] PEP-8 Auto-Fix --- .../training/callbacks/pytorch_schedulers.py | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py index 0b030d1c..82cf8fdd 100644 --- a/delira/training/callbacks/pytorch_schedulers.py +++ b/delira/training/callbacks/pytorch_schedulers.py @@ -3,8 +3,7 @@ if 'TORCH' in get_backends(): from torch.optim.lr_scheduler import ReduceLROnPlateau, \ - CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, OneCycleLR - + CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, OneCycleLR class DefaultPyTorchSchedulerCallback(AbstractCallback): """ @@ -54,9 +53,21 @@ class OneCycleLRCallback(DefaultPyTorchSchedulerCallback): """ - def __init__(self, optimizer, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, pct_start=0.3, - anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, max_momentum=0.95, div_factor=25.0, - final_div_factor=10000.0, last_epoch=-1): + def __init__( + self, + optimizer, + max_lr, + total_steps=None, + epochs=None, + steps_per_epoch=None, + pct_start=0.3, + anneal_strategy='cos', + cycle_momentum=True, + base_momentum=0.85, + max_momentum=0.95, + div_factor=25.0, + final_div_factor=10000.0, + last_epoch=-1): """ Parameters @@ -111,12 +122,23 @@ def __init__(self, optimizer, max_lr, total_steps=None, epochs=None, steps_per_e Default: -1 """ super().__init__() - self.scheduler = OneCycleLR(optimizer, max_lr, total_steps, epochs, steps_per_epoch, pct_start, - anneal_strategy, cycle_momentum, base_momentum, max_momentum, div_factor, - final_div_factor, last_epoch) + self.scheduler = OneCycleLR( + optimizer, + max_lr, + total_steps, + epochs, + steps_per_epoch, + pct_start, + anneal_strategy, + cycle_momentum, + base_momentum, + max_momentum, + div_factor, + final_div_factor, + last_epoch) def at_iter_begin(self, trainer, train, - **kwargs): + **kwargs): """ Executes a single scheduling step From b13677475b28bb01390f03ea0660fffce40b8a9e Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 3 Dec 2019 09:56:39 +0100 Subject: [PATCH 5/9] PEP8-Fixes --- .../training/callbacks/pytorch_schedulers.py | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py index 82cf8fdd..f54a8501 100644 --- a/delira/training/callbacks/pytorch_schedulers.py +++ b/delira/training/callbacks/pytorch_schedulers.py @@ -3,7 +3,8 @@ if 'TORCH' in get_backends(): from torch.optim.lr_scheduler import ReduceLROnPlateau, \ - CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, OneCycleLR + CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, \ + OneCycleLR class DefaultPyTorchSchedulerCallback(AbstractCallback): """ @@ -75,20 +76,21 @@ def __init__( optimizer (Optimizer): Wrapped optimizer. 
max_lr (float or list): Upper learning rate boundaries in the cycle for each parameter group. - total_steps (int): The total number of steps in the cycle. Note that - if a value is provided here, then it must be inferred by providing - a value for epochs and steps_per_epoch. + total_steps (int): The total number of steps in the cycle. Note + that if a value is provided here, then it must be inferred by + providing a value for epochs and steps_per_epoch. Default: None epochs (int): The number of epochs to train for. This is used along - with steps_per_epoch in order to infer the total number of steps in the cycle - if a value for total_steps is not provided. + with steps_per_epoch in order to infer the total number of steps + in the cycle if a value for total_steps is not provided. Default: None - steps_per_epoch (int): The number of steps per epoch to train for. This is - used along with epochs in order to infer the total number of steps in the - cycle if a value for total_steps is not provided. + steps_per_epoch (int): The number of steps per epoch to train for. + This is used along with epochs in order to infer the total + number of steps in the cycle if a value for total_steps is + not provided. Default: None - pct_start (float): The percentage of the cycle (in number of steps) spent - increasing the learning rate. + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. Default: 0.3 anneal_strategy (str): {'cos', 'linear'} Specifies the annealing strategy. @@ -96,17 +98,17 @@ def __init__( cycle_momentum (bool): If ``True``, momentum is cycled inversely to learning rate between 'base_momentum' and 'max_momentum'. Default: True - base_momentum (float or list): Lower momentum boundaries in the cycle - for each parameter group. Note that momentum is cycled inversely - to learning rate; at the peak of a cycle, momentum is + base_momentum (float or list): Lower momentum boundaries in the + cycle for each parameter group. Note that momentum is cycled + inversely to learning rate; at the peak of a cycle, momentum is 'base_momentum' and learning rate is 'max_lr'. Default: 0.85 - max_momentum (float or list): Upper momentum boundaries in the cycle - for each parameter group. Functionally, + max_momentum (float or list): Upper momentum boundaries in the + cycle for each parameter group. Functionally, it defines the cycle amplitude (max_momentum - base_momentum). Note that momentum is cycled inversely - to learning rate; at the start of a cycle, momentum is 'max_momentum' - and learning rate is 'base_lr' + to learning rate; at the start of a cycle, momentum is + 'max_momentum' and learning rate is 'base_lr' Default: 0.95 div_factor (float): Determines the initial learning rate via initial_lr = max_lr/div_factor @@ -114,11 +116,13 @@ def __init__( final_div_factor (float): Determines the minimum learning rate via min_lr = initial_lr/final_div_factor Default: 1e4 - last_epoch (int): The index of the last batch. This parameter is used when - resuming a training job. Since `step()` should be invoked after each - batch instead of after each epoch, this number represents the total - number of *batches* computed, not the total number of epochs computed. - When last_epoch=-1, the schedule is started from the beginning. + last_epoch (int): The index of the last batch. This parameter is + used when resuming a training job. 
Since `step()` should be + invoked after each batch instead of after each epoch, this + number represents the total number of *batches* computed, + not the total number of epochs computed. + When last_epoch=-1, the schedule is started from the + beginning. Default: -1 """ super().__init__() From 7c403b5b2d57c900f0a6605a53a55a9512c25183 Mon Sep 17 00:00:00 2001 From: Travis AutoPEP8 Fixes Date: Tue, 3 Dec 2019 09:30:04 +0000 Subject: [PATCH 6/9] PEP-8 Auto-Fix --- .../training/callbacks/pytorch_schedulers.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py index f54a8501..1b9a7136 100644 --- a/delira/training/callbacks/pytorch_schedulers.py +++ b/delira/training/callbacks/pytorch_schedulers.py @@ -76,20 +76,20 @@ def __init__( optimizer (Optimizer): Wrapped optimizer. max_lr (float or list): Upper learning rate boundaries in the cycle for each parameter group. - total_steps (int): The total number of steps in the cycle. Note - that if a value is provided here, then it must be inferred by + total_steps (int): The total number of steps in the cycle. Note + that if a value is provided here, then it must be inferred by providing a value for epochs and steps_per_epoch. Default: None epochs (int): The number of epochs to train for. This is used along with steps_per_epoch in order to infer the total number of steps in the cycle if a value for total_steps is not provided. Default: None - steps_per_epoch (int): The number of steps per epoch to train for. - This is used along with epochs in order to infer the total - number of steps in the cycle if a value for total_steps is + steps_per_epoch (int): The number of steps per epoch to train for. + This is used along with epochs in order to infer the total + number of steps in the cycle if a value for total_steps is not provided. Default: None - pct_start (float): The percentage of the cycle (in number of steps) + pct_start (float): The percentage of the cycle (in number of steps) spent increasing the learning rate. Default: 0.3 anneal_strategy (str): {'cos', 'linear'} @@ -98,16 +98,16 @@ def __init__( cycle_momentum (bool): If ``True``, momentum is cycled inversely to learning rate between 'base_momentum' and 'max_momentum'. Default: True - base_momentum (float or list): Lower momentum boundaries in the - cycle for each parameter group. Note that momentum is cycled + base_momentum (float or list): Lower momentum boundaries in the + cycle for each parameter group. Note that momentum is cycled inversely to learning rate; at the peak of a cycle, momentum is 'base_momentum' and learning rate is 'max_lr'. Default: 0.85 - max_momentum (float or list): Upper momentum boundaries in the + max_momentum (float or list): Upper momentum boundaries in the cycle for each parameter group. Functionally, it defines the cycle amplitude (max_momentum - base_momentum). Note that momentum is cycled inversely - to learning rate; at the start of a cycle, momentum is + to learning rate; at the start of a cycle, momentum is 'max_momentum' and learning rate is 'base_lr' Default: 0.95 div_factor (float): Determines the initial learning rate via @@ -116,12 +116,12 @@ def __init__( final_div_factor (float): Determines the minimum learning rate via min_lr = initial_lr/final_div_factor Default: 1e4 - last_epoch (int): The index of the last batch. This parameter is - used when resuming a training job. 
Since `step()` should be - invoked after each batch instead of after each epoch, this - number represents the total number of *batches* computed, - not the total number of epochs computed. - When last_epoch=-1, the schedule is started from the + last_epoch (int): The index of the last batch. This parameter is + used when resuming a training job. Since `step()` should be + invoked after each batch instead of after each epoch, this + number represents the total number of *batches* computed, + not the total number of epochs computed. + When last_epoch=-1, the schedule is started from the beginning. Default: -1 """ From a9c9c1dd010ef5de1c0f16a8243e046fe9236657 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 3 Dec 2019 10:50:13 +0100 Subject: [PATCH 7/9] Pep8 --- delira/training/callbacks/pytorch_schedulers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py index 1b9a7136..e9b71bde 100644 --- a/delira/training/callbacks/pytorch_schedulers.py +++ b/delira/training/callbacks/pytorch_schedulers.py @@ -81,8 +81,8 @@ def __init__( providing a value for epochs and steps_per_epoch. Default: None epochs (int): The number of epochs to train for. This is used along - with steps_per_epoch in order to infer the total number of steps - in the cycle if a value for total_steps is not provided. + with steps_per_epoch in order to infer the total number of + steps in the cycle if a value for total_steps is not provided. Default: None steps_per_epoch (int): The number of steps per epoch to train for. This is used along with epochs in order to infer the total From fbc5bab2d86d6c682423ee9fe2bb953ceda8d86c Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 3 Dec 2019 10:51:00 +0100 Subject: [PATCH 8/9] Pep8 --- delira/training/callbacks/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/delira/training/callbacks/__init__.py b/delira/training/callbacks/__init__.py index ddadf6fc..1e2d9ee8 100644 --- a/delira/training/callbacks/__init__.py +++ b/delira/training/callbacks/__init__.py @@ -20,5 +20,6 @@ ReduceLROnPlateauCallback as ReduceLROnPlateauCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import StepLRCallback \ as StepLRCallbackPyTorch - from delira.training.callbacks.pytorch_schedulers import OneCycleLRCallback \ - as OneCycleLRCallbackPyTorch \ No newline at end of file + from delira.training.callbacks.pytorch_schedulers import \ + OneCycleLRCallback as OneCycleLRCallbackPyTorch + From 29e3da5f9ec7e51d5cf275b866220056fb97b218 Mon Sep 17 00:00:00 2001 From: Travis AutoPEP8 Fixes Date: Tue, 3 Dec 2019 10:04:28 +0000 Subject: [PATCH 9/9] PEP-8 Auto-Fix --- delira/training/callbacks/__init__.py | 1 - delira/training/callbacks/pytorch_schedulers.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/delira/training/callbacks/__init__.py b/delira/training/callbacks/__init__.py index 1e2d9ee8..74495507 100644 --- a/delira/training/callbacks/__init__.py +++ b/delira/training/callbacks/__init__.py @@ -22,4 +22,3 @@ as StepLRCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import \ OneCycleLRCallback as OneCycleLRCallbackPyTorch - diff --git a/delira/training/callbacks/pytorch_schedulers.py b/delira/training/callbacks/pytorch_schedulers.py index e9b71bde..b569a792 100644 --- 
a/delira/training/callbacks/pytorch_schedulers.py
+++ b/delira/training/callbacks/pytorch_schedulers.py
@@ -81,7 +81,7 @@ def __init__(
                 providing a value for epochs and steps_per_epoch.
                 Default: None
             epochs (int): The number of epochs to train for. This is used along
-                with steps_per_epoch in order to infer the total number of
+                with steps_per_epoch in order to infer the total number of
                 steps in the cycle if a value for total_steps is not provided.
                 Default: None
             steps_per_epoch (int): The number of steps per epoch to train for.
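
For reference, the stepping pattern that the new OneCycleLRCallback wraps looks like this in plain PyTorch. This is a minimal sketch with placeholder model, data and hyperparameter values; it only illustrates the per-batch step() call, not delira's trainer integration.

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import OneCycleLR

# placeholder model and optimizer; OneCycleLR cycles momentum by default,
# so the wrapped optimizer needs a momentum (or betas) parameter
model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.004, momentum=0.95)

epochs, steps_per_epoch = 5, 100          # placeholder schedule length
scheduler = OneCycleLR(optimizer, max_lr=0.1,
                       epochs=epochs, steps_per_epoch=steps_per_epoch)

for epoch in range(epochs):
    for step in range(steps_per_epoch):   # stands in for iterating a data loader
        x = torch.randn(8, 10)            # dummy batch
        loss = model(x).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # OneCycleLR is advanced once per *batch*, not once per epoch; this is
        # the call the callback issues on every training iteration
        scheduler.step()

With the callback from this series, the same schedule is built through the constructor added in patch 1, e.g. OneCycleLRCallback(optimizer, max_lr=0.1, epochs=5, steps_per_epoch=100) (exported as OneCycleLRCallbackPyTorch), and patch 3 makes at_iter_begin() perform the step() only when train is True. How the callback is registered with a PyTorchNetworkTrainer is left to delira's existing callback mechanism and is not shown here.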