# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Scheduler utilities for pytorch optimization."""
import math
from torch.optim.lr_scheduler import LambdaLR
from torch.optim.optimizer import Optimizer
scheduler_list = [
'constant',
'constant_with_warmup',
'linear',
'cosine',
'cosine_with_restarts',
'polynomial'
]
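# Each name in `scheduler_list` corresponds to one of the `configure_*`
# factory functions defined below.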
def check_scheduler(scheduler_type: str) -> bool:
    """
    Check if the scheduler type is supported.

    Args:
        scheduler_type (`str`):
            The type of the scheduler.

    Return (`bool`):
        Whether the scheduler type is supported.

    Example:
        >>> from towhee.trainer.scheduler import check_scheduler
        >>> check_scheduler('constant')
        True
    """
    return scheduler_type in scheduler_list
def configure_constant_scheduler(optimizer: Optimizer, last_epoch: int = -1):
"""
Return a scheduler with a constant learning rate, using the learning rate set in optimizer.
Args:
optimizer (`Optimizer`):
The optimizer for which to schedule the learning rate.
last_epoch (`int`):
The last epoch when resuming training.
Return (`LambdaLR`):
A constant scheduler
Example:
>>> from towhee.trainer.scheduler import configure_constant_scheduler
>>> from towhee.trainer.optimization.adamw import AdamW
>>> from torch import nn
>>> def unwrap_scheduler(scheduler, num_steps=10):
>>> lr_sch = []
>>> for _ in range(num_steps):
>>> lr_sch.append(scheduler.get_lr()[0])
>>> scheduler.step()
>>> return lr_sch
>>> mdl = nn.Linear(50, 50)
>>> optimizer = AdamW(mdl.parameters(), lr=10.0)
>>> num_steps = 2
>>> scheduler = configure_constant_scheduler(optimizer)
>>> lr_sch_1 = unwrap_scheduler(scheduler, num_steps)
[10.0, 10.0]
"""
return LambdaLR(optimizer, lambda _: 1, last_epoch=last_epoch)
def configure_constant_scheduler_with_warmup(optimizer: Optimizer, num_warmup_steps: int, last_epoch: int = -1):
"""
Return a schedule with a constant learning rate preceded by a warmup period during which the learning rate
increases linearly between 0 and the initial lr set in the optimizer.
Args:
optimizer (`Optimizer`):
The optimizer to be scheduled.
num_warmup_steps (`int`):
Warmup steps.
last_epoch (`int`):
The last epoch when training is resumed.
Return (`LambdaLR`):
A constant scheduler with warmup.
Example:
>>> from towhee.trainer.scheduler import configure_constant_scheduler_with_warmup
>>> from towhee.trainer.optimization.adamw import AdamW
>>> from torch import nn
>>> def unwrap_scheduler(scheduler, num_steps=10):
>>> lr_sch = []
>>> for _ in range(num_steps):
>>> lr_sch.append(scheduler.get_lr()[0])
>>> scheduler.step()
>>> return lr_sch
>>> mdl = nn.Linear(50, 50)
>>> optimizer = AdamW(mdl.parameters(), lr=10.0)
>>> num_steps = 10
>>> num_warmup_steps = 4
>>> scheduler = configure_constant_scheduler_with_warmup(optimizer, num_warmup_steps)
>>> lr_sch_1 = unwrap_scheduler(scheduler, num_steps)
[0.0, 2.5, 5.0, 7.5, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
"""
def lr_lambda(current_step: int):
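        # LambdaLR multiplies the optimizer's initial lr by the factor returned
        # here: a linear ramp from 0 to 1 over the warmup steps, then 1 thereafter.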
if current_step < num_warmup_steps:
return float(current_step) / float(max(1.0, num_warmup_steps))
return 1.0
return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
def configure_linear_scheduler_with_warmup(optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, last_epoch: int = -1):
"""
Return a scheduler with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after
a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer.
Args:
optimizer (`Optimizer`):
The optimizer to be scheduled.
num_warmup_steps (`int`):
Warmup steps.
num_training_steps (`int`):
Training steps.
last_epoch (`int`):
The last epoch when training is resumed.
Return (`LambdaLR`):
A linear scheduler with warmup.
Example:
>>> from towhee.trainer.scheduler import configure_linear_scheduler_with_warmup
>>> from towhee.trainer.optimization.adamw import AdamW
>>> from torch import nn
>>> def unwrap_scheduler(scheduler, num_steps=10):
>>> lr_sch = []
>>> for _ in range(num_steps):
>>> lr_sch.append(scheduler.get_lr()[0])
>>> scheduler.step()
>>> return lr_sch
>>> mdl = nn.Linear(50, 50)
>>> optimizer = AdamW(mdl.parameters(), lr=10.0)
>>> num_steps = 10
>>> num_warmup_steps = 4
>>> num_training_steps = 10
>>> scheduler = configure_constant_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps)
>>> lr_sch_1 = unwrap_scheduler(scheduler, num_steps)
[0.0, 2.5, 5.0, 7.5, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
"""
def lr_lambda(current_step: int):
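        # The factor ramps from 0 to 1 over the warmup steps, then decays
        # linearly to 0 at num_training_steps.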
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
return max(
0.0, float(num_training_steps - current_step) / float(max(1, num_training_steps - num_warmup_steps))
)
return LambdaLR(optimizer, lr_lambda, last_epoch)
def configure_cosine_scheduler_with_warmup(
    optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1
):
"""
Return a scheduler with a learning rate that decreases following the values of the cosine function between the
initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
initial lr set in the optimizer.
Args:
optimizer (`Optimizer`):
The optimizer to be scheduled.
num_warmup_steps (`int`):
The steps for the warmup phase.
num_training_steps (`int`):
The number of training steps.
num_cycles (`int`):
The number of periods in te cosine scheduler.
last_epoch (`int`):
The last epoch when training is resumed.
Return (`LambdaLR`):
A cosine scheduler with warmup.
Example:
>>> from towhee.trainer.scheduler import configure_cosine_scheduler_with_warmup
>>> from towhee.trainer.optimization.adamw import AdamW
>>> from torch import nn
>>> def unwrap_scheduler(scheduler, num_steps=10):
>>> lr_sch = []
>>> for _ in range(num_steps):
>>> lr_sch.append(scheduler.get_lr()[0])
>>> scheduler.step()
>>> return lr_sch
>>> mdl = nn.Linear(50, 50)
>>> optimizer = AdamW(mdl.parameters(), lr=10.0)
>>> num_steps = 10
>>> num_warmup_steps = 4
>>> num_training_steps = 10
>>> scheduler = configure_cosine_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps)
>>> lr_sch_1 = unwrap_scheduler(scheduler, num_steps)
[0.0, 5.0, 10.0, 9.61, 8.53, 6.91, 5.0, 3.08, 1.46, 0.38]
"""
def lr_lambda(current_step):
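        # After warmup, `progress` runs from 0 to 1 and the factor follows
        # 0.5 * (1 + cos(2 * pi * num_cycles * progress)): a single decay from
        # 1 to 0 when num_cycles is 0.5.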
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
return LambdaLR(optimizer, lr_lambda, last_epoch)
def configure_cosine_with_hard_restarts_scheduler_with_warmup(
optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: int = 1, last_epoch: int = -1
):
"""
Return a scheduler with a learning rate that decreases following the values of the cosine function between the
initial lr set in the optimizer to 0, with several hard restarts, after a warmup period during which it increases
linearly between 0 and the initial lr set in the optimizer.
Args:
optimizer (`Optimizer`):
The optimizer to be scheduled.
num_warmup_steps (`int`):
The steps for the warmup phase.
num_training_steps (`int`):
The number of training steps.
num_cycles (`int`):
The number of hard restarts to be used.
last_epoch (`int`):
The index of the last epoch when training is resumed.
Return (`LambdaLR`):
A cosine with hard restarts scheduler with warmup.
Example:
>>> from towhee.trainer.scheduler import configure_cosine_with_hard_restarts_scheduler_with_warmup
>>> from towhee.trainer.optimization.adamw import AdamW
>>> from torch import nn
>>> def unwrap_scheduler(scheduler, num_steps=10):
>>> lr_sch = []
>>> for _ in range(num_steps):
>>> lr_sch.append(scheduler.get_lr()[0])
>>> scheduler.step()
>>> return lr_sch
>>> mdl = nn.Linear(50, 50)
>>> optimizer = AdamW(mdl.parameters(), lr=10.0)
>>> num_steps = 10
>>> num_warmup_steps = 4
>>> num_training_steps = 10
>>> num_cycles = 2
>>> scheduler = configure_cosine_with_hard_restarts_scheduler_with_warmup(optimizer,
num_warmup_steps, num_training_steps, num_cycles)
>>> lr_sch_1 = unwrap_scheduler(scheduler, num_steps)
[0.0, 5.0, 10.0, 8.53, 5.0, 1.46, 10.0, 8.53, 5.0, 1.46]
"""
def lr_lambda(current_step):
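        # Taking (num_cycles * progress) modulo 1.0 restarts the cosine at full
        # amplitude at the beginning of each cycle (a "hard restart").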
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
if progress >= 1.0:
return 0.0
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * ((float(num_cycles) * progress) % 1.0))))
return LambdaLR(optimizer, lr_lambda, last_epoch)
def configure_polynomial_decay_scheduler_with_warmup(
    optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, lr_end: float = 1e-7, power: float = 1.0,
    last_epoch: int = -1
):
"""
Return a scheduler with a learning rate that decreases as a polynomial decay from the initial lr set in the
optimizer to end lr defined by `lr_end`, after a warmup period during which it increases linearly from 0 to the
initial lr set in the optimizer.
Args:
optimizer (`Optimizer`):
The optimizer to be scheduled.
num_warmup_steps (`int`):
The steps for the warmup phase.
num_training_steps (`int`):
The number of training steps
lr_end (`float`):
The end LR.
power (`float`):
Power factor.
last_epoch (`int`):
The index of the last epoch when training is resumed.
Return (`LambdaLR`):
A polynomial decay scheduler with warmup.
Example:
>>> from towhee.trainer.scheduler import configure_polynomial_decay_scheduler_with_warmup
>>> from towhee.trainer.optimization.adamw import AdamW
>>> from torch import nn
>>> def unwrap_scheduler(scheduler, num_steps=10):
>>> lr_sch = []
>>> for _ in range(num_steps):
>>> lr_sch.append(scheduler.get_lr()[0])
>>> scheduler.step()
>>> return lr_sch
>>> mdl = nn.Linear(50, 50)
>>> optimizer = AdamW(mdl.parameters(), lr=10.0)
>>> num_steps = 10
>>> num_warmup_steps = 4
>>> num_training_steps = 10
>>> power = 2.0
>>> lr_end = 1e-7
>>> scheduler = configure_polynomial_decay_scheduler_with_warmup(optimizer,
num_warmup_steps, num_training_steps, num_cycles)
>>> lr_sch_1 = unwrap_scheduler(scheduler, num_steps)
[0.0, 5.0, 10.0, 7.656, 5.625, 3.906, 2.5, 1.406, 0.625, 0.156]
"""
lr_init = optimizer.defaults['lr']
    assert lr_init > lr_end, f'lr_end ({lr_end}) must be smaller than initial lr ({lr_init})'
def lr_lambda(current_step: int):
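        # `pct_remaining` falls from 1 to 0 over the decay steps; raising it to
        # `power` gives the polynomial decay of the remaining lr range.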
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
elif current_step > num_training_steps:
return lr_end / lr_init # as LambdaLR multiplies by lr_init
else:
lr_range = lr_init - lr_end
decay_steps = num_training_steps - num_warmup_steps
pct_remaining = 1 - (current_step - num_warmup_steps) / decay_steps
decay = lr_range * pct_remaining ** power + lr_end
return decay / lr_init # as LambdaLR multiplies by lr_init
return LambdaLR(optimizer, lr_lambda, last_epoch)
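# A minimal usage sketch (illustrative only, assuming the imports shown in the
# doctests above): call `scheduler.step()` once per optimizer step.
#
#     model = nn.Linear(50, 50)
#     optimizer = AdamW(model.parameters(), lr=1e-3)
#     scheduler = configure_linear_scheduler_with_warmup(
#         optimizer, num_warmup_steps=100, num_training_steps=1000)
#     for _ in range(1000):
#         ...             # forward / backward
#         optimizer.step()
#         scheduler.step()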
# class AdafactorScheduler(LambdaLR):
# """
# Adafactor scheduler.
#
# It returns ``initial_lr`` during startup and the actual ``lr`` during stepping.
# """
#
# def __init__(self, optimizer: Optimizer, initial_lr=0.0):
# def lr_lambda(_):
# return initial_lr
#
# for group in optimizer.param_groups:
# group['initial_lr'] = initial_lr
# super().__init__(optimizer, lr_lambda)
# for group in optimizer.param_groups:
# del group['initial_lr']
#
# def get_lr(self):
# opt = self.optimizer
# lrs = [
# opt._get_lr(group, opt.state[group['params'][0]])
# for group in opt.param_groups
# if group['params'][0].grad is not None
# ]
# if len(lrs) == 0:
# lrs = self.base_lrs # if called before stepping
# return lrs
#
#
# def configure_adafactor_scheduler(optimizer: Optimizer, initial_lr=0.0):
# """
# Return an Adafactor scheduler.
#
# Args:
# optimizer:
# The optimizer to be scheduled.
# initial_lr:
# Initial lr.
#
# Return:
# An Adafactor scheduler.
# """
# return AdafactorScheduler(optimizer, initial_lr)