From df3c4eafb505154353a5046b663392c83dca77ec Mon Sep 17 00:00:00 2001
From: kozistr
Date: Sat, 26 Oct 2024 16:38:14 +0900
Subject: [PATCH] update: CPUOffloadOptimizer

---
 pytorch_optimizer/optimizer/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytorch_optimizer/optimizer/utils.py b/pytorch_optimizer/optimizer/utils.py
index cbcf0791..4bb7b867 100644
--- a/pytorch_optimizer/optimizer/utils.py
+++ b/pytorch_optimizer/optimizer/utils.py
@@ -58,7 +58,7 @@ def compare_versions(v1: str, v2: str) -> bool:
 TORCH_VERSION_AT_LEAST_2_4: bool = compare_versions(torch.__version__, '2.4.0')
 
 
-class CPUOffloadOptimizer:
+class CPUOffloadOptimizer:  # pragma: no cover
     """Offload optimizer to CPU for single-GPU training. This will reduce GPU memory by the size of optimizer state.
 
     Reference: https://github.com/pytorch/ao/blob/main/torchao/prototype/low_bit_optim/cpu_offload.py
@@ -94,7 +94,7 @@ def __init__(
 
         self.queue = {}
 
-        def backward_hook(p_cuda: torch.Tensor) -> None:  # pragma: no cover
+        def backward_hook(p_cuda: torch.Tensor) -> None:
             if p_cuda.grad is None:
                 return
 
@@ -127,7 +127,7 @@ def backward_hook(p_cuda: torch.Tensor) -> None:  # pragma: no cover
                 self.optim_dict[p_cuda] = optimizer_class([{'params': p_cpu, **param_group}], **kwargs)
 
     @torch.no_grad()
-    def step(self, closure: CLOSURE = None) -> LOSS:  # pragma: no cover
+    def step(self, closure: CLOSURE = None) -> LOSS:
        loss = None
        if closure is not None:
            loss = closure()
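
Note (not part of the patch): a minimal usage sketch of the class being touched, based only on what the diff shows -- a CPUOffloadOptimizer constructed from params plus an optimizer_class whose extra kwargs are forwarded (see the optimizer_class([{'params': p_cpu, **param_group}], **kwargs) call), a backward_hook that streams gradients, and a step(closure) method. Passing optimizer_class as a keyword, and the model/data below, are illustrative assumptions; the copy-back of updated parameters on step() follows the referenced torchao design.

    import torch

    from pytorch_optimizer.optimizer.utils import CPUOffloadOptimizer

    # a CUDA device is required: parameters stay on the GPU while the
    # optimizer state is mirrored on the CPU
    model = torch.nn.Linear(16, 4).cuda()

    optimizer = CPUOffloadOptimizer(
        model.parameters(),
        optimizer_class=torch.optim.AdamW,  # any torch.optim-style optimizer
        lr=1e-3,  # extra kwargs are forwarded to optimizer_class
    )

    x = torch.randn(8, 16, device='cuda')
    model(x).sum().backward()  # backward_hook copies each grad to its CPU mirror
    optimizer.step()  # steps the per-parameter CPU optimizers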