
Commit

Update the training notebook with the latest training updates
jshuadvd committed Jul 15, 2024
1 parent 5189c33 commit 4e34fa8
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion notebooks/01_LongRoPE_training.ipynb
@@ -275,6 +275,10 @@
" scaler.scale(loss).backward()\n",
"\n",
" if (i + 1) % gradient_accumulation_steps == 0:\n",
" # Gradient clipping\n",
" scaler.unscale_(optimizer)\n",
" torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
"\n",
" # Update weights and reset gradients\n",
" scaler.step(optimizer)\n",
" scaler.update()\n",
@@ -348,6 +352,12 @@
" f\"Val Loss: {avg_val_loss:.4f}, Val Perplexity: {val_perplexity:.4f}\"\n",
" )\n",
"\n",
" # Log GPU memory usage\n",
" for gpu in GPUtil.getGPUs():\n",
" gpu_memory_used = gpu.memoryUsed\n",
" logger.info(f\"GPU {gpu.id} memory use: {gpu_memory_used}MB\")\n",
" wandb.log({f\"GPU_{gpu.id}_memory_used\": gpu_memory_used})\n",
"\n",
" # Save checkpoint\n",
" accelerator.save_state(\n",
" {\n",
@@ -388,7 +398,7 @@
" break\n",
"\n",
" if max_steps and global_step >= max_steps:\n",
" break"
" break\n"
]
},
{
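For context, the gradient clipping added in the first hunk (@@ -275,6 +275,10 @@) follows the usual PyTorch AMP pattern: unscale the accumulated gradients before clipping so the norm is measured on their true values, then step the scaler. Below is a minimal, self-contained sketch of that pattern only; the tiny model, optimizer, and random batches are illustrative placeholders rather than the notebook's actual objects, and a CUDA device is assumed.

import torch

# Illustrative stand-ins; the real notebook builds its own model, optimizer,
# and data loader earlier in the training setup.
model = torch.nn.Linear(16, 1).cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()
gradient_accumulation_steps = 4

for i in range(16):
    x = torch.randn(8, 16, device="cuda")
    y = torch.randn(8, 1, device="cuda")
    with torch.cuda.amp.autocast():
        loss = torch.nn.functional.mse_loss(model(x), y) / gradient_accumulation_steps
    scaler.scale(loss).backward()

    if (i + 1) % gradient_accumulation_steps == 0:
        # Unscale first so clip_grad_norm_ sees real gradient magnitudes
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Update weights and reset gradients
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()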

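The second hunk (@@ -348,6 +352,12 @@) records per-GPU memory with GPUtil and mirrors it to Weights & Biases. A standalone sketch of that logging step is below; it assumes the GPUtil package is installed and an NVIDIA GPU is visible, and the wandb call is commented out so the snippet runs without an active wandb run.

import logging

import GPUtil

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log current memory use for every visible GPU
for gpu in GPUtil.getGPUs():
    gpu_memory_used = gpu.memoryUsed  # megabytes currently in use on this GPU
    logger.info(f"GPU {gpu.id} memory use: {gpu_memory_used}MB")
    # wandb.log({f"GPU_{gpu.id}_memory_used": gpu_memory_used})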