Drop python 3.8, add python 3.12 support
araffin committed Nov 18, 2024
1 parent b8ff1a6 commit e5a1028
Showing 16 changed files with 97 additions and 86 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
include:
# Default version
- gymnasium-version: "1.0.0"
@@ -51,6 +51,7 @@ jobs:
- name: Install specific version of gym
run: |
uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
uv pip install --system "numpy<2"
# Only run for python 3.10, downgrade gym to 0.29.1

- name: Lint with ruff
@@ -65,8 +66,6 @@
- name: Type check
run: |
make type
# Do not run for python 3.8 (mypy internal error)
if: matrix.python-version != '3.8'
- name: Test with pytest
run: |
make pytest
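The matrix above drops 3.8 and adds 3.12, and the step that installs a specific Gymnasium version now also pins `numpy<2`, since Gymnasium releases that pre-date NumPy 2 are not guaranteed to work against it. A hedged sketch (not part of this commit) of how the same constraint could be reproduced locally as a pytest skip condition; the marker name is hypothetical and assumes the `packaging` package is available:

```python
# Sketch only: skip tests on the NumPy 2 + old Gymnasium combination,
# mirroring the CI job that installs "numpy<2" alongside gymnasium==0.29.1.
import gymnasium
import numpy as np
import pytest
from packaging.version import Version

requires_numpy1 = pytest.mark.skipif(
    Version(gymnasium.__version__) < Version("1.0.0")
    and Version(np.__version__) >= Version("2.0"),
    reason="Gymnasium releases before 1.0 pre-date NumPy 2",
)


@requires_numpy1
def test_old_gymnasium_env():
    # Placeholder body; a real test would exercise an environment here.
    assert gymnasium.make("CartPole-v1") is not None
```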
16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,19 @@
## Release 2.5.0a0 (WIP)

### Breaking Changes
- Upgraded to PyTorch >= 2.3.0
- Upgraded to SB3 >= 2.5.0

### New Features
- Added support for NumPy v2

### Bug fixes

### Documentation

### Other


## Release 2.4.0 (2024-11-18)

**New algorithm: CrossQ, Gymnasium v1.0 support, and better defaults for SAC/TQC on Swimmer-v4 env**
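The 2.5.0a0 entry above raises the minimum PyTorch and Stable-Baselines3 versions and declares NumPy v2 support. A minimal sketch (not part of the commit) of how a downstream script could check the new floors at runtime, assuming the `packaging` package is available:

```python
# Sketch only: verify the minimum versions announced for release 2.5.0a0.
import torch
import stable_baselines3 as sb3
from packaging.version import Version

assert Version(torch.__version__) >= Version("2.3.0"), "RL Zoo 2.5.0a0 expects PyTorch >= 2.3.0"
assert Version(sb3.__version__) >= Version("2.5.0a0"), "RL Zoo 2.5.0a0 expects SB3 >= 2.5.0"
print("PyTorch and Stable-Baselines3 meet the new minimums")
```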
3 changes: 1 addition & 2 deletions docs/conf.py
@@ -14,7 +14,6 @@
import datetime
import os
import sys
from typing import Dict

# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
# PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):

# -- Options for LaTeX output ------------------------------------------------

latex_elements: Dict[str, str] = {
latex_elements: dict[str, str] = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,8 +1,8 @@
[tool.ruff]
# Same as Black.
line-length = 127
# Assume Python 3.8
target-version = "py38"
# Assume Python 3.9
target-version = "py39"

[tool.ruff.lint]
# See https://beta.ruff.rs/docs/rules/
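Bumping `target-version` to `py39` matches the new minimum interpreter and is what the rest of the commit relies on when it replaces `typing.Dict`, `List`, `Tuple`, and `Type` with the builtin generics from PEP 585 (available since Python 3.9). A before/after illustration, not taken from the repository:

```python
from typing import Any, Optional

# Python 3.8 style (needs Dict/List/Tuple imports from typing):
#   def read(path: str) -> Tuple[Dict[str, Any], List[str]]: ...

# Python 3.9+ style used throughout this commit: builtin generics, no extra imports.
def read(path: str) -> tuple[dict[str, Any], list[str]]:
    hyperparams: dict[str, Any] = {}
    warnings: list[str] = []
    return hyperparams, warnings


# type[...] likewise replaces typing.Type:
error_class: Optional[type[Exception]] = None
```

If the project's lint selection enables ruff's pyupgrade rules, the higher target version also lets them flag the old `typing` aliases automatically (an assumption about the configuration, which is not shown in this diff).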
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
gym==0.26.2
stable-baselines3[extra,tests,docs]>=2.4.0,<3.0
stable-baselines3[extra,tests,docs]>=2.5.0a0,<3.0
box2d-py==2.3.8
pybullet_envs_gymnasium>=0.5.0
# minigrid
3 changes: 1 addition & 2 deletions rl_zoo3/benchmark.py
@@ -3,7 +3,6 @@
import os
import shutil
import subprocess
from typing import Dict, List

import numpy as np
import pandas as pd
@@ -33,7 +32,7 @@
trained_models.update(get_hf_trained_models())

n_experiments = len(trained_models)
results: Dict[str, List] = {
results: dict[str, list] = {
"algo": [],
"env_id": [],
"mean_reward": [],
4 changes: 2 additions & 2 deletions rl_zoo3/callbacks.py
@@ -4,7 +4,7 @@
from copy import deepcopy
from functools import wraps
from threading import Thread
from typing import Optional, Type, Union
from typing import Optional, Union

import optuna
from sb3_contrib import TQC
@@ -119,7 +119,7 @@ def __init__(self, gradient_steps: int = 100, verbose: int = 0, sleep_time: floa
self._model: Union[SAC, TQC]
self.gradient_steps = gradient_steps
self.process: Thread
self.model_class: Union[Type[SAC], Type[TQC]]
self.model_class: Union[type[SAC], type[TQC]]
self.sleep_time = sleep_time

def _init_callback(self) -> None:
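`model_class` now uses the builtin `type[...]` form; the annotation stores the class itself so that a model can be constructed from it later. A hedged sketch of the pattern (the constructor call is illustrative, not copied from the callback):

```python
from typing import Union

from sb3_contrib import TQC
from stable_baselines3 import SAC

# Holding the class (not an instance) lets a callback rebuild a model on demand.
model_class: Union[type[SAC], type[TQC]] = SAC
model = model_class("MlpPolicy", "Pendulum-v1", verbose=0)
```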
48 changes: 24 additions & 24 deletions rl_zoo3/exp_manager.py
@@ -7,7 +7,7 @@
from collections import OrderedDict
from pathlib import Path
from pprint import pprint
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Optional, Union

import gymnasium as gym
import numpy as np
@@ -71,9 +71,9 @@ def __init__(
eval_freq: int = 10000,
n_eval_episodes: int = 5,
save_freq: int = -1,
hyperparams: Optional[Dict[str, Any]] = None,
env_kwargs: Optional[Dict[str, Any]] = None,
eval_env_kwargs: Optional[Dict[str, Any]] = None,
hyperparams: Optional[dict[str, Any]] = None,
env_kwargs: Optional[dict[str, Any]] = None,
eval_env_kwargs: Optional[dict[str, Any]] = None,
trained_agent: str = "",
optimize_hyperparameters: bool = False,
storage: Optional[str] = None,
@@ -112,10 +112,10 @@ def __init__(
default_path = Path(__file__).parent.parent

self.config = config or str(default_path / f"hyperparams/{self.algo}.yml")
self.env_kwargs: Dict[str, Any] = env_kwargs or {}
self.env_kwargs: dict[str, Any] = env_kwargs or {}
self.n_timesteps = n_timesteps
self.normalize = False
self.normalize_kwargs: Dict[str, Any] = {}
self.normalize_kwargs: dict[str, Any] = {}
self.env_wrapper: Optional[Callable] = None
self.frame_stack = None
self.seed = seed
@@ -124,23 +124,23 @@
self.vec_env_class = {"dummy": DummyVecEnv, "subproc": SubprocVecEnv}[vec_env_type]
self.vec_env_wrapper: Optional[Callable] = None

self.vec_env_kwargs: Dict[str, Any] = {}
self.vec_env_kwargs: dict[str, Any] = {}
# self.vec_env_kwargs = {} if vec_env_type == "dummy" else {"start_method": "fork"}

# Callbacks
self.specified_callbacks: List = []
self.callbacks: List[BaseCallback] = []
self.specified_callbacks: list = []
self.callbacks: list[BaseCallback] = []
# Use env-kwargs if eval_env_kwargs was not specified
self.eval_env_kwargs: Dict[str, Any] = eval_env_kwargs or self.env_kwargs
self.eval_env_kwargs: dict[str, Any] = eval_env_kwargs or self.env_kwargs
self.save_freq = save_freq
self.eval_freq = eval_freq
self.n_eval_episodes = n_eval_episodes
self.n_eval_envs = n_eval_envs

self.n_envs = 1 # it will be updated when reading hyperparams
self.n_actions = 0 # For DDPG/TD3 action noise objects
self._hyperparams: Dict[str, Any] = {}
self.monitor_kwargs: Dict[str, Any] = {}
self._hyperparams: dict[str, Any] = {}
self.monitor_kwargs: dict[str, Any] = {}

self.trained_agent = trained_agent
self.continue_training = trained_agent.endswith(".zip") and os.path.isfile(trained_agent)
@@ -179,7 +179,7 @@ def __init__(
)
self.params_path = f"{self.save_path}/{self.env_name}"

def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
def setup_experiment(self) -> Optional[tuple[BaseAlgorithm, dict[str, Any]]]:
"""
Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
create the environment and possibly the model.
@@ -223,7 +223,7 @@ def learn(self, model: BaseAlgorithm) -> None:
"""
:param model: an initialized RL model
"""
kwargs: Dict[str, Any] = {}
kwargs: dict[str, Any] = {}
if self.log_interval > -1:
kwargs = {"log_interval": self.log_interval}

@@ -272,7 +272,7 @@ def save_trained_model(self, model: BaseAlgorithm) -> None:
assert vec_normalize is not None
vec_normalize.save(os.path.join(self.params_path, "vecnormalize.pkl"))

def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:
def _save_config(self, saved_hyperparams: dict[str, Any]) -> None:
"""
Save unprocessed hyperparameters; this can be used later
to reproduce an experiment.
@@ -290,15 +290,15 @@ def _save_config(self, saved_hyperparams: Dict[str, Any]) -> None:

print(f"Log path: {self.save_path}")

def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
def read_hyperparameters(self) -> tuple[dict[str, Any], dict[str, Any]]:
print(f"Loading hyperparameters from: {self.config}")

if self.config.endswith(".yml") or self.config.endswith(".yaml"):
# Load hyperparameters from yaml file
with open(self.config) as f:
hyperparams_dict = yaml.safe_load(f)
elif self.config.endswith(".py"):
global_variables: Dict = {}
global_variables: dict = {}
# Load hyperparameters from python file
exec(Path(self.config).read_text(), global_variables)
hyperparams_dict = global_variables["hyperparams"]
@@ -327,7 +327,7 @@ def read_hyperparameters(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
return hyperparams, saved_hyperparams

@staticmethod
def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
# Create schedules
for key in ["learning_rate", "clip_range", "clip_range_vf", "delta_std"]:
if key not in hyperparams:
Expand All @@ -345,7 +345,7 @@ def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]:
raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
return hyperparams

def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, Any]:
def _preprocess_normalization(self, hyperparams: dict[str, Any]) -> dict[str, Any]:
if "normalize" in hyperparams.keys():
self.normalize = hyperparams["normalize"]

Expand All @@ -370,8 +370,8 @@ def _preprocess_normalization(self, hyperparams: Dict[str, Any]) -> Dict[str, An
return hyperparams

def _preprocess_hyperparams( # noqa: C901
self, hyperparams: Dict[str, Any]
) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback], Optional[Callable]]:
self, hyperparams: dict[str, Any]
) -> tuple[dict[str, Any], Optional[Callable], list[BaseCallback], Optional[Callable]]:
self.n_envs = hyperparams.get("n_envs", 1)

if self.verbose > 0:
Expand Down Expand Up @@ -448,8 +448,8 @@ def _preprocess_hyperparams( # noqa: C901
return hyperparams, env_wrapper, callbacks, vec_env_wrapper

def _preprocess_action_noise(
self, hyperparams: Dict[str, Any], saved_hyperparams: Dict[str, Any], env: VecEnv
) -> Dict[str, Any]:
self, hyperparams: dict[str, Any], saved_hyperparams: dict[str, Any], env: VecEnv
) -> dict[str, Any]:
# Parse noise string
# Note: only off-policy algorithms are supported
if hyperparams.get("noise_type") is not None:
Expand Down Expand Up @@ -667,7 +667,7 @@ def make_env(**kwargs) -> gym.Env:

return env

def _load_pretrained_agent(self, hyperparams: Dict[str, Any], env: VecEnv) -> BaseAlgorithm:
def _load_pretrained_agent(self, hyperparams: dict[str, Any], env: VecEnv) -> BaseAlgorithm:
# Continue training
print("Loading pretrained agent")
# Policy should not be changed
Expand Down
26 changes: 13 additions & 13 deletions rl_zoo3/hyperparams_opt.py
@@ -1,4 +1,4 @@
from typing import Any, Dict
from typing import Any

import numpy as np
import optuna
@@ -8,7 +8,7 @@
from rl_zoo3 import linear_schedule


def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for PPO hyperparams.
@@ -76,7 +76,7 @@ def sample_ppo_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
}


def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for RecurrentPPO hyperparams.
uses sample_ppo_params(), this function samples for the policy_kwargs
@@ -98,7 +98,7 @@ def sample_ppo_lstm_params(trial: optuna.Trial, n_actions: int, n_envs: int, add
return hyperparams


def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for TRPO hyperparams.
@@ -165,7 +165,7 @@ def sample_trpo_params(trial: optuna.Trial, n_actions: int, n_envs: int, additio
}


def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for A2C hyperparams.
@@ -229,7 +229,7 @@ def sample_a2c_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
}


def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for SAC hyperparams.
@@ -290,7 +290,7 @@ def sample_sac_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
return hyperparams


def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for TD3 hyperparams.
@@ -346,7 +346,7 @@ def sample_td3_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
return hyperparams


def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for DDPG hyperparams.
@@ -400,7 +400,7 @@ def sample_ddpg_params(trial: optuna.Trial, n_actions: int, n_envs: int, additio
return hyperparams


def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for DQN hyperparams.
@@ -444,7 +444,7 @@ def sample_dqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
return hyperparams


def sample_her_params(trial: optuna.Trial, hyperparams: Dict[str, Any], her_kwargs: Dict[str, Any]) -> Dict[str, Any]:
def sample_her_params(trial: optuna.Trial, hyperparams: dict[str, Any], her_kwargs: dict[str, Any]) -> dict[str, Any]:
"""
Sampler for HerReplayBuffer hyperparams.
@@ -461,7 +461,7 @@ def sample_her_params(trial: optuna.Trial, hyperparams: Dict[str, Any], her_kwar
return hyperparams


def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for TQC hyperparams.
@@ -480,7 +480,7 @@ def sample_tqc_params(trial: optuna.Trial, n_actions: int, n_envs: int, addition
return hyperparams


def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for QR-DQN hyperparams.
@@ -496,7 +496,7 @@ def sample_qrdqn_params(trial: optuna.Trial, n_actions: int, n_envs: int, additi
return hyperparams


def sample_ars_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> Dict[str, Any]:
def sample_ars_params(trial: optuna.Trial, n_actions: int, n_envs: int, additional_args: dict) -> dict[str, Any]:
"""
Sampler for ARS hyperparams.
:param trial:
Expand Down