-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrun_training_baselines.py
156 lines (138 loc) · 4.12 KB
/
run_training_baselines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from distutils.util import strtobool
import numpy as np
import omnisafe
import gym_envs.omnisafe_adapter
from training_env_factory import get_env_id
def main(args):
    """Assemble the OmniSafe configuration for one baseline run and train it.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``algo``, ``cost_limit``, ``env_id``, ``entropy_coef``, ``seed``,
            ``total_timesteps``, ``num_envs``, ``checkpoint_freq``,
            ``log_dir``, ``use_cbf`` and ``use_ctrl``.
    """
    algo_name = args.algo
    budget = args.cost_limit
    base_env_id = args.env_id
    entropy_bonus = args.entropy_coef

    # Settings shared by every algorithm.
    algo_cfgs = {
        "use_cost": True,
        "steps_per_epoch": 2048,
    }

    # Extra PPO settings are only supplied for these two algorithm names.
    if algo_name in ("PPOLag", "PPOPID"):
        algo_cfgs.update(
            entropy_coef=entropy_bonus,
            update_iters=10,
            use_max_grad_norm=True,
            max_grad_norm=0.5,
        )

    custom_cfgs = {
        "seed": args.seed,
        "train_cfgs": {
            "total_steps": args.total_timesteps,
            "vector_env_nums": args.num_envs,
            "parallel": 1,
        },
        "algo_cfgs": algo_cfgs,
        "logger_cfgs": {
            "use_wandb": False,
            "use_tensorboard": True,
            "save_model_freq": args.checkpoint_freq,
            "log_dir": args.log_dir,
            "window_lens": 100,
        },
    }

    # Where the cost limit goes depends on the algorithm: names ending in
    # "Lag"/"PID" get a "lagrange_cfgs" section, "PPOSaute" gets a safety
    # budget, and anything else gets a plain "cost_limit" in "algo_cfgs".
    if algo_name.endswith("Lag"):
        custom_cfgs["lagrange_cfgs"] = {
            "cost_limit": budget,
            "lagrangian_multiplier_init": 1.0,
            "lambda_lr": 5e-2,
        }
    elif algo_name.endswith("PID"):
        # Particle environments use a larger proportional gain.
        if "particle-env" in base_env_id:
            proportional_gain = 10.0
        else:
            proportional_gain = 1.0
        custom_cfgs["lagrange_cfgs"] = {
            "cost_limit": budget,
            "pid_kp": proportional_gain,
            "pid_ki": 0.01,
            "pid_kd": 0.01,
        }
    elif algo_name == "PPOSaute":
        algo_cfgs["safety_budget"] = budget
    else:
        algo_cfgs["cost_limit"] = budget

    # Resolve the environment id actually handed to OmniSafe from the base
    # id and the CBF / controller toggles.
    resolved_env_id = get_env_id(base_env_id, args.use_cbf, args.use_ctrl)
    omnisafe.Agent(algo_name, resolved_env_id, custom_cfgs=custom_cfgs).learn()
def build_parser():
    """Build the command-line parser for the baseline training script.

    The three boolean toggles (``--use-ctrl``, ``--use-cbf``, ``--use-decay``)
    accept an optional value: omitting the flag yields ``False``, passing the
    bare flag yields ``True``, and passing an explicit value converts it with
    ``strtobool``.

    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    # Imported locally so this module only needs argparse when a parser is
    # actually built.
    import argparse

    envs = [
        "particle-env-v0",
        "particle-env-v1",
        "f110-multi-agent-v0",
        "f110-multi-agent-v1",
    ]

    # NOTE(review): strtobool comes from the file-level distutils import;
    # distutils was removed from the standard library in Python 3.12, so this
    # converter should eventually be replaced.
    def to_bool(text):
        return bool(strtobool(text))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--algo", type=str, default="PPOPID", help="Baseline algorithm to use"
    )
    parser.add_argument(
        "--cost-limit",
        type=float,
        default=0.1,
        help="Cost limit for Lagrange multiplier",
    )
    parser.add_argument(
        "--entropy-coef",
        type=float,
        default=0.0,
        help="Entropy coefficient for exploration bonus",
    )
    parser.add_argument("--env-id", type=str, default="particle-env-v0", choices=envs)
    parser.add_argument(
        "--use-ctrl",
        type=to_bool,
        default=False,
        nargs="?",
        const=True,
        help="Toggles the use of a controller",
    )
    parser.add_argument(
        "--use-cbf",
        type=to_bool,
        default=False,
        nargs="?",
        const=True,
        help="Toggles the use of a CBF",
    )
    # const must be True so that a bare ``--use-decay`` enables the option,
    # matching the other toggles; with const=False the bare flag was a no-op.
    # NOTE(review): main() in this file does not read args.use_decay, so the
    # value is parsed but not forwarded anywhere visible here.
    parser.add_argument(
        "--use-decay",
        type=to_bool,
        default=False,
        nargs="?",
        const=True,
        help="Toggles the use of optimal-decay coefficient in CBF",
    )
    parser.add_argument(
        "--seed", type=int, default=None, help="Seed for the environment"
    )
    parser.add_argument(
        "--total-timesteps",
        type=int,
        default=1024000,
        help="Total timesteps for training",
    )
    parser.add_argument(
        "--num-envs", type=int, default=1, help="Number of vectorized environments"
    )
    parser.add_argument(
        "--log-dir", type=str, default="logs/baselines", help="Directory to save logs"
    )
    parser.add_argument(
        "--checkpoint-freq",
        type=int,
        default=10,
        help="Save agent model every N updates",
    )
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    # When no seed is given, draw one so the run still has a concrete seed.
    if args.seed is None:
        args.seed = np.random.randint(0, 1000000)
    main(args)