From 68bf53797876cf806f7fdf34588a599be0a62d1f Mon Sep 17 00:00:00 2001 From: marcinpaluch1994 Date: Thu, 24 Nov 2022 16:17:28 +0100 Subject: [PATCH 01/38] Starting template for integrating vector of target positions --- Controllers/TargetPositionGenerator.py | 8 ++++++++ Controllers/controller_mpc.py | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 Controllers/TargetPositionGenerator.py diff --git a/Controllers/TargetPositionGenerator.py b/Controllers/TargetPositionGenerator.py new file mode 100644 index 0000000..5a923d9 --- /dev/null +++ b/Controllers/TargetPositionGenerator.py @@ -0,0 +1,8 @@ + +class TargetPositionGenerator: + def __init__(self, lib, horizon): + self.horizon = horizon + self.lib = lib + + def step(self, time): + return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d6cd4ae..ed906ea 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -13,6 +13,8 @@ from torch import inference_mode +from Control_Toolkit.Controllers.TargetPositionGenerator import TargetPositionGenerator + config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) @@ -64,6 +66,9 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: predictor_specification=predictor_specification ) + self.TargetPositionGeneratorInstance = TargetPositionGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) + setattr(self, "target_positions_vector", self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), self.computation_library.float32)) + if self.lib.lib == 'Pytorch': self.step = inference_mode()(self.step) else: @@ -71,6 +76,9 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + + target_positions_vector = self.TargetPositionGeneratorInstance.step(time) + updated_attributes['target_positions_vector'] = target_positions_vector self.update_attributes(updated_attributes) u = self.optimizer.step(s, time) self.update_logs(self.optimizer.logging_values) From 6a6d2187f4898f61c11bd601925b1168332ef121 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 27 Nov 2022 09:26:16 +0100 Subject: [PATCH 02/38] to fix tensorflow JIT compile case insensitivity problem, renamed cart mass M to m_cart and pole mass m to m_pole added comments about meaning of "stage" in MPPI renamed TargetPositionGenerator to TargetTrajectoryGenerator, since we target a complete trajectory, not just cart position.
renamed my_logger() to get_logger() --- .../Controllers/controller_barebone.py | 1 + .../EnvironmentName/cost_function_barebone.py | 2 +- Controllers/TargetPositionGenerator.py | 8 ------- Controllers/TrajectoryGenerator.py | 22 +++++++++++++++++++ Controllers/__init__.py | 8 +++++++ Controllers/controller_mpc.py | 11 ++++++---- 6 files changed, 39 insertions(+), 13 deletions(-) delete mode 100644 Controllers/TargetPositionGenerator.py create mode 100644 Controllers/TrajectoryGenerator.py diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py index ecc1858..9f1b7eb 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py @@ -32,6 +32,7 @@ def configure(self): pass def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + # The controller has to adapt when environment-related attributes such as target positions change # Updated targets etc. are passed as a dictionary updated_attributes self.update_attributes(updated_attributes) # After this call, updated attributes are available as self.<> diff --git a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py index 97e9b53..9f5526a 100644 --- a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py +++ b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py @@ -37,7 +37,7 @@ def get_terminal_cost(self, terminal_states: TensorType): # return terminal_cost pass - # all stage costs together + # all stage costs together. A 'stage' is one timestep of a rollout. def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType): # Shape of states: [batch_size, mpc_horizon, num_states] # TODO: Compute stage cost diff --git a/Controllers/TargetPositionGenerator.py b/Controllers/TargetPositionGenerator.py deleted file mode 100644 index 5a923d9..0000000 --- a/Controllers/TargetPositionGenerator.py +++ /dev/null @@ -1,8 +0,0 @@ - -class TargetPositionGenerator: - def __init__(self, lib, horizon): - self.horizon = horizon - self.lib = lib - - def step(self, time): - return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/TrajectoryGenerator.py b/Controllers/TrajectoryGenerator.py new file mode 100644 index 0000000..96ab182 --- /dev/null +++ b/Controllers/TrajectoryGenerator.py @@ -0,0 +1,22 @@ +# stub for generating desired future trajectory of cartpole + +class TrajectoryGenerator: + def __init__(self, lib, horizon:int): + """ Construct the trajectory generator. + + :param lib: the computation library, e.g. tensorflow + :param horizon: the MPC horizon in timesteps + """ + self.horizon = horizon + self.lib = lib + + def step(self, time): + """ Computes the desired future state trajectory at this time. + + :param time: the scalar time in seconds + + :returns: the target state trajectory of cartpole. + It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values.
+ + """ + return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index d8753d0..36bd71b 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -109,6 +109,14 @@ def update_attributes(self, updated_attributes: "dict[str, TensorType]"): @abstractmethod def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + """ + Execute one timestep of control. + @param s: the state array, dimensions are TODO add dimension to help users + @param time: the time in seconds + @param updated_attributes: dict of updated states + @return: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct? + + """ ### Any computations in order to retrieve the current control. Such as: ## If the environment's target positions etc. change, copy the new attributes over to this controller so the cost function knows about it: # self.update_attributes(updated_attributes) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index ed906ea..3781bd7 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -13,7 +13,7 @@ from torch import inference_mode -from Control_Toolkit.Controllers.TargetPositionGenerator import TargetPositionGenerator +from Control_Toolkit.Controllers.TrajectoryGenerator import TrajectoryGenerator config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) @@ -22,6 +22,7 @@ class controller_mpc(template_controller): + _has_optimizer = True def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): @@ -66,8 +67,10 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: predictor_specification=predictor_specification ) - self.TargetPositionGeneratorInstance = TargetPositionGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) - setattr(self, "target_positions_vector", self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), self.computation_library.float32)) + self.TrajectoryGeneratorInstance = TrajectoryGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) + setattr(self, "target_positions_vector", + self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), + self.computation_library.float32)) if self.lib.lib == 'Pytorch': self.step = inference_mode()(self.step) @@ -77,7 +80,7 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - target_positions_vector = self.TargetPositionGeneratorInstance.step(time) + target_positions_vector = self.TrajectoryGeneratorInstance.step(time) updated_attributes['target_positions_vector'] = target_positions_vector self.update_attributes(updated_attributes) u = self.optimizer.step(s, time) From d1d9203507176d4cb653598e6399e77446b3dffe Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 28 Nov 2022 10:53:12 +0100 Subject: [PATCH 03/38] docstring for update_parameters --- Controllers/__init__.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 36bd71b..0bd784a 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -1,6 +1,6 @@ import os from abc import ABC, abstractmethod -from typing 
import Tuple +from typing import Tuple, Union import numpy as np import yaml @@ -9,12 +9,12 @@ PyTorchLibrary, TensorFlowLibrary, TensorType) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) logger = get_logger(__name__) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: -1. Be in Controller folder +1. Be in Controllers folder 2. Have a name starting with "controller_" 3. The name of the controller class must be the same as the name of the file. 4. It must have __init__ and step methods @@ -104,17 +104,27 @@ def configure(self, **kwargs): pass def update_attributes(self, updated_attributes: "dict[str, TensorType]"): + """ Update scalar float32 attributes in compiled code (tensorflow JIT) that have changed, i.e. copy them to the compiled/GPU instance. + + After this call, such attribute values are available to the TF function as self.key, where key is the key used in dict. + + Used in various controllers in Control_Toolkit/Control_Toolkit_ASF_Template/Controllers. + + :param updated_attributes: a dict with string keys and TensorType (i.e. float32) value attributes to copy + """ for property, new_value in updated_attributes.items(): self.computation_library.assign(getattr(self, property), self.lib.to_tensor(new_value, self.lib.float32)) @abstractmethod - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): """ Execute one timestep of control. - @param s: the state array, dimensions are TODO add dimension to help users - @param time: the time in seconds - @param updated_attributes: dict of updated states - @return: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct? + + :param s: the state array, dimensions are TODO add dimension to help users + :param time: the time in seconds + :param updated_attributes: dict of updated attributes + + :returns: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct? """ ### Any computations in order to retrieve the current control. 
Such as: From cc2b58ce742edca11b1a874b50e98ef0d3a6e9d5 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 28 Nov 2022 11:21:02 +0100 Subject: [PATCH 04/38] controller_mpc.py now copies all modified controller, cost and optimizer config changes to updated_attributes before executing optimizer.step --- Controllers/controller_mpc.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 3781bd7..e059038 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -14,11 +14,11 @@ from torch import inference_mode from Control_Toolkit.Controllers.TrajectoryGenerator import TrajectoryGenerator - +from others.globals_and_utils import load_or_reload_config_if_modified config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) -logger = get_logger(__name__) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) +log = get_logger(__name__) class controller_mpc(template_controller): @@ -28,13 +28,13 @@ class controller_mpc(template_controller): def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - logger.info(f"Using optimizer {optimizer_name} specified in controller config file") + log.info(f"Using optimizer {optimizer_name} specified in controller config file") if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - logger.info(f"Using predictor {predictor_specification} specified in controller config file") + log.info(f"Using predictor {predictor_specification} specified in controller config file") config_optimizer = config_optimizers[optimizer_name] - + # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) self.cost_function = CostFunctionWrapper() @@ -67,7 +67,9 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: predictor_specification=predictor_specification ) + # make a target position trajectory generator self.TrajectoryGeneratorInstance = TrajectoryGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) + # set self.target_positions_vector to the correct vector tensor type for TF or whatever is doing the stepping setattr(self, "target_positions_vector", self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), self.computation_library.float32)) @@ -79,10 +81,22 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - + log.debug(f'step time={time:.3f}s') + # the following is a hack to get a target trajectory passed to tensorflow.
The trajectory is passed in + # as updated_attributes, and is transferred to tensorflow by the update_attributes call target_positions_vector = self.TrajectoryGeneratorInstance.step(time) updated_attributes['target_positions_vector'] = target_positions_vector self.update_attributes(updated_attributes) + for c in ('config_controllers.yml','config_cost_functions.yml', 'config_optimizers.yml'): + (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',c)) + # process changes to configs using new returned change list + if changes is not None: + for k,v in changes: + if isinstance(v, (int, float)): + updated_attributes[k]=v + self.update_attributes(updated_attributes) + log.info(f'updated config {c} with scalar updated_attributes {updated_attributes}') + u = self.optimizer.step(s, time) self.update_logs(self.optimizer.logging_values) return u From 06c79816da4bc592c9c7a72c034a8dae0877b43d Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 28 Nov 2022 11:27:00 +0100 Subject: [PATCH 05/38] rename config_cost_function.yml to config_cost_functions.yml for consistency with other config files. added more docstrings for undocumented methods and constructors. added updated_attributes to barebones controller. improved formatting of logging output. added dictdiffer to requirements.txt renamed target_position cost function to cartpole_trajectory_cost.py. replaced some load_config with the load_or_reload_config_if_modified, so that the configs are cached for change checking. added logger to Compile.py --- .../EnvironmentName/cost_function_barebone.py | 2 +- Cost_Functions/__init__.py | 12 +++++++++- Cost_Functions/cost_function_wrapper.py | 23 +++++++++++++++---- Optimizers/__init__.py | 2 +- Optimizers/optimizer_mppi.py | 11 +++++++-- others/globals_and_utils.py | 23 +++++++++++-------- requirements.txt | 3 ++- 7 files changed, 56 insertions(+), 20 deletions(-) diff --git a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py index 9f5526a..6e9802d 100644 --- a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py +++ b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py @@ -9,7 +9,7 @@ # TODO: Load constants from the cost config file, like this: -config = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"), Loader=yaml.FullLoader) +config = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r"), Loader=yaml.FullLoader) # TODO: Rename parent folder from EnvironmentName to the actual name of your environment # TODO: Load constants like this: diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index a98da9d..c86692e 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -9,9 +9,19 @@ class cost_function_base: # Default: Class supports all libs to compute costs supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} - def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]") -> None: + def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]", config:dict=None) -> None: + """ Makes a new cost function + + :param controller: the controller + :param ComputationLib: the library, e.g. python, tensorflow + :param config: the dict of configuration for this cost function.
The caller can modify the config to change behavior during runtime. + + """ + self.controller = controller + self.config=config self.set_computation_library(ComputationLib) + logger.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}') def get_terminal_cost(self, terminal_states: TensorType) -> TensorType: """Compute a batch of terminal costs for a batch of terminal states. diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index d0a7ae5..2788e59 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -1,5 +1,7 @@ from importlib import import_module import os + +from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import TensorType import yaml from copy import deepcopy as dcp @@ -7,12 +9,12 @@ from Control_Toolkit.Controllers import template_controller from Control_Toolkit.Cost_Functions import cost_function_base +from others.globals_and_utils import load_or_reload_config_if_modified + +log=get_logger(__name__) # cost_function config -cost_function_config = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"), - Loader=yaml.FullLoader, -) +(cost_function_config,_) = load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")) class CostFunctionWrapper: @@ -21,12 +23,22 @@ def __init__(self): self.cost_function_name_default: str = cost_function_config[ "cost_function_name_default" ] + log.info(f'default cost function name is {self.cost_function_name_default}') def configure( self, controller: template_controller, cost_function_specification=None, + config:dict=None ): + """ + Configures the cost function. TODO This lazy constructor is needed why? 
+ + :param controller: the controller that uses this cost function + :param cost_function_specification: the string name of the cost function class, to construct the class + :param config: the config dict() that holds all the configuration values + + """ environment_name = controller.environment_name computation_library = controller.computation_library # Use library dictated by controller @@ -41,6 +53,7 @@ def configure( self.cost_function: cost_function_base = getattr( cost_function_module, self.cost_function_name )(controller, computation_library) + log.info(f'configured controller {controller.__class__} with cost function {self.cost_function.__class__}') def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None @@ -63,7 +76,7 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: return self.cost_function.get_stage_cost(states, inputs, previous_input) def get_trajectory_cost( - self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None + self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None ): """Refer to :func:`the base cost function `""" return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input) diff --git a/Optimizers/__init__.py b/Optimizers/__init__.py index aecd35d..91875ba 100644 --- a/Optimizers/__init__.py +++ b/Optimizers/__init__.py @@ -54,7 +54,7 @@ def configure(self, **kwargs): """Pass any additional arguments from the controller to the optimizer.""" pass - def step(self, s: np.ndarray, time=None): + def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): raise NotImplementedError("Implement this function in a subclass.") def optimizer_reset(self): diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 81e48c2..2ad0f06 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -5,9 +5,9 @@ from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileAdaptive from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper +from SI_Toolkit.Functions.TF.Compile import CompileAdaptive class optimizer_mppi(template_optimizer): @@ -121,7 +121,14 @@ def inizialize_pertubation(self, random_gen): return delta_u - def _predict_and_cost(self, s, u_nom, random_gen, u_old): + def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): + """ Predict dynamics and compute costs of trajectories + TODO add params and return + + :param ?? + + :returns: ?? 
+ """ s = self.lib.tile(s, (self.num_rollouts, 1)) # generate random input sequence and clip to control limits u_nom = self.lib.concat([u_nom[:, 1:, :], u_nom[:, -1:, :]], 1) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 13ac619..edbc331 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -9,39 +9,44 @@ import tensorflow as tf import torch from numpy.random import SFC64, Generator -from SI_Toolkit.Functions.TF.Compile import CompileTF, CompileAdaptive from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary LOGGING_LEVEL = logging.INFO - - class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" + # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 grey = "\x1b[38;21m" yellow = "\x1b[33;21m" + cyan = "\x1b[1;36m" # dark green + green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" reset = "\x1b[0m" - format = ( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)" - ) + # File "{file}", line {max(line, 1)}'.replace("\\", "/") + format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { logging.DEBUG: grey + format + reset, - logging.INFO: grey + format + reset, + logging.INFO: cyan + format + reset, logging.WARNING: yellow + format + reset, logging.ERROR: red + format + reset, - logging.CRITICAL: bold_red + format + reset, + logging.CRITICAL: bold_red + format + reset } def format(self, record): log_fmt = self.FORMATS.get(record.levelno) formatter = logging.Formatter(log_fmt) - return formatter.format(record) + return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links def get_logger(name): + """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. + + :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + + :returns: the logger. + """ # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logger = logging.getLogger(name) logger.setLevel(LOGGING_LEVEL) diff --git a/requirements.txt b/requirements.txt index d2c9d6b..9fa1784 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ tensorflow_probability numpy torch torchvision -gym \ No newline at end of file +gym +dictdiffer \ No newline at end of file From 068a0ac934959a6fd86a54b219adeeb369e95814 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 11 Dec 2022 15:10:14 +0100 Subject: [PATCH 06/38] finally the dynamically modifiable control cost parameters are working and the cartpole is at least balancing itself and swinging up from down position. Big changes all over! Now anytime any of the config files is modified during runtime, the using class has its tf.variable assigned the new value. To get this work, the variable MUST NOT EXIST before it is set this way the first time. Otherwise the compiler just uses the field at compile time and the changes are never seen inside. The cartpole window now does not reset the initial position and angle sliders on each start. 
--- .../Controllers/controller_barebone.py | 5 +- .../Controllers/controller_do_mpc.py | 4 +- .../Controllers/controller_do_mpc_discrete.py | 3 +- .../Controllers/controller_lqr.py | 4 +- Controllers/TrajectoryGenerator.py | 22 -------- Controllers/__init__.py | 46 +++++++++------- Controllers/cartpole_trajectory_generator.py | 55 +++++++++++++++++++ Controllers/controller_mpc.py | 51 +++++++++-------- .../controller_neural_imitator_pytorch.py | 3 +- Controllers/controller_neural_imitator_tf.py | 3 +- Cost_Functions/__init__.py | 10 +++- Cost_Functions/cost_function_wrapper.py | 26 +++++---- Optimizers/__init__.py | 4 +- Optimizers/optimizer_cem_gmm_tf.py | 16 +++--- .../optimizer_cem_grad_bharadhwaj_tf.py | 10 ++-- Optimizers/optimizer_cem_naive_grad_tf.py | 10 ++-- Optimizers/optimizer_cem_tf.py | 18 +++--- Optimizers/optimizer_gradient_tf.py | 14 ++--- Optimizers/optimizer_mppi.py | 38 ++++++++----- Optimizers/optimizer_mppi_optimize_tf.py | 10 ++-- Optimizers/optimizer_mppi_var_tf.py | 10 ++-- Optimizers/optimizer_random_action_tf.py | 10 ++-- Optimizers/optimizer_rpgd_me_tf.py | 14 ++--- Optimizers/optimizer_rpgd_ml_tf.py | 20 +++---- Optimizers/optimizer_rpgd_particle_tf.py | 22 ++++---- Optimizers/optimizer_rpgd_tf.py | 22 ++++---- others/globals_and_utils.py | 2 +- 27 files changed, 257 insertions(+), 195 deletions(-) delete mode 100644 Controllers/TrajectoryGenerator.py create mode 100644 Controllers/cartpole_trajectory_generator.py diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py index 9f1b7eb..29c6add 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py @@ -9,7 +9,8 @@ import os from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes + # TODO: You can load and access config files here, like this: # config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader) @@ -35,7 +36,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # The controller has to adapt when environment-related attributes such as target positions change # Updated targets etc. 
are passed as a dictionary updated_attributes - self.update_attributes(updated_attributes) # After this call, updated attributes are available as self.<> + update_attributes(updated_attributes,self) # After this call, updated attributes are available as self.<> # TODO: Implement your controller here # Examples: diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py index 1d2f7d7..7d01b1e 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py @@ -9,7 +9,7 @@ from CartPole.cartpole_model import Q2u, cartpole_ode_namespace from CartPole.state_utilities import cartpole_state_vector_to_namespace from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes from SI_Toolkit.computation_library import NumpyLibrary, TensorType @@ -137,7 +137,7 @@ def tvp_fun(self, t_ind): def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) s = cartpole_state_vector_to_namespace(s) diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py index 11f40c5..c2e5171 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py @@ -11,6 +11,7 @@ from CartPole.state_utilities import cartpole_state_vector_to_namespace from Control_Toolkit.Controllers import template_controller from SI_Toolkit.computation_library import NumpyLibrary, TensorType +from others.globals_and_utils import update_attributes def mpc_next_state(s, u, dt): @@ -148,7 +149,7 @@ def tvp_fun(self, t_ind): return self.tvp_template def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) s = cartpole_state_vector_to_namespace(s) diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py index 4060ad1..6aef009 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py @@ -14,7 +14,7 @@ from CartPole.state_utilities import (ANGLE_IDX, ANGLED_IDX, POSITION_IDX, POSITIOND_IDX) from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader) actuator_noise = config["cartpole"]["actuator_noise"] @@ -81,7 +81,7 @@ def configure(self): self.eigVals = eigVals def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) state = np.array( [[s[POSITION_IDX] - self.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]]) diff --git a/Controllers/TrajectoryGenerator.py b/Controllers/TrajectoryGenerator.py deleted file mode 100644 index 96ab182..0000000 --- a/Controllers/TrajectoryGenerator.py +++ /dev/null @@ -1,22 +0,0 @@ -# stub for generating desired future trajectory of cartpole -
-class TrajectoryGenerator: - def __init__(self, lib, horizon:int): - """ Construct the trajectory generator. - - :param lib: the computation library, e.g. tensorflow - :param horizon: the MPC horizon in timesteps - """ - self.horizon = horizon - self.lib = lib - - def step(self, time): - """ Computes the desired future state trajectory at this time. - - :param time: the scalar time in seconds - - :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values. - - """ - return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 0bd784a..a447167 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -8,6 +8,7 @@ from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) +from others.globals_and_utils import load_or_reload_config_if_modified config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) logger = get_logger(__name__) @@ -26,7 +27,7 @@ class template_controller(ABC): _has_optimizer = False # Define the computation library in your controller class or in the controller's configuration: - _computation_library: "type[ComputationLibrary]" = None + _computation_library: ComputationLibrary = None def __init__( self, @@ -38,14 +39,13 @@ def __init__( initial_environment_attributes: "dict[str, TensorType]", ): # Load controller config and select the entry for the current controller - config_controllers = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")), - Loader=yaml.FullLoader - ) + (config_controllers,_) = load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")) # ignore the _ changes return since this is initial call # self.controller_name is inferred from the class name, which is the class being instantiated # Example: If you create a controller_mpc, this controller_template.__init__ will be called # but the class name will be controller_mpc, not template_controller. - self.config_controller = dict(config_controllers[self.controller_name]) + config_key=self.controller_name + self.config_controller = config_controllers[config_key] + # add timestep .dt to all controllers here self.config_controller["dt"] = dt # Set computation library @@ -78,11 +78,20 @@ def __init__( self.num_control_inputs = num_control_inputs self.control_limits = control_limits self.action_low, self.action_high = self.control_limits + + # todo these are special for cartpole but we would need a base cartpole controller class to put them there + # self.target_position=None + # self.target_equilibrium=None # Set properties like target positions on this controller for property, new_value in initial_environment_attributes.items(): setattr(self, property, self.computation_library.to_variable(new_value, self.computation_library.float32)) - + + # set all controller config numerical values as float variables in the computation space, e.g. 
tensorflow, so they can be updated during runtime + for property, value in self.config_controller.items(): + if isinstance(value, (float, int)): + setattr(self, property, self.computation_library.to_variable(value, self.computation_library.float32)) + # Initialize control variable self.u = 0.0 @@ -103,18 +112,6 @@ def configure(self, **kwargs): # In your controller, implement any additional initialization steps here pass - def update_attributes(self, updated_attributes: "dict[str, TensorType]"): - """ Update scalar float32 attributes in compiled code (tensorflow JIT) that have changed, i.e. copy them to the compiled/GPU instance. - - After this call, such attribute values are available to the TF function as self.key, where key is the key used in dict. - - Used in various controllers in Control_Toolkit/Control_Toolkit_ASF_Template/Controllers. - - :param updated_attributes: a dict with string keys and TensorType (i.e. float32) value attributes to copy - """ - for property, new_value in updated_attributes.items(): - self.computation_library.assign(getattr(self, property), self.lib.to_tensor(new_value, self.lib.float32)) - @abstractmethod def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): """ Execute one timestep of control. @@ -152,10 +149,17 @@ def controller_report(self): def controller_reset(self): raise NotImplementedError - @property + @property # decorates the controller so it has the field .controller_name that gets its short name which is the key in the .yml config file def controller_name(self): + """ Generates standard name for this controller, but use this method as if it were a field. + + :returns: the short name which is the key to the controller in the config_controllers.yml file, e.g. 'cartpole_mppi' + + """ name = self.__class__.__name__ if name != "template_controller": + if 'controller_' not in name: + raise AttributeError(f'this controller named "{name}" does not contain "controller_". Controllers should start or contain "controller_" and the key in the config_controllers.yml file should follow the underscore') return name.replace("controller_", "").replace("_", "-").lower() else: raise AttributeError() @@ -165,7 +169,7 @@ def controller_data_for_csv(self): return {} @property - def computation_library(self) -> "type[ComputationLibrary]": + def computation_library(self) -> ComputationLibrary: if self._computation_library == None: raise NotImplementedError("Controller class needs to specify its computation library") return self._computation_library diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py new file mode 100644 index 0000000..a00559f --- /dev/null +++ b/Controllers/cartpole_trajectory_generator.py @@ -0,0 +1,55 @@ +# stub for generating desired future trajectory of cartpole +import numpy +from torch import TensorType + +from Control_Toolkit.Controllers import template_controller +from SI_Toolkit.computation_library import ComputationLibrary +from CartPole import state_utilities + +period=1 + + +class cartpole_trajectory_generator: + """ Generates target state trajectory for the cartpole """ + def __init__(self, lib:ComputationLibrary, controller:template_controller=None): + """ Construct the trajectory generator. + + :param lib: the computation library, e.g.
tensorflow + :param horizon: the MPC horizon in timesteps + """ + self.lib = lib + self.controller:template_controller=controller + + def step(self, time: float, horizon: int) -> TensorType: + """ Computes the desired future state trajectory at this time. + + :param time: the scalar time in seconds + + :returns: the target state trajectory of cartpole. + It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values. + + """ + target_angleD=1000 + if time%period None: @@ -30,7 +30,7 @@ def __init__( self.lib = computation_library - self.num_rollouts = num_rollouts + self.batch_size = batch_size self.mpc_horizon = mpc_horizon self.cost_function = cost_function self.u = 0.0 diff --git a/Optimizers/optimizer_cem_gmm_tf.py b/Optimizers/optimizer_cem_gmm_tf.py index b371414..ca59be8 100644 --- a/Optimizers/optimizer_cem_gmm_tf.py +++ b/Optimizers/optimizer_cem_gmm_tf.py @@ -26,7 +26,7 @@ def __init__( mpc_horizon: int, cem_outer_it: int, cem_initial_action_stdev: float, - num_rollouts: int, + batch_size: int, cem_stdev_min: float, cem_best_k: int, optimizer_logging: bool, @@ -39,7 +39,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -61,12 +61,12 @@ def predict_and_cost(self, s, Q): @CompileTF def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, rollout_trajectory: tf.Tensor, sampling_dist: tfpd.Distribution, rng: tf.random.Generator): #generate random input sequence and clip to control limits - Q = sampling_dist.sample(sample_shape=[self.num_rollouts]) + Q = sampling_dist.sample(sample_shape=[self.batch_size]) Q = tf.clip_by_value(Q, self.action_low, self.action_high) #rollout the trajectories and get cost traj_cost, rollout_trajectory = self.predict_and_cost(s, Q) - rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.num_rollouts, self.mpc_horizon+1, self.num_states]) + rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.batch_size, self.mpc_horizon + 1, self.num_states]) #sort the costs and find best k costs sorted_cost = tf.argsort(traj_cost) @@ -102,11 +102,11 @@ def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) - Q = tf.zeros((self.num_rollouts, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) - rollout_trajectory = tf.zeros((self.num_rollouts, self.mpc_horizon+1, self.num_states), dtype=tf.float32) - traj_cost = tf.zeros((self.num_rollouts), dtype=tf.float32) + Q = tf.zeros((self.batch_size, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) + rollout_trajectory = tf.zeros((self.batch_size, self.mpc_horizon + 1, self.num_states), dtype=tf.float32) + traj_cost = tf.zeros((self.batch_size), dtype=tf.float32) for _ in range(0, self.cem_outer_it): self.sampling_dist, Q, elite_Q, traj_cost, rollout_trajectory = self.update_distribution(s, Q, traj_cost, rollout_trajectory, self.sampling_dist, self.rng) diff --git a/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py b/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py index e56e999..fdd8b60 100644 --- a/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py +++ 
b/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py @@ -27,7 +27,7 @@ def __init__( seed: int, mpc_horizon: int, cem_outer_it: int, - num_rollouts: int, + batch_size: int, cem_initial_action_stdev: float, cem_stdev_min: float, cem_best_k: int, @@ -48,7 +48,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -74,7 +74,7 @@ def __init__( # Initialization self.Q_tf = tf.Variable( - initial_value=tf.zeros([self.num_rollouts, self.mpc_horizon, self.num_control_inputs]), + initial_value=tf.zeros([self.batch_size, self.mpc_horizon, self.num_control_inputs]), trainable=True, dtype=tf.float32, ) @@ -82,7 +82,7 @@ def __init__( @CompileTF def predict_and_cost(self, s, elite_Q, Q_tf: tf.Variable, opt, rng: tf.random.Generator): - Q_sampled = self._sample_actions(rng, self.num_rollouts - self.cem_best_k) + Q_sampled = self._sample_actions(rng, self.batch_size - self.cem_best_k) Q = tf.concat([elite_Q, Q_sampled], axis=0) Q = tf.clip_by_value(Q, self.action_low, self.action_high) Q_tf.assign(Q) @@ -141,7 +141,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # tile s and convert inputs to tensor - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # generate random input sequence and clip to control limits diff --git a/Optimizers/optimizer_cem_naive_grad_tf.py b/Optimizers/optimizer_cem_naive_grad_tf.py index 9624994..cd46795 100644 --- a/Optimizers/optimizer_cem_naive_grad_tf.py +++ b/Optimizers/optimizer_cem_naive_grad_tf.py @@ -26,7 +26,7 @@ def __init__( seed: int, mpc_horizon: int, cem_outer_it: int, - num_rollouts: int, + batch_size: int, cem_initial_action_stdev: float, cem_stdev_min: float, cem_best_k: int, @@ -42,7 +42,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -62,8 +62,8 @@ def __init__( @CompileTF def predict_and_cost(self, s, rng, dist_mue, stdev): # generate random input sequence and clip to control limits - Q = tf.tile(dist_mue, [self.num_rollouts, 1, 1]) + rng.normal( - [self.num_rollouts, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) * stdev + Q = tf.tile(dist_mue, [self.batch_size, 1, 1]) + rng.normal( + [self.batch_size, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) * stdev Q = tf.clip_by_value(Q, self.action_low, self.action_high) # rollout the trajectories and record gradient with tf.GradientTape(watch_accessed_variables=False) as tape: @@ -95,7 +95,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # tile s and convert inputs to tensor - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) #cem steps updating distribution diff --git a/Optimizers/optimizer_cem_tf.py b/Optimizers/optimizer_cem_tf.py index 9c3a3ea..f51f3b7 100644 --- a/Optimizers/optimizer_cem_tf.py +++ b/Optimizers/optimizer_cem_tf.py @@ -25,7 +25,7 @@ def __init__( mpc_horizon: int, cem_outer_it: int, cem_initial_action_stdev: float, - num_rollouts: int, + batch_size: int, cem_stdev_min: float, cem_best_k: int, 
warmup: bool, @@ -40,7 +40,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -65,13 +65,13 @@ def predict_and_cost(self, s, Q): @CompileTF def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, rollout_trajectory: tf.Tensor, dist_mue: tf.Tensor, stdev: tf.Tensor, rng: tf.random.Generator): #generate random input sequence and clip to control limits - Q = tf.tile(dist_mue,(self.num_rollouts,1,1)) + tf.multiply(rng.normal( - shape=(self.num_rollouts, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32), stdev) + Q = tf.tile(dist_mue, (self.batch_size, 1, 1)) + tf.multiply(rng.normal( + shape=(self.batch_size, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32), stdev) Q = tf.clip_by_value(Q, self.action_low, self.action_high) #rollout the trajectories and get cost traj_cost, rollout_trajectory = self.predict_and_cost(s, Q) - rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.num_rollouts, self.mpc_horizon+1, self.num_states]) + rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.batch_size, self.mpc_horizon + 1, self.num_states]) #sort the costs and find best k costs sorted_cost = tf.argsort(traj_cost) @@ -87,11 +87,11 @@ def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) - Q = tf.zeros((self.num_rollouts, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) - rollout_trajectory = tf.zeros((self.num_rollouts, self.mpc_horizon+1, self.num_states), dtype=tf.float32) - traj_cost = tf.zeros((self.num_rollouts), dtype=tf.float32) + Q = tf.zeros((self.batch_size, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) + rollout_trajectory = tf.zeros((self.batch_size, self.mpc_horizon + 1, self.num_states), dtype=tf.float32) + traj_cost = tf.zeros((self.batch_size), dtype=tf.float32) iterations = self.warmup_iterations if self.warmup and self.count == 0 else self.cem_outer_it for _ in range(0, iterations): diff --git a/Optimizers/optimizer_gradient_tf.py b/Optimizers/optimizer_gradient_tf.py index a70346e..0533f81 100644 --- a/Optimizers/optimizer_gradient_tf.py +++ b/Optimizers/optimizer_gradient_tf.py @@ -23,7 +23,7 @@ def __init__( seed: int, mpc_horizon: int, gradient_steps: int, - num_rollouts: int, + batch_size: int, initial_action_stdev: float, learning_rate: float, adam_beta_1: float, @@ -43,7 +43,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -105,7 +105,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # Start all trajectories in current state - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -143,7 +143,7 @@ def step(self, s: np.ndarray, time=None): # Shift Q, Adam weights by one time step self.count += 1 Q_s = self.rng.uniform( - shape=[self.num_rollouts, 1, self.num_control_inputs], + 
shape=[self.batch_size, 1, self.num_control_inputs], minval=self.action_low, maxval=self.action_high, dtype=tf.float32, @@ -157,14 +157,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -175,7 +175,7 @@ def step(self, s: np.ndarray, time=None): def optimizer_reset(self): # generate random input sequence and clip to control limits Q = self.rng.uniform( - [self.num_rollouts, self.mpc_horizon, self.num_control_inputs], + [self.batch_size, self.mpc_horizon, self.num_control_inputs], self.action_low, self.action_high, dtype=tf.float32, diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 2ad0f06..51a96e3 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -1,4 +1,6 @@ from typing import Tuple + +from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary import numpy as np @@ -9,6 +11,7 @@ from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper from SI_Toolkit.Functions.TF.Compile import CompileAdaptive +log = get_logger(__name__) class optimizer_mppi(template_optimizer): supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} @@ -26,7 +29,7 @@ def __init__( R: float, LBD: float, mpc_horizon: int, - num_rollouts: int, + batch_size: int, NU: float, SQRTRHOINV: float, GAMMA: float, @@ -41,7 +44,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -51,12 +54,12 @@ def __init__( # MPPI parameters - self.cc_weight = cc_weight - self.R = self.lib.to_tensor(R, self.lib.float32) - self.LBD = LBD - self.NU = self.lib.to_tensor(NU, self.lib.float32) + self.cc_weight = self.lib.to_variable(cc_weight, self.lib.float32) + self.R = self.lib.to_variable(R, self.lib.float32) + self.LBD = self.lib.to_variable(LBD, self.lib.float32) + self.NU = self.lib.to_variable(NU, self.lib.float32) self._SQRTRHOINV = SQRTRHOINV - self.GAMMA = GAMMA + self.GAMMA = self.lib.to_variable(GAMMA, self.lib.float32) self.update_internal_state = self.update_internal_state_of_RNN # FIXME: There is one unnecessary operation in this function in case it is not an RNN. @@ -67,6 +70,9 @@ def __init__( self.Interpolator = Interpolator(self.mpc_horizon, period_interpolation_inducing_points, self.num_control_inputs, self.lib) + # here the predictor, cost computer, and optimizer are compiled to native instructions by tensorflow graphs and XLA JIT + # before this, we copy all the required configuration to these objects so that they can be later updated during runtime.
+ self.predict_and_cost = CompileAdaptive(self._predict_and_cost) self.predict_optimal_trajectory = CompileAdaptive(self._predict_optimal_trajectory) @@ -100,7 +106,8 @@ def mppi_correction_cost(self, u, delta_u): #total cost of the trajectory def get_mppi_trajectory_cost(self, state_horizon ,u, u_prev, delta_u): total_cost = self.cost_function.get_trajectory_cost(state_horizon,u, u_prev) - total_mppi_cost = total_cost + self.mppi_correction_cost(u, delta_u) + mppi_correction_cost =self.mppi_correction_cost(u, delta_u) + total_mppi_cost = total_cost +mppi_correction_cost return total_mppi_cost def reward_weighted_average(self, S, delta_u): @@ -114,14 +121,14 @@ def inizialize_pertubation(self, random_gen): stdev = self.SQRTRHODTINV delta_u = random_gen.normal( - [self.num_rollouts, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], + [self.batch_size, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=self.lib.float32) * stdev delta_u = self.Interpolator.interpolate(delta_u) return delta_u - def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): + def _predict_and_cost(self, s, u_nom, random_gen, u_old): """ Predict dynamics and compute costs of trajectories TODO add params and return @@ -129,11 +136,11 @@ def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): :returns: ?? """ - s = self.lib.tile(s, (self.num_rollouts, 1)) + s = self.lib.tile(s, (self.batch_size, 1)) # generate random input sequence and clip to control limits u_nom = self.lib.concat([u_nom[:, 1:, :], u_nom[:, -1:, :]], 1) delta_u = self.inizialize_pertubation(random_gen) - u_run = self.lib.tile(u_nom, (self.num_rollouts, 1, 1))+delta_u + u_run = self.lib.tile(u_nom, (self.batch_size, 1, 1)) + delta_u u_run = self.lib.clip(u_run, self.action_low, self.action_high) rollout_trajectory = self.predictor.predict_tf(s, u_run) traj_cost = self.get_mppi_trajectory_cost(rollout_trajectory, u_run, u_old, delta_u) @@ -143,7 +150,7 @@ def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): return self.mppi_output(u, u_nom, rollout_trajectory, traj_cost, u_run) def update_internal_state_of_RNN(self, s, u_nom): - u_tiled = self.lib.tile(u_nom[:, :1, :], (self.num_rollouts, 1, 1)) + u_tiled = self.lib.tile(u_nom[:, :1, :], (self.batch_size, 1, 1)) self.predictor.update(s=s, Q0=u_tiled) def _predict_optimal_trajectory(self, s, u_nom): @@ -160,7 +167,10 @@ def step(self, s: np.ndarray, time=None): self.u, self.u_nom, rollout_trajectory, traj_cost, u_run = self.predict_and_cost(s, self.u_nom, self.rng, self.u) self.u = self.lib.to_numpy(self.lib.squeeze(self.u)) - + + # print(f'mean traj cost={np.mean(traj_cost.numpy()):.2f}') # todo debug + + if self.optimizer_logging: self.logging_values["Q_logged"] = self.lib.to_numpy(u_run) self.logging_values["J_logged"] = self.lib.to_numpy(traj_cost) diff --git a/Optimizers/optimizer_mppi_optimize_tf.py b/Optimizers/optimizer_mppi_optimize_tf.py index f19a584..446fd9f 100644 --- a/Optimizers/optimizer_mppi_optimize_tf.py +++ b/Optimizers/optimizer_mppi_optimize_tf.py @@ -27,7 +27,7 @@ def __init__( R: float, LBD: float, mpc_horizon: int, - num_rollouts: int, + batch_size: int, NU: float, SQRTRHOINV: float, GAMMA: float, @@ -48,7 +48,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -110,7 +110,7 @@ def 
reward_weighted_average(self, S, delta_u): def inizialize_pertubation(self, random_gen): stdev = self.SQRTRHODTINV delta_u = random_gen.normal( - [self.num_rollouts, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], + [self.batch_size, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=tf.float32) * stdev delta_u = self.Interpolator.interpolate(delta_u) return delta_u @@ -119,7 +119,7 @@ def inizialize_pertubation(self, random_gen): def mppi_prior(self, s, u_nom, random_gen, u_old): # generate random input sequence and clip to control limits delta_u = self.inizialize_pertubation(random_gen) - u_run = tf.tile(u_nom, [self.num_rollouts, 1, 1]) + delta_u + u_run = tf.tile(u_nom, [self.batch_size, 1, 1]) + delta_u u_run = tf.clip_by_value(u_run, self.action_low, self.action_high) #predict trajectories rollout_trajectory = self.predictor.predict_tf(s, u_run) @@ -157,7 +157,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) #first retrieve suboptimal control sequence with mppi diff --git a/Optimizers/optimizer_mppi_var_tf.py b/Optimizers/optimizer_mppi_var_tf.py index 5c4410e..4417b73 100644 --- a/Optimizers/optimizer_mppi_var_tf.py +++ b/Optimizers/optimizer_mppi_var_tf.py @@ -28,7 +28,7 @@ def __init__( R: float, LBD_mc: float, mpc_horizon: int, - num_rollouts: int, + batch_size: int, NU_mc: float, SQRTRHOINV_mc: float, GAMMA: float, @@ -47,7 +47,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -94,7 +94,7 @@ def reward_weighted_average(self, S, delta_u): #initialize the pertubations def inizialize_pertubation(self, random_gen, nuvec): - delta_u = random_gen.normal([self.num_rollouts, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=tf.float32) * nuvec * self.SQRTRHODTINV + delta_u = random_gen.normal([self.batch_size, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=tf.float32) * nuvec * self.SQRTRHODTINV delta_u = self.Interpolator.interpolate(delta_u) return delta_u @@ -105,7 +105,7 @@ def do_step(self, s, u_nom, random_gen, u_old, nuvec): tape.watch(nuvec) #watch variances on tape delta_u = self.inizialize_pertubation(random_gen, nuvec) #initialize pertubations #build real input and clip, preserving gradient - u_run = tf.tile(u_nom, [self.num_rollouts, 1, 1]) + delta_u + u_run = tf.tile(u_nom, [self.batch_size, 1, 1]) + delta_u u_run = tfp.math.clip_by_value_preserve_gradient(u_run, self.action_low, self.action_high) #rollout and cost rollout_trajectory = self.predictor.predict_tf(s, u_run) @@ -131,7 +131,7 @@ def do_step(self, s, u_nom, random_gen, u_old, nuvec): def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) self.u, self.u_nom, new_nuvec, u_run, traj_cost = self.do_step(s, self.u_nom, self.rng, self.u, self.nuvec) diff --git a/Optimizers/optimizer_random_action_tf.py 
b/Optimizers/optimizer_random_action_tf.py index b7f18e2..7055d93 100644 --- a/Optimizers/optimizer_random_action_tf.py +++ b/Optimizers/optimizer_random_action_tf.py @@ -22,7 +22,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, optimizer_logging: bool, ): super().__init__( @@ -33,7 +33,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -54,11 +54,11 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # Start all trajectories in current state - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) Q = self.rng.uniform( - shape=[self.num_rollouts, self.mpc_horizon, self.num_control_inputs], + shape=[self.batch_size, self.mpc_horizon, self.num_control_inputs], minval=self.action_low, maxval=self.action_high, dtype=tf.float32, @@ -82,7 +82,7 @@ def step(self, s: np.ndarray, time=None): def optimizer_reset(self): # generate random input sequence and clip to control limits Q = self.rng.uniform( - shape=[self.num_rollouts, self.mpc_horizon, self.num_control_inputs], + shape=[self.batch_size, self.mpc_horizon, self.num_control_inputs], minval=self.action_low, maxval=self.action_high, dtype=tf.float32, diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index c11ae3e..233a4bb 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -31,7 +31,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -56,7 +56,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -200,7 +200,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -254,13 +254,13 @@ def step(self, s: np.ndarray, time=None): if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories epsilon_res = self.sample_actions( - self.rng, self.num_rollouts - self.opt_keep_k + self.rng, self.batch_size - self.opt_keep_k ) epsilon_keep = tf.gather(self.epsilon, best_idx, axis=0) # resorting according to costs self.epsilon = tf.concat([epsilon_res, epsilon_keep], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx, axis=0), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of adam: # For the trajectories which are kept, the weights are shifted for a warmstart @@ -317,10 +317,10 @@ def optimizer_reset(self): self.alpha = tf.constant(self.maximum_entropy_alpha, dtype=tf.float32) # Sample new initial guesses for trajectories - self.epsilon = self.sample_actions(self.rng, 
self.num_rollouts) + self.epsilon = self.sample_actions(self.rng, self.batch_size) self.count = 0 # Reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/Optimizers/optimizer_rpgd_ml_tf.py b/Optimizers/optimizer_rpgd_ml_tf.py index 515b083..fcc4374 100644 --- a/Optimizers/optimizer_rpgd_ml_tf.py +++ b/Optimizers/optimizer_rpgd_ml_tf.py @@ -31,7 +31,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -57,7 +57,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -229,7 +229,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -286,12 +286,12 @@ def step(self, s: np.ndarray, time=None): adam_weights = self.opt.get_weights() if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories - epsilon_resampled = self.sample_actions(self.rng, self.num_rollouts - self.opt_keep_k) + epsilon_resampled = self.sample_actions(self.rng, self.batch_size - self.opt_keep_k) epsilon_retained = tf.gather(self.epsilon, best_idx, axis=0) # resorting according to costs self.epsilon = tf.concat([epsilon_resampled, epsilon_retained], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx, axis=0), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of Adam: @@ -300,14 +300,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -334,7 +334,7 @@ def optimizer_reset(self): else: self.theta = tf.Variable(initial_theta, dtype=tf.float32) - initial_Q = tf.zeros([self.num_rollouts, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) + initial_Q = tf.zeros([self.batch_size, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) if hasattr(self, "Q"): self.Q.assign(initial_Q) else: @@ -343,10 +343,10 @@ def optimizer_reset(self): self.alpha = tf.constant(self.maximum_entropy_alpha, dtype=tf.float32) # Sample new initial guesses for trajectories - self.epsilon = self.sample_actions(self.rng, self.num_rollouts) + self.epsilon = self.sample_actions(self.rng, self.batch_size) self.count = 0 # Reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = 
tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/Optimizers/optimizer_rpgd_particle_tf.py b/Optimizers/optimizer_rpgd_particle_tf.py index 4585fe4..f7a3749 100644 --- a/Optimizers/optimizer_rpgd_particle_tf.py +++ b/Optimizers/optimizer_rpgd_particle_tf.py @@ -25,7 +25,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -49,7 +49,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -164,7 +164,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -214,13 +214,13 @@ def step(self, s: np.ndarray, time=None): if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories Qres = self.sample_actions( - self.rng, self.num_rollouts - self.opt_keep_k + self.rng, self.batch_size - self.opt_keep_k ) Q_keep = tf.gather(Qn, best_idx) # resorting according to costs Qn = tf.concat([Qres, Q_keep], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of adam: # For the trajectories which are kept, the weights are shifted for a warmstart @@ -242,14 +242,14 @@ def step(self, s: np.ndarray, time=None): # For the new trajectories they are reset to 0 w1 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] ) w2 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] @@ -264,14 +264,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -294,7 +294,7 @@ def optimizer_reset(self): # # end of unnecessary part # sample new initial guesses for trajectories - Qn = self.sample_actions(self.rng, self.num_rollouts) + Qn = self.sample_actions(self.rng, self.batch_size) if hasattr(self, "Q_tf"): self.Q_tf.assign(Qn) else: @@ -304,4 +304,4 @@ def optimizer_reset(self): # reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 20eaf56..36143b8 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -25,7 +25,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: 
int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -49,7 +49,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -166,7 +166,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -216,13 +216,13 @@ def step(self, s: np.ndarray, time=None): if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories Qres = self.sample_actions( - self.rng, self.num_rollouts - self.opt_keep_k + self.rng, self.batch_size - self.opt_keep_k ) Q_keep = tf.gather(Qn, best_idx) # resorting according to costs Qn = tf.concat([Qres, Q_keep], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of adam: # For the trajectories which are kept, the weights are shifted for a warmstart @@ -244,14 +244,14 @@ def step(self, s: np.ndarray, time=None): # For the new trajectories they are reset to 0 w1 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] ) w2 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] @@ -266,14 +266,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -296,7 +296,7 @@ def optimizer_reset(self): # # end of unnecessary part # sample new initial guesses for trajectories - Qn = self.sample_actions(self.rng, self.num_rollouts) + Qn = self.sample_actions(self.rng, self.batch_size) if hasattr(self, "Q_tf"): self.Q_tf.assign(Qn) else: @@ -306,4 +306,4 @@ def optimizer_reset(self): # reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index edbc331..beedf15 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -11,7 +11,7 @@ from numpy.random import SFC64, Generator from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary -LOGGING_LEVEL = logging.INFO +LOGGING_LEVEL = logging.DEBUG # usually INFO is good class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 From 64148a325befc3bd3c18500344f3409c413ba0b3 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 12 Dec 
2022 08:52:34 +0100 Subject: [PATCH 07/38] now spin and balance both work! and so does changing the policy and changing the cost weights during runtime! Yay, finally! Trick was to make sure that all values are actually propagated to tf variables, e.g. numpy arrays, ints, string, not just float Changed logging format to start with level for better readability. added string type to computation_library.py --- Controllers/cartpole_trajectory_generator.py | 27 +++++++++++++------- Controllers/controller_mpc.py | 2 +- others/globals_and_utils.py | 6 ++--- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index a00559f..74595eb 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -1,13 +1,16 @@ # stub for genrating desired future trajectory of cartpole import numpy +import numpy as np from torch import TensorType from Control_Toolkit.Controllers import template_controller +from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import ComputationLibrary from CartPole import state_utilities period=1 +log=get_logger(__name__) class cartpole_trajectory_generator: """ Generates target state trajectory for the cartpole """ @@ -34,22 +37,28 @@ def step(self, time: float, horizon: int) -> TensorType: target_angleD=-target_angleD traj=numpy.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable traj[:]=self.lib.nan # set all states undetermined - aim='balance' - if aim=='spin': + policy=self.controller.cost_function_wrapper.cost_function.policy + if policy is None: + raise RuntimeError(f'set policy in config_cost_functions.yml') + + if policy == 'spin': traj[state_utilities.POSITION_IDX] = self.controller.target_position # traj[state_utilities.ANGLE_COS_IDX, :] = self.controller.target_equilibrium # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * self.controller.target_equilibrium traj[state_utilities.ANGLED_IDX, :] = 1000*self.controller.target_equilibrium # traj[state_utilities.POSITIOND_IDX, :] = 0 - elif aim=='balance': + elif policy == 'balance': traj[state_utilities.POSITION_IDX] = self.controller.target_position - # traj[state_utilities.ANGLE_COS_IDX, :] = self.controller.target_equilibrium - # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * (1-self.controller.target_equilibrium)/2 # either 0 for up and pi for down - # traj[state_utilities.ANGLED_IDX, :] = 0 - # traj[state_utilities.POSITIOND_IDX, :] = 0 + target_angle=self.lib.pi * (1-self.controller.target_equilibrium)/2 # either 0 for up and pi for down + traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) + traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + traj[state_utilities.ANGLE_IDX, :] = target_angle + traj[state_utilities.ANGLED_IDX, :] = 0 + traj[state_utilities.POSITIOND_IDX, :] = 0 + else: + log.error(f'cost policy "{policy}" is unknown') - traj=self.lib.to_variable(traj, self.lib.float32) + # traj=self.lib.to_variable(traj, self.lib.float32) return traj diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d85f42c..589c18d 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -93,7 +93,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # process changes to configs using new returned 
change list if not changes is None: for k,v in changes.items(): - if isinstance(v, (int, float)): + if isinstance(v, (int, float, str)): updated_attributes[k]=v for o in objs: # for each object in objs, update its attributes update_attributes(updated_attributes,o) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index beedf15..b6dcb2a 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -24,13 +24,13 @@ class CustomFormatter(logging.Formatter): bold_red = "\x1b[31;1m" reset = "\x1b[0m" # File "{file}", line {max(line, 1)}'.replace("\\", "/") - format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' + format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { logging.DEBUG: grey + format + reset, logging.INFO: cyan + format + reset, - logging.WARNING: yellow + format + reset, - logging.ERROR: red + format + reset, + logging.WARNING: red + format + reset, + logging.ERROR: bold_red + format + reset, logging.CRITICAL: bold_red + format + reset } From fef38b083ed2744128f47cc6057ef5f587d2fb0d Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 12 Dec 2022 09:52:36 +0100 Subject: [PATCH 08/38] got basic shimmy movement to work now. added helper vars to access config values in cartpole_trajectory_generator.py. added dt to params in cartpole_trajectory_generator.step() call. --- Controllers/cartpole_trajectory_generator.py | 48 ++++++++++++++------ Controllers/controller_mpc.py | 14 +++--- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index 74595eb..f7e7114 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -23,39 +23,57 @@ def __init__(self, lib:ComputationLibrary, controller:template_controller=None): self.lib = lib self.controller:template_controller=controller - def step(self, time: float, horizon: int) -> TensorType: + def step(self, time: float, horizon: int, dt:float) -> TensorType: """ Computes the desired future state trajectory at this time. :param time: the scalar time in seconds + :param horizon: the number of horizon steps + :param dt: the timestep in seconds :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values. + It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired state values. 
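To make the NaN don't-care convention concrete, here is a minimal standalone sketch (an illustration only, not part of the patch; the state layout, horizon, and waveform are placeholders):

    import numpy as np

    NUM_STATES, POSITION_IDX, POSITIOND_IDX = 6, 0, 5   # placeholder state index layout
    horizon, dt = 25, 0.02                              # placeholder horizon and timestep
    traj = np.full((NUM_STATES, horizon), np.nan)       # NaN marks don't-care states
    times = np.linspace(0.0, horizon * dt, num=horizon)
    traj[POSITION_IDX, :] = 0.1 * np.sin(2.0 * np.pi * times)        # desired cart position
    traj[POSITIOND_IDX, :] = np.gradient(traj[POSITION_IDX, :], dt)  # consistent cart velocity

A cost function can then penalize only the rows of traj that are not NaN.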
""" - target_angleD=1000 - if time%period Date: Mon, 12 Dec 2022 12:12:04 +0100 Subject: [PATCH 09/38] added cartonly trajectory and fixed bug that erased the target position and equilibrium from updated attributes before they were updated in controller --- Controllers/cartpole_trajectory_generator.py | 52 +++++++++++++------- Controllers/controller_mpc.py | 1 - 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index f7e7114..0929e89 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -38,42 +38,60 @@ def step(self, time: float, horizon: int, dt:float) -> TensorType: traj=np.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable traj[:]=self.lib.nan # set all states undetermined - cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml - controller=self.controller # use controller to access attributes set in config_optimizers + # cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml + # controller=self.controller # use controller to access attributes set in config_optimizers - policy=cost_function.policy + policy=self.controller.cost_function_wrapper.cost_function.policy if policy is None: - raise RuntimeError(f'set policy in config_cost_functions.yml') + raise RuntimeError(f'set policy in config_self.controller.cost_function_wrapper.cost_functions.yml') + + gui_target_position=self.controller.target_position # GUI slider position + gui_target_equilibrium=self.controller.target_equilibrium # GUI switch +1 or -1 to make pole target up or down position if policy == 'spin': # spin pole CW or CCW depending on target_equilibrium up or down - traj[state_utilities.POSITION_IDX] = controller.target_position - # traj[state_utilities.ANGLE_COS_IDX, :] = controller.target_equilibrium + traj[state_utilities.POSITION_IDX] = gui_target_position + # traj[state_utilities.ANGLE_COS_IDX, :] = gui_target_equilibrium # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * controller.target_equilibrium - traj[state_utilities.ANGLED_IDX, :] = 1000*controller.target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target + # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * gui_target_equilibrium + traj[state_utilities.ANGLED_IDX, :] = 1000*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target # traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'balance': # balance upright or down at desired cart position - traj[state_utilities.POSITION_IDX] = controller.target_position - target_angle=self.lib.pi * (1-controller.target_equilibrium)/2 # either 0 for up and pi for down + traj[state_utilities.POSITION_IDX] = gui_target_position + target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) traj[state_utilities.ANGLE_IDX, :] = target_angle traj[state_utilities.ANGLED_IDX, :] = 0 traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'shimmy': # cart follows a desired cart position shimmy while keeping pole up or down - shimmy_period=cost_function.shimmy_period # seconds - 
shimmy_amp=cost_function.shimmy_amp # meters + per=self.controller.cost_function_wrapper.cost_function.shimmy_per # seconds + amp=self.controller.cost_function_wrapper.cost_function.shimmy_amp # meters endtime=time+horizon*dt times=np.linspace(time,endtime,num=horizon) - shimmy=shimmy_amp*np.sin((2*np.pi/shimmy_period)*times) - shimmyd=np.gradient(shimmy,dt) - traj[state_utilities.POSITION_IDX] = controller.target_position+shimmy - target_angle=self.lib.pi * (1-controller.target_equilibrium)/2 # either 0 for up and pi for down + cartpos=amp*np.sin((2*np.pi/per)*times) + cartvel=np.gradient(cartpos,dt) + traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos + target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) traj[state_utilities.ANGLE_IDX, :] = target_angle # traj[state_utilities.ANGLED_IDX, :] = 0 - traj[state_utilities.POSITIOND_IDX, :] = shimmyd + traj[state_utilities.POSITIOND_IDX, :] = cartvel + elif policy == 'cartonly': # cart follows the trajectory, pole ignored + per=self.controller.cost_function_wrapper.cost_function.cartonly_per # seconds + amp=self.controller.cost_function_wrapper.cost_function.cartonly_amp # meters + endtime=time+horizon*dt + times=np.linspace(time,endtime,num=horizon) + from scipy.signal import sawtooth + cartpos=amp*sawtooth((2*np.pi/per)*times, width=.5) # .5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html + cartvel=np.gradient(cartpos,dt) + traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos + # target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down + # traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) + # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + # traj[state_utilities.ANGLE_IDX, :] = target_angle + # traj[state_utilities.ANGLED_IDX, :] = 0 + traj[state_utilities.POSITIOND_IDX, :] = cartvel else: log.error(f'cost policy "{policy}" is unknown') diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index e0b8e28..1d12f80 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -81,7 +81,6 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # log.debug(f'step time={time:.3f}s') # now we fill this dict with config file changes if there are any and update attributes in the controller, the cost function, and the optimizer - updated_attributes.clear() # detect any changes in config scalar values and pass to this controller or the cost function or optimizer # note that the cost function that has its attributes updated is the enclosed cost function of the wrapper! for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): From 03fa3af90c0a6514dcd46722324283926ec5c4d0 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 12 Dec 2022 20:23:00 +0100 Subject: [PATCH 10/38] passing current state to cartpole_trajectory_generator.py so it can extrapolate it, and added option for using terminal cost or entire trajectory cost in cartpole_trajectory_cost.py. added more params to config_cost_functions.yml. fiddled with logging colors. 
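A sketch of what the switchable terminal-versus-trajectory cost mentioned above could look like; the function name and tensor layout are hypothetical, since cartpole_trajectory_cost.py itself is not shown in this patch:

    import tensorflow as tf

    def trajectory_cost(stage_costs: tf.Tensor, terminal_cost_only: bool) -> tf.Tensor:
        # stage_costs: [batch_size, mpc_horizon], per-timestep cost of each rollout
        if terminal_cost_only:
            return stage_costs[:, -1]               # judge each rollout by its final state only
        return tf.reduce_sum(stage_costs, axis=1)   # accumulate cost over the whole horizon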
changed yaml loader to one that can deal with scientific format numbers (ruamel.yaml) --- Controllers/cartpole_trajectory_generator.py | 19 +++++++++++++------ Controllers/controller_mpc.py | 5 ++++- Cost_Functions/cost_function_wrapper.py | 2 +- others/globals_and_utils.py | 4 +++- requirements.txt | 3 ++- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index 0929e89..806a209 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -23,23 +23,24 @@ def __init__(self, lib:ComputationLibrary, controller:template_controller=None): self.lib = lib self.controller:template_controller=controller - def step(self, time: float, horizon: int, dt:float) -> TensorType: + def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorType: """ Computes the desired future state trajectory at this time. :param time: the scalar time in seconds :param horizon: the number of horizon steps :param dt: the timestep in seconds + :param state: the current state of the cartpole :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired state values. + It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired future state values. """ traj=np.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable traj[:]=self.lib.nan # set all states undetermined - # cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml - # controller=self.controller # use controller to access attributes set in config_optimizers + cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml + controller=self.controller # use controller to access attributes set in config_optimizers policy=self.controller.cost_function_wrapper.cost_function.policy if policy is None: @@ -52,8 +53,14 @@ def step(self, time: float, horizon: int, dt:float) -> TensorType: traj[state_utilities.POSITION_IDX] = gui_target_position # traj[state_utilities.ANGLE_COS_IDX, :] = gui_target_equilibrium # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * gui_target_equilibrium - traj[state_utilities.ANGLED_IDX, :] = 1000*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target + endtime=horizon*dt + times=np.linspace(0,endtime,num=horizon) + s_per_rev_target=cost_function.spin_rev_period_sec + rad_per_s_target=2*np.pi/s_per_rev_target + rad_per_dt=rad_per_s_target*dt + current_angle=state[state_utilities.ANGLE_IDX] + traj[state_utilities.ANGLE_IDX, :] = current_angle+gui_target_equilibrium*times*rad_per_dt + # traj[state_utilities.ANGLED_IDX, :] = rad_per_s_target*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target # traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'balance': # balance upright or down at desired cart position traj[state_utilities.POSITION_IDX] = gui_target_position diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 1d12f80..d1ace9b 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -96,7 +96,10 @@ def step(self, s: np.ndarray, time=None, updated_attributes: 
"dict[str, TensorTy # following gets target_position, target_equilibrium, and target_trajectory passed to tensorflow. The trajectory is passed in # as updated_attributes, and is transferred to tensorflow by the update_attributes call - new_target_trajectory = self.target_trajectory_generator.step(time=time, horizon=self.optimizer.mpc_horizon, dt=gui_default_params.controller_update_interval) + new_target_trajectory = self.target_trajectory_generator.step(time=time, + horizon=self.optimizer.mpc_horizon, + dt=gui_default_params.controller_update_interval, + state=s) updated_attributes['target_trajectory'] = new_target_trajectory update_attributes(updated_attributes,self) diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index 39b6234..4e2a9a4 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -55,7 +55,7 @@ def configure( config=self.cost_function_config['CartPole'][self.cost_function_name] # todo hardcoded 'CartPole' has to go, not sure how to determine it, maybe from module folder? update_attributes(config, self.cost_function) - log.info(f'configured controller {controller.__class__} with cost function {self.cost_function.__class__}') + log.info(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index b6dcb2a..4270e56 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -22,12 +22,14 @@ class CustomFormatter(logging.Formatter): green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" + light_blue = "\x1b[1;36m" + blue = "\x1b[1;34m" reset = "\x1b[0m" # File "{file}", line {max(line, 1)}'.replace("\\", "/") format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { - logging.DEBUG: grey + format + reset, + logging.DEBUG: light_blue + format + reset, logging.INFO: cyan + format + reset, logging.WARNING: red + format + reset, logging.ERROR: bold_red + format + reset, diff --git a/requirements.txt b/requirements.txt index 9fa1784..fdde05b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ numpy torch torchvision gym -dictdiffer \ No newline at end of file +dictdiffer +ruamel.yaml \ No newline at end of file From ab0f34e6177a87a42959b9459e0e6fd8c9d0a4d9 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Tue, 13 Dec 2022 08:14:58 +0100 Subject: [PATCH 11/38] added MPPI papers to docstring --- Optimizers/optimizer_mppi.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 51a96e3..38a3ed3 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -14,6 +14,17 @@ log = get_logger(__name__) class optimizer_mppi(template_optimizer): + """ Model Predictive Path Integral optimizer, based on + + Drews, Paul, Grady Williams, Brian Goldfain, Evangelos A. Theodorou, and James M. Rehg. 2017. “Aggressive Deep Driving: Model Predictive Control with a CNN Cost Model.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1707.05303. + + Williams, G., P. Drews, B. Goldfain, J. M. Rehg, and E. A. Theodorou. 2016. 
“Aggressive Driving with Model Predictive Path Integral Control.” In 2016 IEEE International Conference on Robotics and Automation (ICRA), 1433–40. https://doi.org/10.1109/ICRA.2016.7487277. + + Williams, Grady, Andrew Aldrich, and Evangelos A. Theodorou. 2017. “Model Predictive Path Integral Control: From Theory to Parallel Computation.” Journal of Guidance, Control, and Dynamics: A Publication of the American Institute of Aeronautics and Astronautics Devoted to the Technology of Dynamics and Control 40 (2): 344–57. https://doi.org/10.2514/1.G001921. + + Williams, Grady, Eric Rombokas, and Tom Daniel. 2015. “GPU Based Path Integral Control with Learned Dynamics.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1503.00330. + + """ supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} def __init__( From 7ad7be12ccd2640e9c0644b0d48742dbf791d2d2 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Tue, 13 Dec 2022 09:10:12 +0100 Subject: [PATCH 12/38] added MPPI papers to docstring --- Optimizers/optimizer_mppi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 38a3ed3..c41b006 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -16,12 +16,16 @@ class optimizer_mppi(template_optimizer): """ Model Predictive Path Integral optimizer, based on - Drews, Paul, Grady Williams, Brian Goldfain, Evangelos A. Theodorou, and James M. Rehg. 2017. “Aggressive Deep Driving: Model Predictive Control with a CNN Cost Model.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1707.05303. + The equations and parameters are defined in the following: Williams, G., P. Drews, B. Goldfain, J. M. Rehg, and E. A. Theodorou. 2016. “Aggressive Driving with Model Predictive Path Integral Control.” In 2016 IEEE International Conference on Robotics and Automation (ICRA), 1433–40. https://doi.org/10.1109/ICRA.2016.7487277. + A longer paper with all the math is: + Williams, Grady, Andrew Aldrich, and Evangelos A. Theodorou. 2017. “Model Predictive Path Integral Control: From Theory to Parallel Computation.” Journal of Guidance, Control, and Dynamics: A Publication of the American Institute of Aeronautics and Astronautics Devoted to the Technology of Dynamics and Control 40 (2): 344–57. https://doi.org/10.2514/1.G001921. + The following paper uses an LWPR model to fly Crazyflie drones using another type of MPPI with minibatches of rollouts: + Williams, Grady, Eric Rombokas, and Tom Daniel. 2015. “GPU Based Path Integral Control with Learned Dynamics.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1503.00330.
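The core update these papers share can be sketched as an exponentially reward-weighted average of sampled input perturbations. A simplified numpy illustration (not the toolkit's implementation; LBD in the configs plays the role of the temperature lambda):

    import numpy as np

    def mppi_update(u_nom, delta_u, traj_costs, lbd=100.0):
        # u_nom: [horizon, num_inputs] nominal input sequence
        # delta_u: [batch_size, horizon, num_inputs] sampled perturbations
        # traj_costs: [batch_size] total cost of each perturbed rollout
        rho = np.min(traj_costs)                      # baseline subtraction for numerical stability
        weights = np.exp(-(traj_costs - rho) / lbd)   # low-cost rollouts get high weight
        weights = weights / np.sum(weights)
        return u_nom + np.einsum('b,bhi->hi', weights, delta_u)  # weighted average of perturbations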
""" From 86c8d6802176168d840c13079dd40423ccf85414 Mon Sep 17 00:00:00 2001 From: heetmeyer Date: Wed, 14 Dec 2022 15:46:13 +0100 Subject: [PATCH 13/38] Rename num_rollouts -> batch_size in config templates --- .../config_controllers.yml | 2 -- .../config_optimizers.yml | 28 +++++++++---------- Controllers/__init__.py | 5 ++-- Optimizers/optimizer_rpgd_me_tf.py | 2 +- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/Control_Toolkit_ASF_Template/config_controllers.yml b/Control_Toolkit_ASF_Template/config_controllers.yml index a603ba2..2d3df73 100644 --- a/Control_Toolkit_ASF_Template/config_controllers.yml +++ b/Control_Toolkit_ASF_Template/config_controllers.yml @@ -11,7 +11,6 @@ mpc: controller_logging: true do-mpc-discrete: mpc_horizon: 50 # steps - num_rollouts: 1 # Initial positions position_init: 0.0 positionD_init: 0.0 @@ -21,7 +20,6 @@ do-mpc-discrete: do-mpc: seed: null # If null, random seed based on datetime is used mpc_horizon: 50 # steps - num_rollouts: 1 p_Q: 0.00 # Perturbation factors: Change of output from optimal # Random change of cost function by factor p_position: 0.0 diff --git a/Control_Toolkit_ASF_Template/config_optimizers.yml b/Control_Toolkit_ASF_Template/config_optimizers.yml index d3571fe..485d06b 100644 --- a/Control_Toolkit_ASF_Template/config_optimizers.yml +++ b/Control_Toolkit_ASF_Template/config_optimizers.yml @@ -7,7 +7,7 @@ cem-tf: mpc_horizon: 40 # steps cem_outer_it: 3 #how many outer iterations to use cem_initial_action_stdev: 0.5 - num_rollouts: 200 #how many rollouts per outer cem iteration + batch_size: 200 #how many rollouts per outer cem iteration cem_stdev_min: 0.01 cem_best_k: 40 warmup: false @@ -16,7 +16,7 @@ cem-gmm-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 40 # steps cem_outer_it: 3 #how many outer iterations to use - num_rollouts: 200 #how many rollouts per outer cem iteration + batch_size: 200 #how many rollouts per outer cem iteration cem_stdev_min: 0.01 cem_initial_action_stdev: 0.5 cem_best_k: 40 @@ -24,7 +24,7 @@ cem-naive-grad-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 35 # steps cem_outer_it: 1 # how many outer iterations to use - num_rollouts: 200 # how many rollouts per outer cem iteration + batch_size: 200 # how many rollouts per outer cem iteration cem_stdev_min: 0.1 cem_initial_action_stdev: 0.5 cem_best_k: 40 @@ -37,7 +37,7 @@ cem-grad-bharadhwaj-tf: adam_beta_1: 0.9 adam_beta_2: 0.999 adam_epsilon: 1.0e-08 - num_rollouts: 32 + batch_size: 32 cem_best_k: 8 cem_outer_it: 2 cem_initial_action_stdev: 2 @@ -54,7 +54,7 @@ gradient-tf: adam_epsilon: 1.0e-07 rtol: 1.0e-3 gradient_steps: 5 - num_rollouts: 40 + batch_size: 40 initial_action_stdev: 0.5 gradmax_clip: 5 warmup: false @@ -67,7 +67,7 @@ mppi-optimize-tf: adam_epsilon: 1.0e-7 #default: 1.0e-7 gradmax_clip: 1000 mpc_horizon: 35 # steps - num_rollouts: 400 # Number of Monte Carlo samples + batch_size: 400 # Number of Monte Carlo samples cc_weight: 1.0 R: 1.0 # How much to punish Q LBD: 100.0 # Cost parameter lambda @@ -86,7 +86,7 @@ dist-adam-resamp2-tf: adam_epsilon: 1.0e-08 gradmax_clip: 5 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 2 resamp_per: 10 @@ -103,7 +103,7 @@ rpgd-tf: adam_epsilon: 1.0e-08 gradmax_clip: 5 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 2 resamp_per: 10 @@ -121,7 +121,7 @@ rpgd-me-tf: maximum_entropy_alpha: 0.0 gradmax_clip: 10 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 0 outer_its: 25 
resamp_per: 1 @@ -140,7 +140,7 @@ rpgd-ml-tf: maximum_entropy_alpha: 0.1 gradmax_clip: 10 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 5 resamp_per: 1 @@ -157,7 +157,7 @@ rpgd-particle-tf: adam_epsilon: 1.0e-08 gradmax_clip: 10 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 5 resamp_per: 1 @@ -167,7 +167,7 @@ rpgd-particle-tf: mppi-var-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 35 # steps - num_rollouts: 400 # Number of Monte Carlo samples + batch_size: 400 # Number of Monte Carlo samples period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated cc_weight: 1.0 R: 1.0 # How much to punish Q @@ -183,7 +183,7 @@ mppi-var-tf: mppi: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) mpc_horizon: 35 # steps - num_rollouts: 3500 # Number of Monte Carlo samples + batch_size: 3500 # Number of Monte Carlo samples cc_weight: 1.0 R: 1.0 # How much to punish Q LBD: 100.0 # Cost parameter lambda @@ -194,4 +194,4 @@ mppi: random-action-tf: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) mpc_horizon: 35 # steps - num_rollouts: 320 + batch_size: 320 diff --git a/Controllers/__init__.py b/Controllers/__init__.py index a447167..64fa78f 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -8,9 +8,8 @@ from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) -from others.globals_and_utils import load_or_reload_config_if_modified +from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) logger = get_logger(__name__) """ @@ -126,7 +125,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[Te """ ### Any computations in order to retrieve the current control. Such as: ## If the environment's target positions etc. change, copy the new attributes over to this controller so the cost function knows about it: - # self.update_attributes(updated_attributes) + # update_attributes(updated_attributes,self) ## Use some sort of optimization procedure to get your control, e.g. 
# u = self.optimizer.step(s, time) ## Use the following call to populate the self.logs dictionary with savevars, such as: diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index 233a4bb..0f3b44b 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -139,7 +139,7 @@ def grad_step( ): # rollout trajectories and retrieve cost with tf.GradientTape(watch_accessed_variables=False) as tape: - # theta = tf.tile(tf.expand_dims(theta, 0), (self.num_rollouts, 1)) + # theta = tf.tile(tf.expand_dims(theta, 0), (self.batch_size, 1)) tape.watch(theta) Q = self.zeta(theta, epsilon) traj_cost, _ = self.predict_and_cost(s, Q) From 3ade5d072169630afe74117d9dfe4b5fc3cf7b46 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 18 Dec 2022 11:38:08 +0100 Subject: [PATCH 14/38] add comment --- Controllers/controller_mpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d1ace9b..d607df1 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -84,7 +84,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # detect any changes in config scalar values and pass to this controller or the cost function or optimizer # note that the cost function that has its attributes updated is the enclosed cost function of the wrapper! for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): - (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) + (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) # all the config files are currently assumed to be in Control_Toolkit_ASF folder # process changes to configs using new returned change list if not changes is None: for k,v in changes.items(): From 56d3e516e0830dbcdb4f4528870c55dcabe55c8f Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 18 Dec 2022 11:41:07 +0100 Subject: [PATCH 15/38] local changes, all minor except for trajectory cost that is in flux --- Controllers/cartpole_trajectory_generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index 806a209..ed20c12 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -66,8 +66,8 @@ def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorT traj[state_utilities.POSITION_IDX] = gui_target_position target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) - traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) - traj[state_utilities.ANGLE_IDX, :] = target_angle + # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + # traj[state_utilities.ANGLE_IDX, :] = target_angle traj[state_utilities.ANGLED_IDX, :] = 0 traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'shimmy': # cart follows a desired cart position shimmy while keeping pole up or down @@ -90,7 +90,7 @@ def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorT endtime=time+horizon*dt times=np.linspace(time,endtime,num=horizon) from scipy.signal import sawtooth - 
cartpos=amp*sawtooth((2*np.pi/per)*times, width=cost_function.cartonly_duty_cycle) # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html cartvel=np.gradient(cartpos,dt) traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos # target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down # traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) # traj[state_utilities.ANGLE_IDX, :] = target_angle # traj[state_utilities.ANGLED_IDX, :] = 0 traj[state_utilities.POSITIOND_IDX, :] = cartvel From 67ba59997d4b6ce823f533de416c077a2d494793 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sat, 24 Dec 2022 07:42:53 -0800 Subject: [PATCH 16/38] renamed s to state for clarity in many of the classes. added prefs to cartpole gui for initial position and angle. increased num_rollouts to 1000, decreased horizon to 25 time steps, changed cost function for spin to pure cart angle speed. Now spin, balance, and shimmy all work quite well. moved cartpole_trajectory_generator back outside compiled TF code for ease of development and debugging; it is cheap so this is OK. added more logging of what gets compiled. added time to arguments for the base controller so that contained methods can access current time. added ruamel.yaml to requirements (this is the yaml parser that can handle scientific notation numbers) --- .../Controllers/controller_barebone.py | 2 +- .../Controllers/controller_do_mpc.py | 12 +- .../Controllers/controller_do_mpc_discrete.py | 12 +- .../Controllers/controller_lqr.py | 4 +- Controllers/__init__.py | 4 +- Controllers/cartpole_trajectory_generator.py | 107 ------------------ Controllers/controller_mpc.py | 45 +++++--- .../controller_neural_imitator_pytorch.py | 4 +- Controllers/controller_neural_imitator_tf.py | 4 +- Cost_Functions/__init__.py | 11 +- Cost_Functions/cost_function_wrapper.py | 5 +- Optimizers/optimizer_mppi.py | 62 ++++++---- others/globals_and_utils.py | 4 +- 13 files changed, 103 insertions(+), 173 deletions(-) delete mode 100644 Controllers/cartpole_trajectory_generator.py diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py index 29c6add..b25092c 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py @@ -32,7 +32,7 @@ def configure(self): # u = 0.0 pass - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): # The controller has to adapt when environment-related attributes such as target positions change # Updated targets etc.
are passed as a dictionary updated_attributes diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py index 7d01b1e..c2431fa 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py @@ -136,16 +136,16 @@ def tvp_fun(self, t_ind): return self.tvp_template - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - s = cartpole_state_vector_to_namespace(s) + state = cartpole_state_vector_to_namespace(state) - self.x0['s.position'] = s.position - self.x0['s.positionD'] = s.positionD + self.x0['s.position'] = state.position + self.x0['s.positionD'] = state.positionD - self.x0['s.angle'] = s.angle - self.x0['s.angleD'] = s.angleD + self.x0['s.angle'] = state.angle + self.x0['s.angleD'] = state.angleD self.tvp_template['_tvp', :, 'target_position'] = self.target_position diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py index c2e5171..5e23315 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py @@ -148,16 +148,16 @@ def configure(self): def tvp_fun(self, t_ind): return self.tvp_template - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - s = cartpole_state_vector_to_namespace(s) + state = cartpole_state_vector_to_namespace(state) - self.x0['s.position'] = s.position - self.x0['s.positionD'] = s.positionD + self.x0['s.position'] = state.position + self.x0['s.positionD'] = state.positionD - self.x0['s.angle'] = s.angle - self.x0['s.angleD'] = s.angleD + self.x0['s.angle'] = state.angle + self.x0['s.angleD'] = state.angleD self.tvp_template['_tvp', :, 'target_position'] = self.target_position diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py index 6aef009..3b895a5 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py @@ -80,11 +80,11 @@ def configure(self): self.X = X self.eigVals = eigVals - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) state = np.array( - [[s[POSITION_IDX] - self.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]]) + [[state[POSITION_IDX] - self.target_position], [state[POSITIOND_IDX]], [state[ANGLE_IDX]], [state[ANGLED_IDX]]]) Q = np.dot(-self.K, state).item() diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 64fa78f..99ed1b1 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -112,11 +112,11 @@ def configure(self, **kwargs): pass @abstractmethod - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): + def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, 
Union[TensorType,float]]" = dict()): """ Execute one timestep of control. - :param s: the state array, dimensions are TODO add dimension to help users + :param state: the state array, dimensions are TODO add dimension to help users :param time: the time in seconds :param updated_attributes: dict of updated attributes diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py deleted file mode 100644 index ed20c12..0000000 --- a/Controllers/cartpole_trajectory_generator.py +++ /dev/null @@ -1,107 +0,0 @@ -# stub for genrating desired future trajectory of cartpole -import numpy -import numpy as np -from torch import TensorType - -from Control_Toolkit.Controllers import template_controller -from Control_Toolkit.others.globals_and_utils import get_logger -from SI_Toolkit.computation_library import ComputationLibrary -from CartPole import state_utilities - -period=1 - -log=get_logger(__name__) - -class cartpole_trajectory_generator: - """ Generates target state trajectory for the cartpole """ - def __init__(self, lib:ComputationLibrary, controller:template_controller=None): - """ Construct the trajectory generator. - - :param lib: the computation library, e.g. tensorflow - :param horizon: the MPC horizon in timesteps - """ - self.lib = lib - self.controller:template_controller=controller - - def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorType: - """ Computes the desired future state trajectory at this time. - - :param time: the scalar time in seconds - :param horizon: the number of horizon steps - :param dt: the timestep in seconds - :param state: the current state of the cartpole - - :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired future state values. 
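For reference, the spin policy in the deleted listing below extrapolates the current pole angle forward at a constant spin rate over the horizon; a distilled sketch of that idea (argument names are hypothetical):

    import numpy as np

    def spin_angle_targets(current_angle, direction, rev_period_s, horizon, dt):
        # direction: +1 or -1, from the GUI target-equilibrium switch
        times = np.linspace(0.0, horizon * dt, num=horizon)  # seconds ahead of now
        rad_per_s = 2.0 * np.pi / rev_period_s               # desired angular rate
        return current_angle + direction * rad_per_s * times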
- - """ - - traj=np.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable - traj[:]=self.lib.nan # set all states undetermined - - cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml - controller=self.controller # use controller to access attributes set in config_optimizers - - policy=self.controller.cost_function_wrapper.cost_function.policy - if policy is None: - raise RuntimeError(f'set policy in config_self.controller.cost_function_wrapper.cost_functions.yml') - - gui_target_position=self.controller.target_position # GUI slider position - gui_target_equilibrium=self.controller.target_equilibrium # GUI switch +1 or -1 to make pole target up or down position - - if policy == 'spin': # spin pole CW or CCW depending on target_equilibrium up or down - traj[state_utilities.POSITION_IDX] = gui_target_position - # traj[state_utilities.ANGLE_COS_IDX, :] = gui_target_equilibrium - # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - endtime=horizon*dt - times=np.linspace(0,endtime,num=horizon) - s_per_rev_target=cost_function.spin_rev_period_sec - rad_per_s_target=2*np.pi/s_per_rev_target - rad_per_dt=rad_per_s_target*dt - current_angle=state[state_utilities.ANGLE_IDX] - traj[state_utilities.ANGLE_IDX, :] = current_angle+gui_target_equilibrium*times*rad_per_dt - # traj[state_utilities.ANGLED_IDX, :] = rad_per_s_target*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target - # traj[state_utilities.POSITIOND_IDX, :] = 0 - elif policy == 'balance': # balance upright or down at desired cart position - traj[state_utilities.POSITION_IDX] = gui_target_position - target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down - traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) - # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) - # traj[state_utilities.ANGLE_IDX, :] = target_angle - traj[state_utilities.ANGLED_IDX, :] = 0 - traj[state_utilities.POSITIOND_IDX, :] = 0 - elif policy == 'shimmy': # cart follows a desired cart position shimmy while keeping pole up or down - per=self.controller.cost_function_wrapper.cost_function.shimmy_per # seconds - amp=self.controller.cost_function_wrapper.cost_function.shimmy_amp # meters - endtime=time+horizon*dt - times=np.linspace(time,endtime,num=horizon) - cartpos=amp*np.sin((2*np.pi/per)*times) - cartvel=np.gradient(cartpos,dt) - traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos - target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down - traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) - traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) - traj[state_utilities.ANGLE_IDX, :] = target_angle - # traj[state_utilities.ANGLED_IDX, :] = 0 - traj[state_utilities.POSITIOND_IDX, :] = cartvel - elif policy == 'cartonly': # cart follows the trajectory, pole ignored - per=self.controller.cost_function_wrapper.cost_function.cartonly_per # seconds - amp=self.controller.cost_function_wrapper.cost_function.cartonly_amp # meters - endtime=time+horizon*dt - times=np.linspace(time,endtime,num=horizon) - from scipy.signal import sawtooth - cartpos=amp*sawtooth((2*np.pi/per)*times, width=cost_function.cartonly_duty_cycle) # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html - cartvel=np.gradient(cartpos,dt) - traj[state_utilities.POSITION_IDX] = 
gui_target_position+cartpos
-        # target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down
-        # traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle)
-        # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle)
-        # traj[state_utilities.ANGLE_IDX, :] = target_angle
-        # traj[state_utilities.ANGLED_IDX, :] = 0
-        traj[state_utilities.POSITIOND_IDX, :] = cartvel
-    else:
-        log.error(f'cost policy "{policy}" is unknown')
-
-    # traj=self.lib.to_variable(traj, self.lib.float32)
-
-    return traj
diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py
index d607df1..2a549f0 100644
--- a/Controllers/controller_mpc.py
+++ b/Controllers/controller_mpc.py
@@ -1,6 +1,7 @@
 import os
 from typing import Optional
 
+from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import generate_cartpole_trajectory
 from GUI import gui_default_params
 from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper
 
@@ -15,7 +16,6 @@
 
 from torch import inference_mode
 
-from Control_Toolkit.Controllers.cartpole_trajectory_generator import cartpole_trajectory_generator
 from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes
 
 config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader)
@@ -68,21 +68,43 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification:
             predictor_specification=predictor_specification
         )
 
-        # make a target position trajectory generator
-        self.target_trajectory_generator = cartpole_trajectory_generator(lib=self.computation_library, controller=self)
-
         if self.lib.lib == 'Pytorch':
             self.step = inference_mode()(self.step)
         else:
             self.step = self.step
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+    def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, TensorType]" = {}):
+        """ Compute one step of control.
+
+        :param state: the current state as 1d state vector
+        :param time: the current time in seconds
+        :param updated_attributes: a dict of values to update tensorflow assignment values in the cost function
+
+        :returns: the control vector (for cartpole, a scalar cart acceleration)
+
+        """
         # log.debug(f'step time={time:.3f}s')
+
+        # following gets target_position, target_equilibrium, and target_trajectory passed to tensorflow. The trajectory is passed in
+        # as updated_attributes, and is transferred to tensorflow by the update_attributes call
+        cost_function=self.cost_function_wrapper.cost_function
+        update_attributes(updated_attributes,cost_function) # update target_position and target_equilibrium in cost function to use
+        updated_attributes.clear()
+        target_trajectory = generate_cartpole_trajectory(time=time, state=state,controller=self, cost_function=self.cost_function_wrapper.cost_function)
+        updated_attributes['target_trajectory']=target_trajectory
+        update_attributes(updated_attributes,cost_function) # update
+        updated_attributes.clear()
+
+        # now we fill this dict with config file changes if there are any and update attributes in the controller, the cost function, and the optimizer
        # detect any changes in config scalar values and pass to this controller or the cost function or optimizer
+        # note that the cost function that has its attributes updated is the enclosed cost function of the wrapper!
+        # note the following mapping for config files
+        # config_controllers.yml -> self (i.e. 
controller_mpc) + # config_cost_functions.yml -> self.cost_function_wrapper.cost_function + # config_optimizers.yml -> self.optimizer AND self.predictor_wrapper.predictor for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) # all the config files are currently assumed to be in Control_Toolkit_ASF folder # process changes to configs using new returned change list @@ -94,17 +116,8 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy update_attributes(updated_attributes,o) log.debug(f'updated {objs} with scalar updated_attributes {updated_attributes}') - # following gets target_position, target_equilibrium, and target_trajectory passed to tensorflow. The trajectory is passed in - # as updated_attributes, and is transferred to tensorflow by the update_attributes call - new_target_trajectory = self.target_trajectory_generator.step(time=time, - horizon=self.optimizer.mpc_horizon, - dt=gui_default_params.controller_update_interval, - state=s) - updated_attributes['target_trajectory'] = new_target_trajectory - update_attributes(updated_attributes,self) - - # log.info(f'targetposition={self.target_position}, equil={self.target_equilibrium}') - u = self.optimizer.step(s, time) + + u = self.optimizer.step(state, time) self.update_logs(self.optimizer.logging_values) return u diff --git a/Controllers/controller_neural_imitator_pytorch.py b/Controllers/controller_neural_imitator_pytorch.py index 56438a5..0e96ad5 100644 --- a/Controllers/controller_neural_imitator_pytorch.py +++ b/Controllers/controller_neural_imitator_pytorch.py @@ -45,10 +45,10 @@ def configure(self): self.net.reset() self.net.eval() - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - net_input = s[ + net_input = state[ ..., [STATE_INDICES.get(key) for key in self.net_info.inputs[:-1]] ] # -1 is a fix to exclude target position net_input = np.append(net_input, self.target_position) diff --git a/Controllers/controller_neural_imitator_tf.py b/Controllers/controller_neural_imitator_tf.py index 70855f1..20da62c 100644 --- a/Controllers/controller_neural_imitator_tf.py +++ b/Controllers/controller_neural_imitator_tf.py @@ -40,10 +40,10 @@ def configure(self): self.evaluate_net = CompileAdaptive(self._evaluate_net) - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - net_input = s[ + net_input = state[ ..., [STATE_INDICES.get(key) for key in self.net_info.inputs[:-1]] ] # -1 is a fix to exclude target position net_input = np.append(net_input, self.target_position) diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 3c4731c..a9ed600 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -38,7 +38,7 @@ def get_terminal_cost(self, terminal_states: TensorType) -> TensorType: """ raise NotImplementedError("To be implemented in subclass.") - def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType) -> TensorType: + def 
get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType, time:float) -> TensorType:
         """Compute all stage costs of a batch of states and control inputs.
         One "stage" is one step in the MPC horizon.
@@ -53,9 +53,7 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input:
         """
         raise NotImplementedError("To be implemented in subclass.")
 
-    def get_trajectory_cost(
-        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None
-    ) -> TensorType:
+    def get_trajectory_cost(self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, time:float=None) -> TensorType:
         """Helper function which computes a batch of the summed cost of a trajectory.
         Can be overwritten in a subclass, e.g. if weighted sum is required.
         The batch dimension is used to compute for multiple rollouts in parallel.
@@ -66,10 +64,13 @@ def get_trajectory_cost(
         :type inputs: TensorType
         :param previous_input: The most recent actually applied control, defaults to None
         :type previous_input: TensorType, optional
+        :param time: the time in seconds
+        :type time: float
+
         :return: The summed cost of the trajectory. Has shape [batch_size].
         :rtype: TensorType
         """
-        stage_cost = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input)  # Select all but last state of the horizon
+        stage_cost = self.get_stage_cost(states=state_horizon[:, :-1, :], inputs=inputs, previous_input=previous_input, time=time)  # Select all but last state of the horizon
         total_cost = self.lib.sum(stage_cost, 1)  # Sum across the MPC horizon dimension
         total_cost = total_cost + self.get_terminal_cost(state_horizon[:, -1, :])
         return total_cost
diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py
index 4e2a9a4..aea21b3 100644
--- a/Cost_Functions/cost_function_wrapper.py
+++ b/Cost_Functions/cost_function_wrapper.py
@@ -78,10 +78,9 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input:
         return self.cost_function.get_stage_cost(states, inputs, previous_input)
 
     def get_trajectory_cost(
-        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None
-    ):
+        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None, time:float=None):
         """Refer to :func:`the base cost function `"""
-        return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input)
+        return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input, time=time)
 
     def copy(self):
         """
diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py
index c41b006..711f8b8 100644
--- a/Optimizers/optimizer_mppi.py
+++ b/Optimizers/optimizer_mppi.py
@@ -1,7 +1,8 @@
 from typing import Tuple
 
 from Control_Toolkit.others.globals_and_utils import get_logger
-from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary
+from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary, \
+    TensorType
 
 import numpy as np
 
@@ -86,7 +87,6 @@
         self.Interpolator = Interpolator(self.mpc_horizon, period_interpolation_inducing_points, self.num_control_inputs, self.lib)
 
         # here the predictor, cost computer, and optimizer are compiled to native instructions by tensorflow graphs and XLA JIT
-        # before this, we copy all the required configuration to these objects so that they can be later updated during runtime.
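         # (CompileAdaptive, imported from SI_Toolkit.Functions.TF.Compile, presumably applies tf.function/XLA
         # compilation when the computation library is TensorFlow, and leaves the wrapped function as a plain
         # Python callable otherwise; it uses self.lib to recognize the active library, as noted in the controller base class)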
self.predict_and_cost = CompileAdaptive(self._predict_and_cost) self.predict_optimal_trajectory = CompileAdaptive(self._predict_optimal_trajectory) @@ -115,13 +115,23 @@ def check_dimensions_s(self, s): return s #mppi correction - def mppi_correction_cost(self, u, delta_u): + def mppi_correction_cost(self, u, delta_u, time=None): return self.lib.sum(self.cc_weight * (0.5 * (1 - 1.0 / self.NU) * self.R * (delta_u ** 2) + self.R * u * delta_u + 0.5 * self.R * (u ** 2)), (1, 2)) #total cost of the trajectory - def get_mppi_trajectory_cost(self, state_horizon ,u, u_prev, delta_u): - total_cost = self.cost_function.get_trajectory_cost(state_horizon,u, u_prev) - mppi_correction_cost =self.mppi_correction_cost(u, delta_u) + def get_mppi_trajectory_cost(self, state_horizon ,u, u_prev, delta_u, time:float=None): + """ Compute the total trajectory costs for all the rollouts + + :param state_horizon: the states as [rollouts,timesteps,states] + :param u: the control as ??? TODO + :param u_prev: the previous control input + :param delta_u: change in control input, TODO passed in for efficiency? + :param time: the time in seconds + + :returns: the total mppi cost for each rollout, i.e. 1d-vector of costs per rollout + """ + total_cost = self.cost_function.get_trajectory_cost(state_horizon,u, u_prev,time=time) + mppi_correction_cost =self.mppi_correction_cost(u, delta_u, time=time) total_mppi_cost = total_cost +mppi_correction_cost return total_mppi_cost @@ -143,25 +153,28 @@ def inizialize_pertubation(self, random_gen): return delta_u - def _predict_and_cost(self, s, u_nom, random_gen, u_old): + def _predict_and_cost(self, state:TensorType, u_nom:TensorType, random_gen, u_old:TensorType, time:float=None): """ Predict dynamics and compute costs of trajectories - TODO add params and return - :param ?? + :param state: the current state of system, dimensions are [rollouts, timesteps, states] + :param u_nom: the nominal control input + :param random_gen: the random generator + :param u_old: previous control input + :param time: time in seconds - :returns: ?? + :returns: u, u_nom: the new control input TODO what are u and u_nom? 
""" - s = self.lib.tile(s, (self.batch_size, 1)) + state = self.lib.tile(state, (self.batch_size, 1)) # generate random input sequence and clip to control limits u_nom = self.lib.concat([u_nom[:, 1:, :], u_nom[:, -1:, :]], 1) delta_u = self.inizialize_pertubation(random_gen) u_run = self.lib.tile(u_nom, (self.batch_size, 1, 1)) + delta_u u_run = self.lib.clip(u_run, self.action_low, self.action_high) - rollout_trajectory = self.predictor.predict_tf(s, u_run) - traj_cost = self.get_mppi_trajectory_cost(rollout_trajectory, u_run, u_old, delta_u) + rollout_trajectory = self.predictor.predict_tf(state, u_run, time=time) + traj_cost = self.get_mppi_trajectory_cost(rollout_trajectory, u_run, u_old, delta_u, time=time) u_nom = self.lib.clip(u_nom + self.reward_weighted_average(traj_cost, delta_u), self.action_low, self.action_high) u = u_nom[0, 0, :] - self.update_internal_state(s, u_nom) + self.update_internal_state(state, u_nom) return self.mppi_output(u, u_nom, rollout_trajectory, traj_cost, u_run) def update_internal_state_of_RNN(self, s, u_nom): @@ -174,13 +187,22 @@ def _predict_optimal_trajectory(self, s, u_nom): return optimal_trajectory #step function to find control - def step(self, s: np.ndarray, time=None): + def step(self, state: np.ndarray, time=None): + """ Does one timestep of control + + :param state: the current state + :param time: the current time in seconds + + :returns: u, the new control input to system + """ if self.optimizer_logging: - self.logging_values = {"s_logged": s.copy()} - s = self.lib.to_tensor(s, self.lib.float32) - s = self.check_dimensions_s(s) + self.logging_values = {"s_logged": state.copy()} + state = self.lib.to_tensor(state, self.lib.float32) + state = self.check_dimensions_s(state) + + tf_time=self.lib.to_tensor(time,self.lib.float32) # must pass scalar tensor for time to prevent recompiling tensorflow functions over and over - self.u, self.u_nom, rollout_trajectory, traj_cost, u_run = self.predict_and_cost(s, self.u_nom, self.rng, self.u) + self.u, self.u_nom, rollout_trajectory, traj_cost, u_run = self.predict_and_cost(state, self.u_nom, self.rng, self.u, time=tf_time) self.u = self.lib.to_numpy(self.lib.squeeze(self.u)) # print(f'mean traj cost={np.mean(traj_cost.numpy()):.2f}') # todo debug @@ -193,7 +215,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values["u_logged"] = self.u if False: - self.optimal_trajectory = self.lib.to_numpy(self.predict_optimal_trajectory(s, self.u_nom)) + self.optimal_trajectory = self.lib.to_numpy(self.predict_optimal_trajectory(state, self.u_nom)) return self.u diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 4270e56..3a2b12d 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -246,4 +246,6 @@ def get_optimizer_name(optimizer_names=None, optimizer_name=None, optimizer_idx= else: optimizer_name = optimizer_names[optimizer_idx] - return optimizer_name, optimizer_idx \ No newline at end of file + return optimizer_name, optimizer_idx + + From 1f324f4135f9733c84c55ee1db1de5560c3ce54c Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 26 Dec 2022 04:53:12 -0800 Subject: [PATCH 17/38] added dancer that reads CSV file to specify sequence of 'steps' (behaviors). 
--- Controllers/controller_mpc.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 2a549f0..2489b6b 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -1,7 +1,7 @@ import os from typing import Optional -from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import generate_cartpole_trajectory +from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import cartpole_trajectory_generator from GUI import gui_default_params from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper @@ -25,7 +25,20 @@ class controller_mpc(template_controller): _has_optimizer = True - + + def __init__( + self, + dt: float, + environment_name: str, + num_states: int, + num_control_inputs: int, + control_limits: "Tuple[np.ndarray, np.ndarray]", + initial_environment_attributes: "dict[str, TensorType]", + ): + super().__init__(dt, environment_name, num_states, num_control_inputs, control_limits, + initial_environment_attributes) + self.cartpole_trajectory_generator = cartpole_trajectory_generator() + def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) @@ -92,7 +105,9 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str cost_function=self.cost_function_wrapper.cost_function update_attributes(updated_attributes,cost_function) # update target_position and target_equilibrium in cost function to use updated_attributes.clear() - target_trajectory = generate_cartpole_trajectory(time=time, state=state,controller=self, cost_function=self.cost_function_wrapper.cost_function) + target_trajectory = \ + self.cartpole_trajectory_generator.\ + generate_cartpole_trajectory(time=time, state=state, controller=self, cost_function=self.cost_function_wrapper.cost_function) updated_attributes['target_trajectory']=target_trajectory update_attributes(updated_attributes,cost_function) # update updated_attributes.clear() From 0b543a01df6adaa424cd6d8bce66a9a8929b72cb Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sat, 28 Jan 2023 12:36:55 +0100 Subject: [PATCH 18/38] fixed import of CompileTF to point to SI_Toolkit --- Optimizers/optimizer_rpgd_tf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 36143b8..82051de 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -1,11 +1,13 @@ from typing import Tuple + +from SI_Toolkit.Functions.TF.Compile import CompileTF from SI_Toolkit.computation_library import ComputationLibrary, TensorFlowLibrary import numpy as np import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import get_logger from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper From c0ee6205af886ec540c0391b341f67fe1af82f9c Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Fri, 3 Feb 2023 16:31:58 +0100 Subject: [PATCH 19/38] merged from Tobi_Dance and added some loggers --- .../config_optimizers.yml | 3 -- Controllers/__init__.py | 9 +++--- 
Controllers/controller_mpc.py | 5 +-- Optimizers/optimizer_mppi.py | 2 -- Optimizers/optimizer_mppi_optimize_tf.py | 2 -- Optimizers/optimizer_mppi_var_tf.py | 2 -- others/globals_and_utils.py | 32 ++++++++++++------- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/Control_Toolkit_ASF_Template/config_optimizers.yml b/Control_Toolkit_ASF_Template/config_optimizers.yml index 6786ce8..32ff5aa 100644 --- a/Control_Toolkit_ASF_Template/config_optimizers.yml +++ b/Control_Toolkit_ASF_Template/config_optimizers.yml @@ -73,7 +73,6 @@ mppi-optimize-tf: LBD: 100.0 # Cost parameter lambda NU: 1000.0 # Exploration variance SQRTRHOINV: 0.02 - GAMMA: 1.00 # Future cost discount period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated optim_steps: 10 dist-adam-resamp2-tf: @@ -175,7 +174,6 @@ mppi-var-tf: LBD_mc: 10.0 # Cost parameter lambda SQRTRHOINV_mc: 0.002 # Sampling variance NU_mc: 20.0 # Exploration variance - GAMMA: 1.00 # Future cost discount LR: 1000 # Learning rate for adaption of variance, !!! Set to 0 to retrieve a mppi version in accordance with mppi paper STDEV_min: 0.01 # Maximal variance for sampling STDEV_max: 10 # Minimal sampling variance for sampling @@ -189,7 +187,6 @@ mppi-tf: LBD: 100.0 # Cost parameter lambda NU: 1000.0 # Exploration variance SQRTRHOINV: 0.03 # Sampling variance - GAMMA: 1.00 # Future cost discount period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated random-action-tf: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index d8753d0..4f31edf 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -1,5 +1,6 @@ import os from abc import ABC, abstractmethod +from pathlib import Path from typing import Tuple import numpy as np @@ -38,10 +39,10 @@ def __init__( initial_environment_attributes: "dict[str, TensorType]", ): # Load controller config and select the entry for the current controller - config_controllers = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")), - Loader=yaml.FullLoader - ) + f=os.path.join("Control_Toolkit_ASF", "config_controllers.yml") + fp=Path(f) + logger.debug(f'loading controller config from "{fp.absolute()}"') + config_controllers = yaml.load(open(f), Loader=yaml.FullLoader) # self.controller_name is inferred from the class name, which is the class being instantiated # Example: If you create a controller_mpc, this controller_template.__init__ will be called # but the class name will be controller_mpc, not template_controller. 
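As context for the config loading above: the controller's entry in config_controllers.yml is keyed by the class name of the instantiated controller. A minimal sketch of that lookup (the exact key-derivation helper is an assumption for illustration; only the class-name inference is stated in the code):

    import os
    import yaml

    def select_controller_config(controller) -> dict:
        # load the application-specific controller config
        config_controllers = yaml.load(
            open(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")),
            Loader=yaml.FullLoader)
        # e.g. an instance of controller_mpc selects the "mpc" entry (assumed naming convention)
        name = type(controller).__name__.replace("controller_", "")
        return config_controllers[name]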
diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d6cd4ae..e3bd585 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -25,15 +25,16 @@ class controller_mpc(template_controller): def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - logger.info(f"Using optimizer {optimizer_name} specified in controller config file") + logger.info(f'Using optimizer "{optimizer_name}" specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - logger.info(f"Using predictor {predictor_specification} specified in controller config file") + logger.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') config_optimizer = config_optimizers[optimizer_name] # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) + logger.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') self.cost_function = CostFunctionWrapper() self.cost_function.configure(self, cost_function_specification=cost_function_specification) diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 81e48c2..c55b14d 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -29,7 +29,6 @@ def __init__( num_rollouts: int, NU: float, SQRTRHOINV: float, - GAMMA: float, period_interpolation_inducing_points: int, optimizer_logging: bool, ): @@ -56,7 +55,6 @@ def __init__( self.LBD = LBD self.NU = self.lib.to_tensor(NU, self.lib.float32) self._SQRTRHOINV = SQRTRHOINV - self.GAMMA = GAMMA self.update_internal_state = self.update_internal_state_of_RNN # FIXME: There is one unnecessary operation in this function in case it is not an RNN. 
diff --git a/Optimizers/optimizer_mppi_optimize_tf.py b/Optimizers/optimizer_mppi_optimize_tf.py index f19a584..180eee0 100644 --- a/Optimizers/optimizer_mppi_optimize_tf.py +++ b/Optimizers/optimizer_mppi_optimize_tf.py @@ -30,7 +30,6 @@ def __init__( num_rollouts: int, NU: float, SQRTRHOINV: float, - GAMMA: float, gradmax_clip: float, optim_steps: int, mppi_LR: float, @@ -61,7 +60,6 @@ def __init__( # MPPI parameters self.NU = NU self._SQRTRHOINV = SQRTRHOINV - self.GAMMA = GAMMA # Optimization params self.gradmax_clip = tf.constant(gradmax_clip, dtype = tf.float32) diff --git a/Optimizers/optimizer_mppi_var_tf.py b/Optimizers/optimizer_mppi_var_tf.py index 5c4410e..3edd3c3 100644 --- a/Optimizers/optimizer_mppi_var_tf.py +++ b/Optimizers/optimizer_mppi_var_tf.py @@ -31,7 +31,6 @@ def __init__( num_rollouts: int, NU_mc: float, SQRTRHOINV_mc: float, - GAMMA: float, LR: float, max_grad_norm: float, STDEV_min: float, @@ -57,7 +56,6 @@ def __init__( self.R = R self.LBD = LBD_mc self.NU = NU_mc - self.GAMMA = GAMMA self.mppi_lr = LR self.stdev_min = STDEV_min self.stdev_max = STDEV_max diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 13ac619..fa15c38 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -12,36 +12,44 @@ from SI_Toolkit.Functions.TF.Compile import CompileTF, CompileAdaptive from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary -LOGGING_LEVEL = logging.INFO - - +LOGGING_LEVEL = logging.DEBUG # usually INFO is good class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" + # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 grey = "\x1b[38;21m" yellow = "\x1b[33;21m" + cyan = "\x1b[1;36m" # dark green + green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" + light_blue = "\x1b[1;36m" + blue = "\x1b[1;34m" reset = "\x1b[0m" - format = ( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)" - ) + # File "{file}", line {max(line, 1)}'.replace("\\", "/") + format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { - logging.DEBUG: grey + format + reset, - logging.INFO: grey + format + reset, - logging.WARNING: yellow + format + reset, - logging.ERROR: red + format + reset, - logging.CRITICAL: bold_red + format + reset, + logging.DEBUG: light_blue + format + reset, + logging.INFO: cyan + format + reset, + logging.WARNING: red + format + reset, + logging.ERROR: bold_red + format + reset, + logging.CRITICAL: bold_red + format + reset } def format(self, record): log_fmt = self.FORMATS.get(record.levelno) formatter = logging.Formatter(log_fmt) - return formatter.format(record) + return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links def get_logger(name): + """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. + + :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + + :returns: the logger. 
+ """ # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logger = logging.getLogger(name) logger.setLevel(LOGGING_LEVEL) From ee9ca38f92b208c11f84601f58f257c845e4a424 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Mon, 6 Feb 2023 16:19:19 +0100 Subject: [PATCH 20/38] moved get_logger to own file in SI_Toolkit Started merging from remote/origin/Tobi_Dance --- Controllers/__init__.py | 13 +++--- Controllers/controller_mpc.py | 13 +++--- Cost_Functions/__init__.py | 56 +++++++++++++++++------- Cost_Functions/cost_function_wrapper.py | 55 +++++++++++++++++------ Optimizers/optimizer_rpgd_me_tf.py | 5 ++- Optimizers/optimizer_rpgd_ml_tf.py | 5 ++- Optimizers/optimizer_rpgd_particle_tf.py | 5 ++- Optimizers/optimizer_rpgd_tf.py | 5 ++- others/environment.py | 2 +- others/globals_and_utils.py | 50 +-------------------- 10 files changed, 112 insertions(+), 97 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 4f31edf..db61e4f 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -5,13 +5,14 @@ import numpy as np import yaml -from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) +from get_logger import get_logger +log = get_logger(__name__) + config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) -logger = get_logger(__name__) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: @@ -41,7 +42,7 @@ def __init__( # Load controller config and select the entry for the current controller f=os.path.join("Control_Toolkit_ASF", "config_controllers.yml") fp=Path(f) - logger.debug(f'loading controller config from "{fp.absolute()}"') + log.debug(f'loading controller config from "{fp.absolute()}"') config_controllers = yaml.load(open(f), Loader=yaml.FullLoader) # self.controller_name is inferred from the class name, which is the class being instantiated # Example: If you create a controller_mpc, this controller_template.__init__ will be called @@ -54,7 +55,7 @@ def __init__( if computation_library_name: # Assign computation library from config - logger.info(f"Found library {computation_library_name} for MPC controller.") + log.info(f"Found library {computation_library_name} for MPC controller.") if "tensorflow" in computation_library_name.lower(): self._computation_library = TensorFlowLibrary elif "pytorch" in computation_library_name.lower(): @@ -68,7 +69,7 @@ def __init__( if not issubclass(self.computation_library, ComputationLibrary): raise ValueError(f"{self.__class__.__name__} does not have a default computation library set. You have to define one in this controller's config.") else: - logger.info(f"No computation library specified in controller config. Using default {self.computation_library} for class.") + log.info(f"No computation library specified in controller config. Using default {self.computation_library} for class.") self.lib = self.computation_library # Shortcut to make easy using functions from computation library, this is also used by CompileAdaptive to recognize library # Environment-related parameters @@ -124,7 +125,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # Optionally: A method called after an experiment. # May be used to print some statistics about controller performance (e.g. number of iter. 
to converge) def controller_report(self): - logger.info("No controller report implemented for this controller. Stopping without report.") + log.info("No controller report implemented for this controller. Stopping without report.") pass # Optionally: reset the controller after an experiment diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index e3bd585..493b3ac 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -9,14 +9,17 @@ from Control_Toolkit.Optimizers import template_optimizer from SI_Toolkit.computation_library import TensorType -from Control_Toolkit.others.globals_and_utils import get_logger, import_optimizer_by_name +from Control_Toolkit.others.globals_and_utils import import_optimizer_by_name from torch import inference_mode +from get_logger import get_logger +log = get_logger(__name__) + + config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) -logger = get_logger(__name__) class controller_mpc(template_controller): @@ -25,16 +28,16 @@ class controller_mpc(template_controller): def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - logger.info(f'Using optimizer "{optimizer_name}" specified in controller config file') + log.info(f'Using optimizer "{optimizer_name}" specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - logger.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') + log.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') config_optimizer = config_optimizers[optimizer_name] # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) - logger.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') + log.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') self.cost_function = CostFunctionWrapper() self.cost_function.configure(self, cost_function_specification=cost_function_specification) diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index a98da9d..67bcf0d 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -1,18 +1,34 @@ from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType from Control_Toolkit.Controllers import template_controller -from Control_Toolkit.others.globals_and_utils import get_logger - -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class cost_function_base: + """ Base cost function for all MPC systems + """ # Default: Class supports all libs to compute costs supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} - - def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]") -> None: - self.controller = controller + # Define default values used for cost normalization + MIN_COST = -1.0 + MAX_COST = 0.0 + COST_RANGE = MAX_COST - MIN_COST + + def __init__(self, 
controller: template_controller, ComputationLib: "type[ComputationLibrary]", config:dict=None) -> None:
+        """ Makes a new cost function
+
+        :param controller: the controller
+        :param ComputationLib: the library, e.g. numpy, tensorflow
+        :param config: the dict of configuration for this cost function. The caller can modify the config to change behavior during runtime.
+
+        """
+
+        self.lib:Optional[ComputationLibrary] = None
+        self.controller:template_controller = controller
+        self.config:dict=config
         self.set_computation_library(ComputationLib)
-
+        logger.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}')
+
     def get_terminal_cost(self, terminal_states: TensorType) -> TensorType:
         """Compute a batch of terminal costs for a batch of terminal states.
 
@@ -21,11 +37,13 @@ def get_terminal_cost(self, terminal_states: TensorType) -> TensorType:
         :return: The terminal costs. Has shape [batch_size]
         :rtype: TensorType
         """
-        raise NotImplementedError("To be implemented in subclass.")
+        # Default behavior: Return a zero cost scalar per sample of batch
+        return self.lib.zeros_like(terminal_states)[:,:1]  # Shape: (batch_size x 1)
 
-    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType) -> TensorType:
+    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType, time:float) -> TensorType:
         """Compute all stage costs of a batch of states and control inputs.
         One "stage" is one step in the MPC horizon.
+        Stage costs are shifted so that they are <= 0. Reason: reward = -cost is then >= 0 and therefore easier to interpret.
 
         :param states: Has shape [batch_size, mpc_horizon, num_states]
         :type states: TensorType
@@ -36,11 +54,15 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input:
         :return: The stage costs. Has shape [batch_size, mpc_horizon]
         :rtype: TensorType
         """
+        stage_costs = self._get_stage_cost(states, inputs, previous_input)  # compute the raw stage costs in the subclass
+        return stage_costs - self.MAX_COST
+        # Could also normalize to [-1, 0]:
+        # (stage_costs - self.MIN_COST) / self.COST_RANGE - 1
+
+    def _get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType) -> TensorType:
         raise NotImplementedError("To be implemented in subclass.")
 
-    def get_trajectory_cost(
-        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None
-    ) -> TensorType:
+    def get_trajectory_cost(self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, time:float=None) -> TensorType:
         """Helper function which computes a batch of the summed cost of a trajectory.
         Can be overwritten in a subclass, e.g. if weighted sum is required.
         The batch dimension is used to compute for multiple rollouts in parallel.
@@ -51,12 +73,15 @@ def get_trajectory_cost(
         :type inputs: TensorType
         :param previous_input: The most recent actually applied control, defaults to None
         :type previous_input: TensorType, optional
+        :param time: the time in seconds
+        :type time: float
+
        :return: The summed cost of the trajectory. Has shape [batch_size]. 
:rtype: TensorType """ - stage_cost = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) # Select all but last state of the horizon - total_cost = self.lib.sum(stage_cost, 1) # Sum across the MPC horizon dimension - total_cost = total_cost + self.get_terminal_cost(state_horizon[:, -1, :]) + stage_costs = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) # Select all but last state of the horizon + terminal_cost = self.lib.reshape(self.get_terminal_cost(state_horizon[:, -1, :]), (-1, 1)) + total_cost = self.lib.mean(self.lib.concat([stage_costs, terminal_cost], 1), 1) # Average across the MPC horizon dimension return total_cost def set_computation_library(self, ComputationLib: "type[ComputationLibrary]"): @@ -64,3 +89,4 @@ def set_computation_library(self, ComputationLib: "type[ComputationLibrary]"): if not ComputationLib in self.supported_computation_libraries: raise ValueError(f"The cost function {self.__class__.__name__} does not support {ComputationLib.__name__}") self.lib = ComputationLib + diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index d0a7ae5..e192019 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -7,26 +7,50 @@ from Control_Toolkit.Controllers import template_controller from Control_Toolkit.Cost_Functions import cost_function_base +from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes +from get_logger import get_logger +log=get_logger(__name__) -# cost_function config -cost_function_config = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"), - Loader=yaml.FullLoader, -) +class CostFunctionWrapper: + """ + Wrapper class for cost functions. + It allows the creation of an instance with deferred specification which specific class containing cost functions is to be used. + Usage: + 1) Instantiate this wrapper in controller. + 2) Call `configure` with a reference to the controller instance and the name of the cost function, once this is known to the controller. -class CostFunctionWrapper: + You can do both steps + """ def __init__(self): self.cost_function = None - self.cost_function_name_default: str = cost_function_config[ - "cost_function_name_default" - ] + # cost_function config + (self.cost_function_config, _) = load_or_reload_config_if_modified( + os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),search_path=['CartPoleSimulation']) + + self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default + self.lib = None # filled by configure(), needed to update TF variables + + log.info(f'default cost function name is {self.cost_function_name_default}') def configure( self, controller: template_controller, - cost_function_specification=None, + cost_function_specification: str=None, ): + """ + Configures the cost function. TODO This lazy constructor is needed why? + + :param controller: the controller that uses this cost function + :param cost_function_specification: the string name of the cost function class, to construct the class and find the config values in config_cost_functions.yml + """ + """Configure this wrapper as part of a controller. + + :param controller: Reference to the controller instance this wrapper is used by + :type controller: template_controller + :param cost_function_specification: Name of cost function class within Control_Toolkit_ASF.Cost_Functions.<> package. 
If not specified, the 'default' one is used. + :type cost_function_specification: str, optional + """ environment_name = controller.environment_name computation_library = controller.computation_library # Use library dictated by controller @@ -42,6 +66,12 @@ def configure( cost_function_module, self.cost_function_name )(controller, computation_library) + self.lib=self.cost_function.lib + + config=self.cost_function_config['CartPole'][self.cost_function_name] # todo hardcoded 'CartPole' has to go, not sure how to determine it, maybe from module folder? + update_attributes(config, self.cost_function) + log.info(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') + def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None ): @@ -63,10 +93,9 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: return self.cost_function.get_stage_cost(states, inputs, previous_input) def get_trajectory_cost( - self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None - ): + self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None, time:float=None): """Refer to :func:`the base cost function `""" - return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input) + return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input, time=time) def copy(self): """ diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index 7b38d08..df834a4 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -6,11 +6,12 @@ import tensorflow_probability as tfp from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_me_tf(template_optimizer): diff --git a/Optimizers/optimizer_rpgd_ml_tf.py b/Optimizers/optimizer_rpgd_ml_tf.py index 2611ed0..69f031f 100644 --- a/Optimizers/optimizer_rpgd_ml_tf.py +++ b/Optimizers/optimizer_rpgd_ml_tf.py @@ -6,11 +6,12 @@ import tensorflow_probability as tfp from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_ml_tf(template_optimizer): diff --git a/Optimizers/optimizer_rpgd_particle_tf.py b/Optimizers/optimizer_rpgd_particle_tf.py index b1c7531..e4a2b65 100644 --- a/Optimizers/optimizer_rpgd_particle_tf.py +++ b/Optimizers/optimizer_rpgd_particle_tf.py @@ -5,11 +5,12 @@ import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from 
Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_particle_tf(template_optimizer): diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 3357857..fdc4578 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -5,11 +5,12 @@ import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_tf(template_optimizer): diff --git a/others/environment.py b/others/environment.py index 899ff1f..515c927 100644 --- a/others/environment.py +++ b/others/environment.py @@ -5,9 +5,9 @@ import torch from SI_Toolkit.computation_library import ComputationLibrary, TensorType -from Control_Toolkit.others.globals_and_utils import get_logger from gym.spaces import Box +from get_logger import get_logger log = get_logger(__name__) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index fa15c38..f7c6f24 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -11,55 +11,7 @@ from numpy.random import SFC64, Generator from SI_Toolkit.Functions.TF.Compile import CompileTF, CompileAdaptive from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary - -LOGGING_LEVEL = logging.DEBUG # usually INFO is good -class CustomFormatter(logging.Formatter): - """Logging Formatter to add colors and count warning / errors""" - # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 - - grey = "\x1b[38;21m" - yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # dark green - green = "\x1b[31;21m" # dark green - red = "\x1b[31;21m" - bold_red = "\x1b[31;1m" - light_blue = "\x1b[1;36m" - blue = "\x1b[1;34m" - reset = "\x1b[0m" - # File "{file}", line {max(line, 1)}'.replace("\\", "/") - format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' - - FORMATS = { - logging.DEBUG: light_blue + format + reset, - logging.INFO: cyan + format + reset, - logging.WARNING: red + format + reset, - logging.ERROR: bold_red + format + reset, - logging.CRITICAL: bold_red + format + reset - } - - def format(self, record): - log_fmt = self.FORMATS.get(record.levelno) - formatter = logging.Formatter(log_fmt) - return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links - - -def get_logger(name): - """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. - - :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. - - :returns: the logger. 
- """ - # logging.basicConfig(stream=sys.stdout, level=logging.INFO) - logger = logging.getLogger(name) - logger.setLevel(LOGGING_LEVEL) - # create console handler - ch = logging.StreamHandler() - ch.setFormatter(CustomFormatter()) - logger.addHandler(ch) - return logger - - +from get_logger import get_logger log = get_logger(__name__) From 69b886cda37363f8a77db3d031986bce8e163d91 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 7 Feb 2023 07:27:42 +0100 Subject: [PATCH 21/38] added search path for running from physical-cartpole. fixed logger->log renaming --- Controllers/controller_mpc.py | 2 +- Cost_Functions/__init__.py | 2 +- Cost_Functions/cost_function_wrapper.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index e09a7b7..7c1a94e 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -127,7 +127,7 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str # config_cost_functions.yml -> self.cost_function_wrapper.cost_function # config_optimizers.yml -> self.optimizer AND self.predictor_wrapper.predictor for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): - (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) # all the config files are currently assumed to be in Control_Toolkit_ASF folder + (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config), search_path=['CartPoleSimulation']) # all the config files are currently assumed to be in Control_Toolkit_ASF folder # process changes to configs using new returned change list if not changes is None: for k,v in changes.items(): diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 78a7546..8908115 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -29,7 +29,7 @@ def __init__(self, controller: template_controller, ComputationLib: "type[Comput self.controller:template_controller = controller self.config:dict=config self.set_computation_library(ComputationLib) - logger.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}') + log.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}') def get_terminal_cost(self, terminal_states: TensorType) -> TensorType: """Compute a batch of terminal costs for a batch of terminal states. 
diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index cff0ece..e1c0fd4 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -26,7 +26,7 @@ def __init__(self): self.cost_function = None # cost_function config (self.cost_function_config, _) = load_or_reload_config_if_modified( - os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")) + os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), search_path=['CartPoleSimulation']) # if run from physical-cartpole, need to find it relative to CartPoleSimulation submodule self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default self.lib = None # filled by configure(), needed to update TF variables From 4ce131e34114312c996ecaddf93338e3dd565aa7 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 7 Feb 2023 08:51:42 +0100 Subject: [PATCH 22/38] update path to config_cost_functions.yml --- Controllers/__init__.py | 2 +- Controllers/controller_mpc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 138ebbb..4b7f854 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -14,7 +14,7 @@ from get_logger import get_logger log = get_logger(__name__) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 7c1a94e..cecd016 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -23,7 +23,7 @@ config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) class controller_mpc(template_controller): From 32c0a50b82f6a30d4195fe0641175c0f09b08563 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Wed, 8 Feb 2023 12:40:58 +0100 Subject: [PATCH 23/38] move get_logger.py to Control_Toolkit so that it can be used by physical-cartpole repo. Fix some cartpole parameter import. Fix rename of config_cost_functions.yml. Add timer.py and yes_or_no.py utility classes. 
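Since every module in this series now converges on the same logging pattern, a minimal usage sketch (mirroring the docstring of others/get_logger.py introduced below):

    from Control_Toolkit.others.get_logger import get_logger
    log = get_logger(__name__)  # colored, per-module logger with pycharm-clickable file/line links

    log.info('controller configured')
    log.warning('this prints in a warning color, with file and line number appended')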
--- Controllers/__init__.py | 7 +- Controllers/controller_mpc.py | 5 +- Cost_Functions/__init__.py | 2 +- Cost_Functions/cost_function_wrapper.py | 5 +- Optimizers/optimizer_rpgd_me_tf.py | 2 +- Optimizers/optimizer_rpgd_ml_tf.py | 2 +- Optimizers/optimizer_rpgd_particle_tf.py | 2 +- Optimizers/optimizer_rpgd_tf.py | 4 +- others/environment.py | 4 +- others/get_logger.py | 50 +++++++++ others/globals_and_utils.py | 3 +- others/timer.py | 129 +++++++++++++++++++++++ others/yes_or_no.py | 58 ++++++++++ 13 files changed, 250 insertions(+), 23 deletions(-) create mode 100644 others/get_logger.py create mode 100644 others/timer.py create mode 100644 others/yes_or_no.py diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 138ebbb..ccb7340 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -1,7 +1,6 @@ import os from abc import ABC, abstractmethod from pathlib import Path -from typing import Tuple from typing import Tuple, Union import numpy as np @@ -9,12 +8,12 @@ from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) -from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes +from others.globals_and_utils import load_or_reload_config_if_modified -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 7c1a94e..4ebd136 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -2,7 +2,6 @@ from typing import Optional from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import cartpole_trajectory_generator -from GUI import gui_default_params from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper import numpy as np @@ -18,12 +17,12 @@ from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) class controller_mpc(template_controller): diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 8908115..4637629 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -3,7 +3,7 @@ from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType from Control_Toolkit.Controllers import template_controller -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) class cost_function_base: diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index e1c0fd4..6f4e746 100644 --- a/Cost_Functions/cost_function_wrapper.py 
+++ b/Cost_Functions/cost_function_wrapper.py @@ -1,14 +1,11 @@ from importlib import import_module import os from SI_Toolkit.computation_library import TensorType -import yaml -from copy import deepcopy as dcp -from types import MappingProxyType from Control_Toolkit.Controllers import template_controller from Control_Toolkit.Cost_Functions import cost_function_base from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log=get_logger(__name__) class CostFunctionWrapper: diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index 68c80ae..e352781 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -10,7 +10,7 @@ from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/Optimizers/optimizer_rpgd_ml_tf.py b/Optimizers/optimizer_rpgd_ml_tf.py index f82af1c..c966f99 100644 --- a/Optimizers/optimizer_rpgd_ml_tf.py +++ b/Optimizers/optimizer_rpgd_ml_tf.py @@ -10,7 +10,7 @@ from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/Optimizers/optimizer_rpgd_particle_tf.py b/Optimizers/optimizer_rpgd_particle_tf.py index d560c72..c7f388a 100644 --- a/Optimizers/optimizer_rpgd_particle_tf.py +++ b/Optimizers/optimizer_rpgd_particle_tf.py @@ -9,7 +9,7 @@ from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 3b5a0e7..8e04d2f 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -1,18 +1,16 @@ from typing import Tuple -from SI_Toolkit.Functions.TF.Compile import CompileTF from SI_Toolkit.computation_library import ComputationLibrary, TensorFlowLibrary import numpy as np import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import get_logger from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/others/environment.py b/others/environment.py index 3196e05..9dbef5c 100644 --- a/others/environment.py +++ b/others/environment.py @@ -1,13 +1,11 @@ from typing import Any, Optional, Tuple, Union import numpy as np -import tensorflow as tf -import torch from SI_Toolkit.computation_library import ComputationLibrary, TensorType from gymnasium.spaces import Box -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/others/get_logger.py b/others/get_logger.py new file mode 100644 index 0000000..ce468fe --- 
/dev/null +++ b/others/get_logger.py @@ -0,0 +1,50 @@ +import logging +# general logger for all control/si_toolkit users. Produces nice output format with live hyperlinks for pycharm users +# to use it, just call log=get_logger(__name__) at the top of your python file + +LOGGING_LEVEL = logging.DEBUG # usually INFO is good +class CustomFormatter(logging.Formatter): + """Logging Formatter to add colors and count warning / errors""" + # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 + + grey = "\x1b[38;21m" + yellow = "\x1b[33;21m" + cyan = "\x1b[1;36m" # dark green + green = "\x1b[31;21m" # dark green + red = "\x1b[31;21m" + bold_red = "\x1b[31;1m" + light_blue = "\x1b[1;36m" + blue = "\x1b[1;34m" + reset = "\x1b[0m" + # File "{file}", line {max(line, 1)}'.replace("\\", "/") + format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' + + FORMATS = { + logging.DEBUG: light_blue + format + reset, + logging.INFO: cyan + format + reset, + logging.WARNING: red + format + reset, + logging.ERROR: bold_red + format + reset, + logging.CRITICAL: bold_red + format + reset + } + + def format(self, record): + log_fmt = self.FORMATS.get(record.levelno) + formatter = logging.Formatter(log_fmt) + return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links + + +def get_logger(name): + """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. + + :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + + :returns: the logger. + """ + # logging.basicConfig(stream=sys.stdout, level=logging.INFO) + logger = logging.getLogger(name) + logger.setLevel(LOGGING_LEVEL) + # create console handler + ch = logging.StreamHandler() + ch.setFormatter(CustomFormatter()) + logger.addHandler(ch) + return logger \ No newline at end of file diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 678e0f2..87fe0c9 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -1,5 +1,4 @@ import glob -import logging import os from datetime import datetime from importlib import import_module @@ -10,7 +9,7 @@ import torch from numpy.random import SFC64, Generator from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/others/timer.py b/others/timer.py new file mode 100644 index 0000000..b83b6a9 --- /dev/null +++ b/others/timer.py @@ -0,0 +1,129 @@ +import atexit + +# a useful timer class to measure execution time statistics + +from engineering_notation import EngNumber as eng # only from pip +from matplotlib import pyplot as plt +import numpy as np +import time + +from get_logger import get_logger +log=get_logger(__name__) + +timers = {} +times = {} +class timer: + def __init__(self, timer_name='', delay=None, show_hist=False, numpy_file=None): + """ Make a timer() in a _with_ statement for a block of code. + The timer is started when the block is entered and stopped when exited. + + The accumulated timer statistics are automatically printed on exit with atexit (see end of this file). + + The timer _must_ be used in a with statement, e.g. 
+ + while True: + timestr = time.strftime("%Y%m%d-%H%M") + with timer('overall consumer loop', numpy_file=f'{DATA_FOLDER}/consumer-frame-rate-{timestr}.npy', show_hist=True): + with timer('receive UDP'): + # do something + + timers can be nested + + + :param timer_name: the str by which this timer is repeatedly called and which it is named when summary is printed on exit + :param delay: set this to a value to simply accumulate this externally determined interval + :param show_hist: whether to plot a histogram with pyplot + :param numpy_file: optional numpy file path + """ + self.timer_name = timer_name + self.show_hist = show_hist + self.numpy_file = numpy_file + self.delay=delay + + if self.timer_name not in timers.keys(): + timers[self.timer_name] = self + if self.timer_name not in times.keys(): + times[self.timer_name]=[] + + def __enter__(self): + if self.delay is None: + self.start = time.time() + return self + + def __exit__(self, *args): + if self.delay is None: + self.end = time.time() + self.interval = self.end - self.start # measured in seconds + else: + self.interval=self.delay + times[self.timer_name].append(self.interval) + + def print_timing_info(self, logger=None): + """ Prints the timing information accumulated for this timer + + :param logger: write to the supplied logger, otherwise use the built-in logger + """ + if len(times[self.timer_name])==0: + log.error(f'Timer {self.timer_name} has no statistics; was it used without a "with" statement?') + return + a = np.array(times[self.timer_name]) + timing_mean = np.mean(a) # todo use built in print method for timer + timing_std = np.std(a) + timing_median = np.median(a) + timing_min = np.min(a) + timing_max = np.max(a) + s='{} n={}: {}s +/- {}s (median {}s, min {}s max {}s)'.format(self.timer_name, len(a), + eng(timing_mean), eng(timing_std), + eng(timing_median), eng(timing_min), + eng(timing_max)) + + if logger is not None: + logger.info(s) + else: + log.info(s) + +def print_timing_info(): + for k,v in times.items(): # k is the name, v is the list of times + a = np.array(v) + timing_mean = np.mean(a) + timing_std = np.std(a) + timing_median = np.median(a) + timing_min = np.min(a) + timing_max = np.max(a) + log.info('== Timing statistics from all timers ==\n{} n={}: {}s +/- {}s (median {}s, min {}s max {}s)'.format(k, len(a), + eng(timing_mean), eng(timing_std), + eng(timing_median), eng(timing_min), + eng(timing_max))) + if timers[k].numpy_file is not None: + try: + log.info(f'saving timing data for {k} in numpy file {timers[k].numpy_file}') + log.info('there are {} times'.format(len(a))) + np.save(timers[k].numpy_file, a) + except Exception as e: + log.error(f'could not save numpy file {timers[k].numpy_file}; caught {e}') + + if timers[k].show_hist: + + def plot_loghist(x, bins): + hist, bins = np.histogram(x, bins=bins) # histogram x linearly + if len(bins)<2 or bins[0]<=0: + log.error(f'cannot plot histogram since bins={bins}') + return + logbins = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), len(bins)) # use resulting bin ends to get log bins + plt.hist(x, bins=logbins) # now again histogram x, but with the log-spaced bins, and plot this histogram + plt.xscale('log') + + dt = np.clip(a,1e-6, None) + # logbins = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), len(bins)) + try: + plot_loghist(dt,bins=100) + plt.xlabel('interval[s]') + plt.ylabel('frequency') + plt.title(k) + plt.show() + except Exception as e: + log.error(f'could not plot histogram: got {e}') + + +# this will print all the timer values upon termination of any
program that imported this file +atexit.register(print_timing_info) diff --git a/others/yes_or_no.py b/others/yes_or_no.py new file mode 100644 index 0000000..14a53ca --- /dev/null +++ b/others/yes_or_no.py @@ -0,0 +1,58 @@ +import signal +def alarm_handler(signum, frame): + raise TimeoutError +def input_with_timeout(prompt, timeout=30): + """ get input with timeout + + :param prompt: the prompt to print + :param timeout: timeout in seconds, or None to disable + + :returns: the input + :raises: TimeoutError if times out + """ + # set signal handler + if timeout is not None: + signal.signal(signal.SIGALRM, alarm_handler) + signal.alarm(timeout) # produce SIGALRM in `timeout` seconds + try: + time.sleep(.5) # get input to be printed after logging + return input(prompt) + except TimeoutError as to: + raise to + finally: + if timeout is not None: + signal.alarm(0) # cancel alarm + +def yes_or_no(question, default='y', timeout=None): + """ Get y/n answer with default choice and optional timeout + + :param question: prompt + :param default: the default choice, i.e. 'y' or 'n' + :param timeout: the timeout in seconds, default is None + + :returns: True or False + """ + if default is not None and (default!='y' and default!='n'): + log.error(f'bad option for default: {default}') + quit(1) + y='Y' if default=='y' else 'y' + n='N' if default=='n' else 'n' + while "the answer is invalid": + try: + to_str='' if timeout is None or os.name=='nt' else f'(Timeout {default} in {timeout}s)' + if os.name=='nt': + log.warning('cannot use timeout signal on windows') + time.sleep(.1) # make the warning come out first + reply=str(input(f'{question} {to_str} ({y}/{n}): ')).lower().strip() + else: + reply = str(input_with_timeout(f'{question} {to_str} ({y}/{n}): ',timeout=timeout)).lower().strip() + except TimeoutError: + log.warning(f'timeout expired, returning default={default} answer') + reply='' + if len(reply)==0 or reply=='': + return True if default=='y' else False + elif reply[0].lower() == 'y': + return True + if reply[0].lower() == 'n': + return False + From f0ec1d115892f57e875b0a8f8d63bcc5728f31b3 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Fri, 10 Feb 2023 06:31:59 +0100 Subject: [PATCH 24/38] cartpole_dancer.py starts to work. Music starts and stops, some steps seem to work. mppi parameters copied from development. cost weights adjusted. controller update interval increased to 25ms from 20ms to match simulator and actual achieved rate with dancer mppi. mppi rollouts reduced to 700 to speed up control. PhysicalCartPoleDriver.py has a class variable holding the instance, for reference from other classes. Compile.py raises exception when either option is undefined. --- Controllers/__init__.py | 2 +- others/get_logger.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index ccb7340..b660eb1 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -124,7 +124,7 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str Execute one timestep of control. :param state: the state array, dimensions are TODO add dimension to help users - :param time: the time in seconds + :param time: the time now in seconds :param updated_attributes: dict of updated attributes :returns: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct?
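For orientation, the step() contract documented in the hunk above implies a driver loop of the following shape. This is a minimal illustrative sketch, not code from this repository: DummyController and the toy plant update are hypothetical stand-ins that only mimic the step(state, time) -> u interface.

import numpy as np

# Hypothetical stand-in with the same step() contract as template_controller.step:
# step(state, time) returns a normed control input in the range [-1, 1].
class DummyController:
    def step(self, state: np.ndarray, time: float = None, updated_attributes: dict = {}) -> float:
        return float(np.clip(-0.5 * state[0], -1.0, 1.0))  # toy proportional law on cart position

controller = DummyController()
state = np.zeros(6)  # toy state vector
dt = 0.02            # 20 ms control period, the value used elsewhere in these patches
for k in range(5):
    u = controller.step(state, time=k * dt)  # one control step at the current time
    state[0] += dt * u                       # toy plant update, purely illustrative
    print(f't={k * dt:.2f}s u={u:+.3f}')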
diff --git a/others/get_logger.py b/others/get_logger.py index ce468fe..8973ea3 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -9,7 +9,7 @@ class CustomFormatter(logging.Formatter): grey = "\x1b[38;21m" yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # dark green + cyan = "\x1b[1;36m" # green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" @@ -20,7 +20,7 @@ class CustomFormatter(logging.Formatter): format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { - logging.DEBUG: light_blue + format + reset, + logging.DEBUG: grey + format + reset, logging.INFO: cyan + format + reset, logging.WARNING: red + format + reset, logging.ERROR: bold_red + format + reset, From 5d13e63e48c47fefc45b63cf4136c25776632e54 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Sat, 11 Feb 2023 14:01:59 +0100 Subject: [PATCH 25/38] improved control slightly by adding back more cost terms to provide some damping. But balancing still not working very well. Cannot follow cart position well. Gets into endless repeats of trying to swing up. Fixed increment and decrement of the target position. Reduced control period back to 20ms from 25ms. added a controller reset to controller_mpc.py that stops the dance song. added a return to center function in PhysicalCartPoleDriver.py with key "C" --- Controllers/controller_mpc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 4ebd136..1eb61ca 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -143,4 +143,6 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str def controller_reset(self): self.optimizer.optimizer_reset() + self.cartpole_trajectory_generator.reset() + \ No newline at end of file From e2162fdae3d341536fd6c144a1ff2598a2982773 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Sun, 12 Feb 2023 11:37:00 +0100 Subject: [PATCH 26/38] added some docstrings, but they are not very informative. Setting up to measure model mismatch in controller_mpc.py; not implemented yet. --- Controllers/__init__.py | 2 ++ Controllers/controller_mpc.py | 8 ++++++-- README.md | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index b660eb1..245a7e0 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -196,6 +196,8 @@ def get_outputs(self) -> "dict[str, np.ndarray]": } def update_logs(self, logging_values: "dict[str, TensorType]") -> None: + """ Appends controller logging information to memory in self.logs if self.controller_logging is enabled, according to self.save_vars + """ if self.controller_logging: for name, var in zip( self.save_vars, [logging_values.get(var_name, None) for var_name in self.save_vars] diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 1eb61ca..9ef1ffb 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -92,7 +92,7 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: self.step = self.step - def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, TensorType]" = {})->float: """ Compute one step of control.
:param state: the current state as 1d state vector @@ -136,8 +136,12 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str update_attributes(updated_attributes,o) log.debug(f'updated {objs} with scalar updated_attributes {updated_attributes}') - + # obtain the next control action from the optimizer u = self.optimizer.step(state, time) + + # for analysis of model mismatch, obtain the prediction of next state with this control action + + self.update_logs(self.optimizer.logging_values) return u diff --git a/README.md b/README.md index 6d712ac..d7d2abe 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,8 @@ The toolkit provides a uniform interface to log values in the controller. These The `controller_mpc.step` method takes the `optimizer.logging_values` dictionary and copies it to its `controller_mpc.logs` dictionary in each step. The `template_controller` has two related attributes: `controller_logging` and `save_vars`. If the former is `true`, then the controller populates the fields of `save_vars` in the `template_controller.logs` dictionary with values if your controller calls `update_logs` within the `step` method. +The resulting dict (stored in RAM during runtime) is written out to disk on exit. + ## Examples of Application-Specific Controllers From 518a00f3cfb51cc3981619d41e0655f769e0c7c6 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Mon, 13 Feb 2023 17:52:08 +0100 Subject: [PATCH 27/38] added prediction and target trajectory to logging to allow model mismatch error measurement. updated jupyter script to use filedialog to select csv. reverted the mysterious lowpass filter in the cart target position so that the position is changed instantaneously. added partially working M key command to cartpole_trajectory_generator.py to allow switching dance step from keyboard. added keyboard_input() and print_keyboard_help() to template_controller and implemented it in controller_mpc.py and cartpole_trajectory_generator.py. added easygui to requirements.txt. added warning to __init__.py in SI_Toolkit to warn about slow tensorflow eager mode. moved is_physical_cartpole_runnning_and_control_enabled to CartPole.__init__. --- Controllers/__init__.py | 10 ++++++++++ Controllers/controller_mpc.py | 14 ++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 245a7e0..f5c0e2f 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -206,3 +206,13 @@ def update_logs(self, logging_values: "dict[str, TensorType]") -> None: self.logs[name].append( var.numpy().copy() if hasattr(var, "numpy") else var.copy() ) + + def keyboard_input(self, c): + """ process keyboard input character. Default implementation does nothing. 
:param c: character + """ + pass + + def print_keyboard_help(self): + """ Print help for keyboard input""" + pass diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 9ef1ffb..7128051 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -40,7 +40,7 @@ def __init__( ): super().__init__(dt, environment_name, num_states, num_control_inputs, control_limits, initial_environment_attributes) - self.cartpole_trajectory_generator = cartpole_trajectory_generator() + self.cartpole_trajectory_generator:cartpole_trajectory_generator = cartpole_trajectory_generator() def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: @@ -149,4 +149,14 @@ def controller_reset(self): self.optimizer.optimizer_reset() self.cartpole_trajectory_generator.reset() - \ No newline at end of file + def keyboard_input(self, c): + try: + self.cartpole_trajectory_generator.keyboard_input(c) + except AttributeError: + pass + + def print_keyboard_help(self): + try: + self.cartpole_trajectory_generator.print_keyboard_help() + except AttributeError: + pass From 9190c2aa5b2ae1373073f38b45f6625852bda7cc Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 14 Feb 2023 17:44:20 +0100 Subject: [PATCH 28/38] add computation of pole natural frequency to p_globals.py. remove terminal cost in cartpole_dancer_cost.py to move track edge barrier cost inside stage costs function where it is easier to manage. comment out terminal cost computation in Control_Toolkit - note this will break cost functions that use it. --- Cost_Functions/__init__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 4637629..2a26ea6 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -81,9 +81,14 @@ def get_trajectory_cost(self, state_horizon: TensorType, inputs: TensorType, pre :return: The summed cost of the trajectory. Has shape [batch_size]. :rtype: TensorType """ - stage_costs = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) # Select all but last state of the horizon - terminal_cost = self.lib.reshape(self.get_terminal_cost(state_horizon[:, -1, :]), (-1, 1)) - total_cost = self.lib.mean(self.lib.concat([stage_costs, terminal_cost], 1), 1) # Average across the MPC horizon dimension + # Select all but last state of the horizon + # stage costs have dimension [num_rollouts, horizon, states] + stage_costs = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) + # select last states of horizon for all rollouts + # compute the cost of these terminal states, result is ??? + # terminal_cost = self.lib.reshape(self.get_terminal_cost(state_horizon[:, -1, :]), (-1, 1)) + # total_cost = self.lib.mean(self.lib.concat([stage_costs, terminal_cost], 1), 1) + total_cost = self.lib.mean(stage_costs, 1) # Average across the MPC horizon dimension to leave a 1-d vector of num_rollouts dimension return total_cost def set_computation_library(self, ComputationLib: "type[ComputationLibrary]"): From 6b8a67673b4ff5c3a95677e3dd51fa7a116bd0a7 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Thu, 16 Feb 2023 14:43:47 +0100 Subject: [PATCH 29/38] added 'cartwheel' step to cartpole_trajectory_generator.py. commented out some noisy log.debug.
made horizon same as physical cartpole --- Controllers/controller_mpc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 7128051..8100efa 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -45,8 +45,7 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - log.info(f'Using optimizer "{optimizer_name}" specified in controller config file') - log.info(f"Using optimizer {optimizer_name} specified in controller config file") + log.info(f'Using optimizer \'{optimizer_name}\' specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) log.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') From 039d385b71602071d84af3d8436b5325ad71dad7 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Sun, 19 Feb 2023 11:05:17 +0100 Subject: [PATCH 30/38] added warning for cart calibration. --- others/yes_or_no.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/others/yes_or_no.py b/others/yes_or_no.py index 14a53ca..bf2aa32 100644 --- a/others/yes_or_no.py +++ b/others/yes_or_no.py @@ -1,4 +1,8 @@ +# useful utilities to ask questions at the console terminal with default answer and timeout + import signal +import time + def alarm_handler(signum, frame): raise TimeoutError def input_with_timeout(prompt, timeout=30): From ed2c1180807ebe6b8abafa47ae030566d42a2488 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 19 Feb 2023 20:50:18 +0100 Subject: [PATCH 31/38] fixed some logic and reduced some loggers to debug level. Updated satisfaction CSV to use markers from audition and added more cartwheels --- Cost_Functions/cost_function_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index 6f4e746..134842f 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -75,7 +75,7 @@ def configure( config=self.cost_function_config['CartPole'][self.cost_function_name] # todo hardcoded 'CartPole' has to go, not sure how to determine it, maybe from module folder? update_attributes(config, self.cost_function) - log.info(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') + log.debug(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None From a16b45420ca15cf829d7ba647c0c9fa588d34cff Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Mon, 20 Feb 2023 10:59:19 +0100 Subject: [PATCH 32/38] fixed shimmy math. fixed import of CompileTF in optimizer_rpgd_tf.py.
other small code changes and comments --- Optimizers/optimizer_rpgd_tf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 8e04d2f..1a2429a 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -6,7 +6,7 @@ import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF +from SI_Toolkit.Functions.TF.Compile import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper @@ -15,6 +15,7 @@ class optimizer_rpgd_tf(template_optimizer): + """ The tensorflow (tf) version of the Resampling Parallel Gradient Descent (RPGD) optimizer reported in the ICRA paper """ supported_computation_libraries = {TensorFlowLibrary} def __init__( From ea33a0024f7fe20b15aca31129ac4312d7bbe725 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 21 Feb 2023 14:04:39 +0100 Subject: [PATCH 33/38] improved console reporting of current objective and logging output so that it is shown in live console display instead of scrolling away. fixed cartonly trajectory so that cartonly works with sawtooth much better. changed PhysicalCartPoleDriver.py logger so that the handler logs with \r like other status indicators. added another exception handler in globals_and_utils.py to handle AttributeErrors in config files. Optimized shimmy amplitudes in satisfaction --- Controllers/controller_mpc.py | 2 +- others/get_logger.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 8100efa..1d8d65f 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -40,7 +40,7 @@ def __init__( ): super().__init__(dt, environment_name, num_states, num_control_inputs, control_limits, initial_environment_attributes) - self.cartpole_trajectory_generator:cartpole_trajectory_generator = cartpole_trajectory_generator() + self.cartpole_trajectory_generator:cartpole_trajectory_generator = cartpole_trajectory_generator() # TODO awkward to add this specific cost function generator here to generic controller_mpc def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: diff --git a/others/get_logger.py b/others/get_logger.py index 8973ea3..5cd5049 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -33,10 +33,11 @@ def format(self, record): return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links -def get_logger(name): +def get_logger(name=None): """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + If no name is supplied, it defaults to the root logger, which may turn on a LOT of undesired logging output. :returns: the logger.
""" From 6e8ef5157f29206a265522db14f26da377df6653 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 21 Feb 2023 15:33:40 +0100 Subject: [PATCH 34/38] improved logging output to make debug logger light gray, include filename in console logger for logging during control --- others/get_logger.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/others/get_logger.py b/others/get_logger.py index 5cd5049..1d9d916 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -7,9 +7,11 @@ class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 - grey = "\x1b[38;21m" + # \x1b[ (ESC[) is the CSI introductory sequence for ANSI https://en.wikipedia.org/wiki/ANSI_escape_code + # The control sequence CSI n m, named Select Graphic Rendition (SGR), sets display attributes. + grey = "\x1b[2;37m" # 2 faint, 37 gray yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # + cyan = "\x1b[1;36m" # 1 bold 36 cyan green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" @@ -33,16 +35,15 @@ def format(self, record): return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links -def get_logger(name=None): +def get_logger(name='ControlToolkit'): """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. - :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. - If no name is supplied, it defaults to root logger which may turn on a LOT of undesired logging output. + :param name: ignored -- all loggers here have the name 'ControlToolkit' so that all can be affected uniformly :returns: the logger. """ # logging.basicConfig(stream=sys.stdout, level=logging.INFO) - logger = logging.getLogger(name) + logger = logging.getLogger('ControlToolkit') # tobi changed so all have same name so we can uniformly affect all of them logger.setLevel(LOGGING_LEVEL) # create console handler ch = logging.StreamHandler() From e894dfce1a2594a18f2cf4f4476f367885ab0250 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Wed, 22 Feb 2023 08:46:26 +0100 Subject: [PATCH 35/38] fixed get_logger.py that now uses a single logger name to only add the handler once --- others/get_logger.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/others/get_logger.py b/others/get_logger.py index 1d9d916..6546b15 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -1,6 +1,7 @@ import logging # general logger for all control/si_toolkit users. Produces nice output format with live hyperlinks for pycharm users -# to use it, just call log=get_logger(__name__) at the top of your python file +# to use it, just call log=get_logger() at the top of your python file +# all these loggers share the same logger name 'Control_Toolkit' LOGGING_LEVEL = logging.DEBUG # usually INFO is good class CustomFormatter(logging.Formatter): @@ -11,9 +12,9 @@ class CustomFormatter(logging.Formatter): # The control sequence CSI n m, named Select Graphic Rendition (SGR), sets display attributes. 
grey = "\x1b[2;37m" # 2 faint, 37 gray yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # 1 bold 36 cyan + cyan = "\x1b[0;36m" # 0 normal 36 cyan green = "\x1b[31;21m" # dark green - red = "\x1b[31;21m" + red = "\x1b[31;21m" # bold red bold_red = "\x1b[31;1m" light_blue = "\x1b[1;36m" blue = "\x1b[1;34m" @@ -45,8 +46,9 @@ def get_logger(name='ControlToolkit'): # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logger = logging.getLogger('ControlToolkit') # tobi changed so all have same name so we can uniformly affect all of them logger.setLevel(LOGGING_LEVEL) - # create console handler - ch = logging.StreamHandler() - ch.setFormatter(CustomFormatter()) - logger.addHandler(ch) + # create console handler if this logger does not have handler yet + if len(logger.handlers)==0: + ch = logging.StreamHandler() + ch.setFormatter(CustomFormatter()) + logger.addHandler(ch) return logger \ No newline at end of file From 3489a44996c524b33ffaa0a8c431ac79246852a5 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Thu, 23 Feb 2023 20:08:21 +0100 Subject: [PATCH 36/38] reduced chatter in logging --- Controllers/controller_mpc.py | 4 ++-- Cost_Functions/cost_function_wrapper.py | 4 ++-- others/globals_and_utils.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 1d8d65f..db1571b 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -48,13 +48,13 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: log.info(f'Using optimizer \'{optimizer_name}\' specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - log.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') + log.info(f"Using predictor_specification='{predictor_specification}' specified in controller config file") config_optimizer = config_optimizers[optimizer_name] # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) - log.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') + log.info(f'using cost_function_specification=\'{cost_function_specification}\' in config {self.config_controller}') self.cost_function_wrapper = CostFunctionWrapper() self.cost_function_wrapper.configure(self, cost_function_specification=cost_function_specification) diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index 134842f..b484190 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -28,7 +28,7 @@ def __init__(self): self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default self.lib = None # filled by configure(), needed to update TF variables - log.info(f'default cost function name is {self.cost_function_name_default}') + log.debug(f'default cost function name is {self.cost_function_name_default}') # cost_function config (self.cost_function_config, _) = load_or_reload_config_if_modified( os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),search_path=['CartPoleSimulation']) @@ -36,7 +36,7 @@ def __init__(self): self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default self.lib = None # filled by configure(), needed to update TF variables - log.info(f'default cost 
function name is {self.cost_function_name_default}') + log.debug(f'default cost function name is {self.cost_function_name_default}') def configure( self, diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 87fe0c9..7cf8b9d 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -100,7 +100,7 @@ def import_controller_by_name(controller_name: str) -> type: assert len(controller_relative_paths) == 1, f"Controller {controller_full_name} must be in a unique location. {len(controller_relative_paths)} found." controller_relative_path = controller_relative_paths[0] - log.info(f"Importing controller from {controller_relative_path}") + log.debug(f"Importing controller from {controller_relative_path}") Controller: type = getattr(import_module(controller_relative_path.replace(".py", "").replace(os.sep, ".")), controller_full_name) return Controller From a1a36db4aa8dac2a81f51c62913d47fc4ee82a57 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 28 Feb 2023 11:42:07 +0100 Subject: [PATCH 37/38] major changes to cartpole_dancer_cost and cartpole_trajectory_generator for compatibility with RPGD gradient computation. update_attributes in globals_and_utils.py supports assignment of tf.Variable and now raises exception if we try to .assign an immutable Tensor. Balance step now uses int +1 or -1 for desired pole up or down, not 'up' or 'down'. computation_library.py now casts variable to numpy() from tensorflow only if it is a Tensor type. --- Controllers/controller_mpc.py | 6 +----- Optimizers/optimizer_rpgd_tf.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index db1571b..056b6d8 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -109,12 +109,8 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str cost_function=self.cost_function_wrapper.cost_function update_attributes(updated_attributes,cost_function) # update target_position and target_equilibrium in cost function to use updated_attributes.clear() - target_trajectory = \ - self.cartpole_trajectory_generator.\ + self.cartpole_trajectory_generator.\ generate_cartpole_trajectory(time=time, state=state, controller=self, cost_function=self.cost_function_wrapper.cost_function) - updated_attributes['target_trajectory']=target_trajectory - update_attributes(updated_attributes,cost_function) # update - updated_attributes.clear() # now we fill this dict with config file changes if there are any and update attributes in the controller, the cost function, and the optimizer # detect any changes in config scalar values and pass to this controller or the cost function or optimizer diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 1a2429a..3e1a2cf 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -78,7 +78,7 @@ def __init__( self.Interpolator = Interpolator(self.mpc_horizon, period_interpolation_inducing_points, self.num_control_inputs, self.lib) - self.opt = tf.keras.optimizers.Adam( + self.opt:tf.keras.optimizers.Adam = tf.keras.optimizers.Adam( learning_rate=learning_rate, beta_1=adam_beta_1, beta_2=adam_beta_2, From 807c46de404c542c4a074b99645be8e7c8397be0 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sat, 13 May 2023 14:52:02 +0200 Subject: [PATCH 38/38] initial commit of Shreyan's code for energy-based controller for cartpole balancing --- Controllers/__init__.py | 8 ++++++++
others/globals_and_utils.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index f5c0e2f..c078b49 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -118,6 +118,14 @@ def configure(self, **kwargs): # In your controller, implement any additional initialization steps here pass + def update_attributes(self, updated_attributes: "dict[str,TensorType]"): + for property, new_value in updated_attributes.items(): + try: + attr = getattr(self.variable_parameters, property) + self.computation_library.assign(attr, self.lib.to_tensor(new_value, attr.dtype)) + except: + setattr(self.variable_parameters, property, new_value) + @abstractmethod def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): """ diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 7cf8b9d..57a7914 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -159,7 +159,7 @@ def get_controller_name(controller_names=None, controller_name=None, controller_ try: controller_idx = controller_names.index(controller_name) except ValueError: - print('{} is not in list. \n In list are: {}'.format(controller_name, controller_names)) + log.error('{} is not in list. \n In list are: {}'.format(controller_name, controller_names)) return None else: controller_name = controller_names[controller_idx]
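The update_attributes method added in this final patch first attempts an in-place assignment into an existing computation-library variable and only falls back to a plain setattr. Below is a minimal standalone sketch of the same pattern, assuming TensorFlow; the Params container is a hypothetical stand-in for self.variable_parameters, not a class from this repository, and the sketch narrows the patch's bare except to the exceptions that assignment can realistically raise.

import tensorflow as tf

class Params:  # hypothetical stand-in for self.variable_parameters
    target_position = tf.Variable(0.0, dtype=tf.float32)

def update_attributes(params, updated_attributes: dict):
    for name, new_value in updated_attributes.items():
        try:
            attr = getattr(params, name)
            # in-place assign keeps the same tf.Variable object, so compiled
            # graphs that captured it will see the new value
            attr.assign(tf.cast(new_value, attr.dtype))
        except (AttributeError, TypeError, ValueError):
            # unknown attribute or not an assignable Variable: just (re)bind it
            setattr(params, name, new_value)

p = Params()
update_attributes(p, {'target_position': 0.25, 'target_equilibrium': 1})
print(float(p.target_position), p.target_equilibrium)  # prints: 0.25 1

Assigning in place rather than rebinding matters because tf.function-compiled code captures the Variable object itself; rebinding the attribute would leave compiled graphs reading the stale value.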