From 68bf53797876cf806f7fdf34588a599be0a62d1f Mon Sep 17 00:00:00 2001 From: marcinpaluch1994 Date: Thu, 24 Nov 2022 16:17:28 +0100 Subject: [PATCH 01/38] Starting template for integrating vector of target positions --- Controllers/TargetPositionGenerator.py | 8 ++++++++ Controllers/controller_mpc.py | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 Controllers/TargetPositionGenerator.py diff --git a/Controllers/TargetPositionGenerator.py b/Controllers/TargetPositionGenerator.py new file mode 100644 index 0000000..5a923d9 --- /dev/null +++ b/Controllers/TargetPositionGenerator.py @@ -0,0 +1,8 @@ + +class TargetPositionGenerator: + def __init__(self, lib, horizon): + self.horizon = horizon + self.lib = lib + + def step(self, time): + return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d6cd4ae..ed906ea 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -13,6 +13,8 @@ from torch import inference_mode +from Control_Toolkit.Controllers.TargetPositionGenerator import TargetPositionGenerator + config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) @@ -64,6 +66,9 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: predictor_specification=predictor_specification ) + self.TargetPositionGeneratorInstance = TargetPositionGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) + setattr(self, "target_positions_vector", self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), self.computation_library.float32)) + if self.lib.lib == 'Pytorch': self.step = inference_mode()(self.step) else: @@ -71,6 +76,9 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + + target_positions_vector = self.TargetPositionGeneratorInstance.step(time) + updated_attributes['target_positions_vector'] = target_positions_vector self.update_attributes(updated_attributes) u = self.optimizer.step(s, time) self.update_logs(self.optimizer.logging_values) From 6a6d2187f4898f61c11bd601925b1168332ef121 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 27 Nov 2022 09:26:16 +0100 Subject: [PATCH 02/38] to fix tensorflow JIT compile case insensitivity problem, renamed cart mass M to m_cart and pole mass m to m_pole added comments about meaning of "stage" in MPPI renamed TargetPositionGenerator to TargetTrajectoryGenerator, since we target a complete trajectory, not just cart position.
renamed my_logger() to get_logger() --- .../Controllers/controller_barebone.py | 1 + .../EnvironmentName/cost_function_barebone.py | 2 +- Controllers/TargetPositionGenerator.py | 8 ------- Controllers/TrajectoryGenerator.py | 22 +++++++++++++++++++ Controllers/__init__.py | 8 +++++++ Controllers/controller_mpc.py | 11 ++++++---- 6 files changed, 39 insertions(+), 13 deletions(-) delete mode 100644 Controllers/TargetPositionGenerator.py create mode 100644 Controllers/TrajectoryGenerator.py diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py index ecc1858..9f1b7eb 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py @@ -32,6 +32,7 @@ def configure(self): pass def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + # The controller has to adapt when environment-related attributes such as target positions change # Updated targets etc. are passed as a dictionary updated_attributes self.update_attributes(updated_attributes) # After this call, updated attributes are available as self.<> diff --git a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py index 97e9b53..9f5526a 100644 --- a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py +++ b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py @@ -37,7 +37,7 @@ def get_terminal_cost(self, terminal_states: TensorType): # return terminal_cost pass - # all stage costs together + # all stage costs together. A 'stage' is one timestep of a rollout. def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType): # Shape of states: [batch_size, mpc_horizon, num_states] # TODO: Compute stage cost diff --git a/Controllers/TargetPositionGenerator.py b/Controllers/TargetPositionGenerator.py deleted file mode 100644 index 5a923d9..0000000 --- a/Controllers/TargetPositionGenerator.py +++ /dev/null @@ -1,8 +0,0 @@ - -class TargetPositionGenerator: - def __init__(self, lib, horizon): - self.horizon = horizon - self.lib = lib - - def step(self, time): - return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/TrajectoryGenerator.py b/Controllers/TrajectoryGenerator.py new file mode 100644 index 0000000..96ab182 --- /dev/null +++ b/Controllers/TrajectoryGenerator.py @@ -0,0 +1,22 @@ +# stub for generating desired future trajectory of cartpole + +class TrajectoryGenerator: + def __init__(self, lib, horizon:int): + """ Construct the trajectory generator. + + :param lib: the computation library, e.g. tensorflow + :param horizon: the MPC horizon in timesteps + """ + self.horizon = horizon + self.lib = lib + + def step(self, time): + """ Computes the desired future state trajectory at this time. + + :param time: the scalar time in seconds + + :returns: the target state trajectory of cartpole. + It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values.
+ + """ + return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index d8753d0..36bd71b 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -109,6 +109,14 @@ def update_attributes(self, updated_attributes: "dict[str, TensorType]"): @abstractmethod def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + """ + Execute one timestep of control. + @param s: the state array, dimensions are TODO add dimension to help users + @param time: the time in seconds + @param updated_attributes: dict of updated states + @return: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct? + + """ ### Any computations in order to retrieve the current control. Such as: ## If the environment's target positions etc. change, copy the new attributes over to this controller so the cost function knows about it: # self.update_attributes(updated_attributes) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index ed906ea..3781bd7 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -13,7 +13,7 @@ from torch import inference_mode -from Control_Toolkit.Controllers.TargetPositionGenerator import TargetPositionGenerator +from Control_Toolkit.Controllers.TrajectoryGenerator import TrajectoryGenerator config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) @@ -22,6 +22,7 @@ class controller_mpc(template_controller): + _has_optimizer = True def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): @@ -66,8 +67,10 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: predictor_specification=predictor_specification ) - self.TargetPositionGeneratorInstance = TargetPositionGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) - setattr(self, "target_positions_vector", self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), self.computation_library.float32)) + self.TrajectoryGeneratorInstance = TrajectoryGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) + setattr(self, "target_positions_vector", + self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), + self.computation_library.float32)) if self.lib.lib == 'Pytorch': self.step = inference_mode()(self.step) @@ -77,7 +80,7 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - target_positions_vector = self.TargetPositionGeneratorInstance.step(time) + target_positions_vector = self.TrajectoryGeneratorInstance.step(time) updated_attributes['target_positions_vector'] = target_positions_vector self.update_attributes(updated_attributes) u = self.optimizer.step(s, time) From d1d9203507176d4cb653598e6399e77446b3dffe Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 28 Nov 2022 10:53:12 +0100 Subject: [PATCH 03/38] docstring for update_parameters --- Controllers/__init__.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 36bd71b..0bd784a 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -1,6 +1,6 @@ import os from abc import ABC, abstractmethod -from typing 
import Tuple +from typing import Tuple, Union import numpy as np import yaml @@ -9,12 +9,12 @@ PyTorchLibrary, TensorFlowLibrary, TensorType) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) logger = get_logger(__name__) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: -1. Be in Controller folder +1. Be in Controllers folder 2. Have a name starting with "controller_" 3. The name of the controller class must be the same as the name of the file. 4. It must have __init__ and step methods @@ -104,17 +104,27 @@ def configure(self, **kwargs): pass def update_attributes(self, updated_attributes: "dict[str, TensorType]"): + """ Update scalar float32 attributes in compiled code (tensorflow JIT) that have changed, i.e. copy them to the compiled/GPU instance. + + After this call, such attribute values are available to the TF function as self.key, where key is the key used in dict. + + Used in various controllers in Control_Toolkit/Control_Toolkit_ASF_Template/Controllers. + + :param updated_attributes: a dict with string keys and TensorType (i.e. float32) value attributes to copy + """ for property, new_value in updated_attributes.items(): self.computation_library.assign(getattr(self, property), self.lib.to_tensor(new_value, self.lib.float32)) @abstractmethod - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): """ Execute one timestep of control. - @param s: the state array, dimensions are TODO add dimension to help users - @param time: the time in seconds - @param updated_attributes: dict of updated states - @return: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct? + + :param s: the state array, dimensions are TODO add dimension to help users + :param time: the time in seconds + :param updated_attributes: dict of updated attributes + + :returns: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct? """ ### Any computations in order to retrieve the current control. 
Such as: From cc2b58ce742edca11b1a874b50e98ef0d3a6e9d5 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 28 Nov 2022 11:21:02 +0100 Subject: [PATCH 04/38] controller_mpc.py now copies all modified controller, cost and optimizer config changes to updated_attributes before executing optimizer.step --- Controllers/controller_mpc.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 3781bd7..e059038 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -14,11 +14,11 @@ from torch import inference_mode from Control_Toolkit.Controllers.TrajectoryGenerator import TrajectoryGenerator - +from others.globals_and_utils import load_or_reload_config_if_modified config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) -logger = get_logger(__name__) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) +log = get_logger(__name__) class controller_mpc(template_controller): @@ -28,13 +28,13 @@ class controller_mpc(template_controller): def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - logger.info(f"Using optimizer {optimizer_name} specified in controller config file") + log.info(f"Using optimizer {optimizer_name} specified in controller config file") if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - logger.info(f"Using predictor {predictor_specification} specified in controller config file") + log.info(f"Using predictor {predictor_specification} specified in controller config file") config_optimizer = config_optimizers[optimizer_name] - + # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) self.cost_function = CostFunctionWrapper() @@ -67,7 +67,9 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: predictor_specification=predictor_specification ) + # make a target position trajectory generator self.TrajectoryGeneratorInstance = TrajectoryGenerator(lib=self.computation_library, horizon=self.optimizer.mpc_horizon) + # set self.target_positions_vector to the correct vector tensor type for TF or whatever is doing the stepping setattr(self, "target_positions_vector", self.computation_library.to_variable(self.computation_library.zeros((self.optimizer.mpc_horizon,)), self.computation_library.float32)) @@ -79,10 +81,22 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - + log.debug(f'step time={time:.3f}s') + # the following is a hack to get a target trajectory passed to tensorflow.
The trajectory is passed in + # as updated_attributes, and is transferred to tensorflow by the update_attributes call target_positions_vector = self.TrajectoryGeneratorInstance.step(time) updated_attributes['target_positions_vector'] = target_positions_vector self.update_attributes(updated_attributes) + for c in ('config_controllers.yml','config_cost_functions.yml', 'config_optimizers.yml'): + (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',c)) + # process changes to configs using new returned change list + if changes is not None: + for k,v in changes: + if isinstance(v, (int, float)): + updated_attributes[k]=v + self.update_attributes(updated_attributes) + log.info(f'updated config {c} with scalar updated_attributes {updated_attributes}') + u = self.optimizer.step(s, time) self.update_logs(self.optimizer.logging_values) return u From 06c79816da4bc592c9c7a72c034a8dae0877b43d Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 28 Nov 2022 11:27:00 +0100 Subject: [PATCH 05/38] rename config_cost_function.yml to config_cost_functions.yml for consistency with other config files. added more docstrings for undocumented methods and constructors. added updated_attributes to barebones controller. improved formatting of logging output. added dictdiffer to requirements.txt renamed target_position cost function to cartpole_trajectory_cost.py. replaced some load_config with the load_or_reload_config_if_modified, so that the configs are cached for change checking. added logger to Compile.py --- .../EnvironmentName/cost_function_barebone.py | 2 +- Cost_Functions/__init__.py | 12 +++++++++- Cost_Functions/cost_function_wrapper.py | 23 +++++++++++++++---- Optimizers/__init__.py | 2 +- Optimizers/optimizer_mppi.py | 11 +++++++-- others/globals_and_utils.py | 23 +++++++++++-------- requirements.txt | 3 ++- 7 files changed, 56 insertions(+), 20 deletions(-) diff --git a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py index 9f5526a..6e9802d 100644 --- a/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py +++ b/Control_Toolkit_ASF_Template/Cost_Functions/EnvironmentName/cost_function_barebone.py @@ -9,7 +9,7 @@ # TODO: Load constants from the cost config file, like this: -config = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"), Loader=yaml.FullLoader) +config = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r"), Loader=yaml.FullLoader) # TODO: Rename parent folder from EnvironmentName to the actual name of your environment # TODO: Load constants like this: diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index a98da9d..c86692e 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -9,9 +9,19 @@ class cost_function_base: # Default: Class supports all libs to compute costs supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} - def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]") -> None: + def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]", config:dict=None) -> None: + """ Makes a new cost function + + :param controller: the controller + :param ComputationLib: the library, e.g. python, tensorflow + :param config: the dict of configuration for this cost function.
The caller can modify the config to change behavior during runtime. + + """ + self.controller = controller + self.config=config self.set_computation_library(ComputationLib) + logger.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}') def get_terminal_cost(self, terminal_states: TensorType) -> TensorType: """Compute a batch of terminal costs for a batch of terminal states. diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index d0a7ae5..2788e59 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -1,5 +1,7 @@ from importlib import import_module import os + +from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import TensorType import yaml from copy import deepcopy as dcp @@ -7,12 +9,12 @@ from Control_Toolkit.Controllers import template_controller from Control_Toolkit.Cost_Functions import cost_function_base +from others.globals_and_utils import load_or_reload_config_if_modified + +log=get_logger(__name__) # cost_function config -cost_function_config = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"), - Loader=yaml.FullLoader, -) +(cost_function_config,_) = load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")) class CostFunctionWrapper: @@ -21,12 +23,22 @@ def __init__(self): self.cost_function_name_default: str = cost_function_config[ "cost_function_name_default" ] + log.info(f'default cost function name is {self.cost_function_name_default}') def configure( self, controller: template_controller, cost_function_specification=None, + config:dict=None ): + """ + Configures the cost function. TODO This lazy constructor is needed why? 
+ + :param controller: the controller that uses this cost function + :param cost_function_specification: the string name of the cost function class, to construct the class + :param config: the config dict() that holds all the configuration values + + """ environment_name = controller.environment_name computation_library = controller.computation_library # Use library dictated by controller @@ -41,6 +53,7 @@ def configure( self.cost_function: cost_function_base = getattr( cost_function_module, self.cost_function_name )(controller, computation_library) + log.info(f'configured controller {controller.__class__} with cost function {self.cost_function.__class__}') def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None @@ -63,7 +76,7 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: return self.cost_function.get_stage_cost(states, inputs, previous_input) def get_trajectory_cost( - self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None + self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None ): """Refer to :func:`the base cost function `""" return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input) diff --git a/Optimizers/__init__.py b/Optimizers/__init__.py index aecd35d..91875ba 100644 --- a/Optimizers/__init__.py +++ b/Optimizers/__init__.py @@ -54,7 +54,7 @@ def configure(self, **kwargs): """Pass any additional arguments from the controller to the optimizer.""" pass - def step(self, s: np.ndarray, time=None): + def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): raise NotImplementedError("Implement this function in a subclass.") def optimizer_reset(self): diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 81e48c2..2ad0f06 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -5,9 +5,9 @@ from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileAdaptive from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper +from SI_Toolkit.Functions.TF.Compile import CompileAdaptive class optimizer_mppi(template_optimizer): @@ -121,7 +121,14 @@ def inizialize_pertubation(self, random_gen): return delta_u - def _predict_and_cost(self, s, u_nom, random_gen, u_old): + def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): + """ Predict dynamics and compute costs of trajectories + TODO add params and return + + :param ?? + + :returns: ?? 
+ """ s = self.lib.tile(s, (self.num_rollouts, 1)) # generate random input sequence and clip to control limits u_nom = self.lib.concat([u_nom[:, 1:, :], u_nom[:, -1:, :]], 1) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 13ac619..edbc331 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -9,39 +9,44 @@ import tensorflow as tf import torch from numpy.random import SFC64, Generator -from SI_Toolkit.Functions.TF.Compile import CompileTF, CompileAdaptive from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary LOGGING_LEVEL = logging.INFO - - class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" + # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 grey = "\x1b[38;21m" yellow = "\x1b[33;21m" + cyan = "\x1b[1;36m" # dark green + green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" reset = "\x1b[0m" - format = ( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)" - ) + # File "{file}", line {max(line, 1)}'.replace("\\", "/") + format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { logging.DEBUG: grey + format + reset, - logging.INFO: grey + format + reset, + logging.INFO: cyan + format + reset, logging.WARNING: yellow + format + reset, logging.ERROR: red + format + reset, - logging.CRITICAL: bold_red + format + reset, + logging.CRITICAL: bold_red + format + reset } def format(self, record): log_fmt = self.FORMATS.get(record.levelno) formatter = logging.Formatter(log_fmt) - return formatter.format(record) + return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links def get_logger(name): + """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. + + :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + + :returns: the logger. + """ # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logger = logging.getLogger(name) logger.setLevel(LOGGING_LEVEL) diff --git a/requirements.txt b/requirements.txt index d2c9d6b..9fa1784 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ tensorflow_probability numpy torch torchvision -gym \ No newline at end of file +gym +dictdiffer \ No newline at end of file From 068a0ac934959a6fd86a54b219adeeb369e95814 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 11 Dec 2022 15:10:14 +0100 Subject: [PATCH 06/38] finally the dynamically modifiable control cost parameters are working and the cartpole is at least balancing itself and swinging up from down position. Big changes all over! Now anytime any of the config files is modified during runtime, the using class has its tf.variable assigned the new value. To get this work, the variable MUST NOT EXIST before it is set this way the first time. Otherwise the compiler just uses the field at compile time and the changes are never seen inside. The cartpole window now does not reset the initial position and angle sliders on each start. 
--- .../Controllers/controller_barebone.py | 5 +- .../Controllers/controller_do_mpc.py | 4 +- .../Controllers/controller_do_mpc_discrete.py | 3 +- .../Controllers/controller_lqr.py | 4 +- Controllers/TrajectoryGenerator.py | 22 -------- Controllers/__init__.py | 46 +++++++++------- Controllers/cartpole_trajectory_generator.py | 55 +++++++++++++++++++ Controllers/controller_mpc.py | 51 +++++++++-------- .../controller_neural_imitator_pytorch.py | 3 +- Controllers/controller_neural_imitator_tf.py | 3 +- Cost_Functions/__init__.py | 10 +++- Cost_Functions/cost_function_wrapper.py | 26 +++++---- Optimizers/__init__.py | 4 +- Optimizers/optimizer_cem_gmm_tf.py | 16 +++--- .../optimizer_cem_grad_bharadhwaj_tf.py | 10 ++-- Optimizers/optimizer_cem_naive_grad_tf.py | 10 ++-- Optimizers/optimizer_cem_tf.py | 18 +++--- Optimizers/optimizer_gradient_tf.py | 14 ++--- Optimizers/optimizer_mppi.py | 38 ++++++++----- Optimizers/optimizer_mppi_optimize_tf.py | 10 ++-- Optimizers/optimizer_mppi_var_tf.py | 10 ++-- Optimizers/optimizer_random_action_tf.py | 10 ++-- Optimizers/optimizer_rpgd_me_tf.py | 14 ++--- Optimizers/optimizer_rpgd_ml_tf.py | 20 +++---- Optimizers/optimizer_rpgd_particle_tf.py | 22 ++++---- Optimizers/optimizer_rpgd_tf.py | 22 ++++---- others/globals_and_utils.py | 2 +- 27 files changed, 257 insertions(+), 195 deletions(-) delete mode 100644 Controllers/TrajectoryGenerator.py create mode 100644 Controllers/cartpole_trajectory_generator.py diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py index 9f1b7eb..29c6add 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py @@ -9,7 +9,8 @@ import os from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes + # TODO: You can load and access config files here, like this: # config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader) @@ -35,7 +36,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # The controller has to adapt when environment-related attributes such as target positions change # Updated targets etc. 
are passed as a dictionary updated_attributes - self.update_attributes(updated_attributes) # After this call, updated attributes are available as self.<> + update_attributes(updated_attributes,self) # After this call, updated attributes are available as self.<> # TODO: Implement your controller here # Examples: diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py index 1d2f7d7..7d01b1e 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py @@ -9,7 +9,7 @@ from CartPole.cartpole_model import Q2u, cartpole_ode_namespace from CartPole.state_utilities import cartpole_state_vector_to_namespace from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes from SI_Toolkit.computation_library import NumpyLibrary, TensorType @@ -137,7 +137,7 @@ def tvp_fun(self, t_ind): def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) s = cartpole_state_vector_to_namespace(s) diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py index 11f40c5..c2e5171 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py @@ -11,6 +11,7 @@ from CartPole.state_utilities import cartpole_state_vector_to_namespace from Control_Toolkit.Controllers import template_controller from SI_Toolkit.computation_library import NumpyLibrary, TensorType +from others.globals_and_utils import update_attributes def mpc_next_state(s, u, dt): @@ -148,7 +149,7 @@ def tvp_fun(self, t_ind): return self.tvp_template def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) s = cartpole_state_vector_to_namespace(s) diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py index 4060ad1..6aef009 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py @@ -14,7 +14,7 @@ from CartPole.state_utilities import (ANGLE_IDX, ANGLED_IDX, POSITION_IDX, POSITIOND_IDX) from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader) actuator_noise = config["cartpole"]["actuator_noise"] @@ -81,7 +81,7 @@ def configure(self): self.eigVals = eigVals def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) state = np.array( [[s[POSITION_IDX] - self.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]]) diff --git a/Controllers/TrajectoryGenerator.py b/Controllers/TrajectoryGenerator.py deleted file mode 100644 index 96ab182..0000000 --- a/Controllers/TrajectoryGenerator.py +++ /dev/null @@ -1,22 +0,0 @@ -# stub for generating desired future trajectory of cartpole -
-class TrajectoryGenerator: - def __init__(self, lib, horizon:int): - """ Construct the trajectory generator. - - :param lib: the computation library, e.g. tensorflow - :param horizon: the MPC horizon in timesteps - """ - self.horizon = horizon - self.lib = lib - - def step(self, time): - """ Computes the desired future state trajectory at this time. - - :param time: the scalar time in seconds - - :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values. - - """ - return self.lib.zeros(self.horizon)+self.lib.sin(time/1.3) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 0bd784a..a447167 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -8,6 +8,7 @@ from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) +from others.globals_and_utils import load_or_reload_config_if_modified config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) logger = get_logger(__name__) @@ -26,7 +27,7 @@ class template_controller(ABC): _has_optimizer = False # Define the computation library in your controller class or in the controller's configuration: - _computation_library: "type[ComputationLibrary]" = None + _computation_library: ComputationLibrary = None def __init__( self, @@ -38,14 +39,13 @@ def __init__( initial_environment_attributes: "dict[str, TensorType]", ): # Load controller config and select the entry for the current controller - config_controllers = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")), - Loader=yaml.FullLoader - ) + (config_controllers,_) = load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")) # ignore the _ changes return since this is initial call # self.controller_name is inferred from the class name, which is the class being instantiated # Example: If you create a controller_mpc, this controller_template.__init__ will be called # but the class name will be controller_mpc, not template_controller. - self.config_controller = dict(config_controllers[self.controller_name]) + config_key=self.controller_name + self.config_controller = config_controllers[config_key] + # add timestep .dt to all controllers here self.config_controller["dt"] = dt # Set computation library @@ -78,11 +78,20 @@ def __init__( self.num_control_inputs = num_control_inputs self.control_limits = control_limits self.action_low, self.action_high = self.control_limits + + # todo these are special for cartpole but we would need a base cartpole controller class to put them there + # self.target_position=None + # self.target_equilibrium=None # Set properties like target positions on this controller for property, new_value in initial_environment_attributes.items(): setattr(self, property, self.computation_library.to_variable(new_value, self.computation_library.float32)) - + + # set all controller config numerical values as float variables in the computation space, e.g. 
tensorflow, so they can be updated during runtime + for property, value in self.config_controller.items(): + if isinstance(value, (float, int)): + setattr(self, property, self.computation_library.to_variable(value, self.computation_library.float32)) + # Initialize control variable self.u = 0.0 @@ -103,18 +112,6 @@ def configure(self, **kwargs): # In your controller, implement any additional initialization steps here pass - def update_attributes(self, updated_attributes: "dict[str, TensorType]"): - """ Update scalar float32 attributes in compiled code (tensorflow JIT) that have changed, i.e. copy them to the compiled/GPU instance. - - After this call, such attribute values are available to the TF function as self.key, where key is the key used in dict. - - Used in various controllers in Control_Toolkit/Control_Toolkit_ASF_Template/Controllers. - - :param updated_attributes: a dict with string keys and TensorType (i.e. float32) value attributes to copy - """ - for property, new_value in updated_attributes.items(): - self.computation_library.assign(getattr(self, property), self.lib.to_tensor(new_value, self.lib.float32)) - @abstractmethod def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): """ Execute one timestep of control. @@ -152,10 +149,17 @@ def controller_report(self): def controller_reset(self): raise NotImplementedError - @property + @property # decorates the controller so it has the field .controller_name that gets its short name which is the key in the .yml config file def controller_name(self): + """ Generates standard name for this controller, but use this method as if it were a field. + + :returns: the short name which is the key to the controller in the config_controllers.yml file, e.g. 'cartpole_mppi' + + """ name = self.__class__.__name__ if name != "template_controller": + if 'controller_' not in name: + raise AttributeError(f'this controller named "{name}" does not contain "controller_". Controllers should start or contain "controller_" and the key in the config_controllers.yml file should follow the underscore') return name.replace("controller_", "").replace("_", "-").lower() else: raise AttributeError() @@ -165,7 +169,7 @@ def controller_data_for_csv(self): return {} @property - def computation_library(self) -> "type[ComputationLibrary]": + def computation_library(self) -> ComputationLibrary: if self._computation_library == None: raise NotImplementedError("Controller class needs to specify its computation library") return self._computation_library diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py new file mode 100644 index 0000000..a00559f --- /dev/null +++ b/Controllers/cartpole_trajectory_generator.py @@ -0,0 +1,55 @@ +# stub for generating desired future trajectory of cartpole +import numpy +from torch import TensorType + +from Control_Toolkit.Controllers import template_controller +from SI_Toolkit.computation_library import ComputationLibrary +from CartPole import state_utilities + +period=1 + + +class cartpole_trajectory_generator: + """ Generates target state trajectory for the cartpole """ + def __init__(self, lib:ComputationLibrary, controller:template_controller=None): + """ Construct the trajectory generator. + + :param lib: the computation library, e.g.
tensorflow + :param horizon: the MPC horizon in timesteps + """ + self.lib = lib + self.controller:template_controller=controller + + def step(self, time: float, horizon: int) -> TensorType: + """ Computes the desired future state trajectory at this time. + + :param time: the scalar time in seconds + + :returns: the target state trajectory of cartpole. + It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values. + + """ + target_angleD=1000 + if time%period None: @@ -30,7 +30,7 @@ def __init__( self.lib = computation_library - self.num_rollouts = num_rollouts + self.batch_size = batch_size self.mpc_horizon = mpc_horizon self.cost_function = cost_function self.u = 0.0 diff --git a/Optimizers/optimizer_cem_gmm_tf.py b/Optimizers/optimizer_cem_gmm_tf.py index b371414..ca59be8 100644 --- a/Optimizers/optimizer_cem_gmm_tf.py +++ b/Optimizers/optimizer_cem_gmm_tf.py @@ -26,7 +26,7 @@ def __init__( mpc_horizon: int, cem_outer_it: int, cem_initial_action_stdev: float, - num_rollouts: int, + batch_size: int, cem_stdev_min: float, cem_best_k: int, optimizer_logging: bool, @@ -39,7 +39,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -61,12 +61,12 @@ def predict_and_cost(self, s, Q): @CompileTF def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, rollout_trajectory: tf.Tensor, sampling_dist: tfpd.Distribution, rng: tf.random.Generator): #generate random input sequence and clip to control limits - Q = sampling_dist.sample(sample_shape=[self.num_rollouts]) + Q = sampling_dist.sample(sample_shape=[self.batch_size]) Q = tf.clip_by_value(Q, self.action_low, self.action_high) #rollout the trajectories and get cost traj_cost, rollout_trajectory = self.predict_and_cost(s, Q) - rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.num_rollouts, self.mpc_horizon+1, self.num_states]) + rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.batch_size, self.mpc_horizon + 1, self.num_states]) #sort the costs and find best k costs sorted_cost = tf.argsort(traj_cost) @@ -102,11 +102,11 @@ def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) - Q = tf.zeros((self.num_rollouts, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) - rollout_trajectory = tf.zeros((self.num_rollouts, self.mpc_horizon+1, self.num_states), dtype=tf.float32) - traj_cost = tf.zeros((self.num_rollouts), dtype=tf.float32) + Q = tf.zeros((self.batch_size, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) + rollout_trajectory = tf.zeros((self.batch_size, self.mpc_horizon + 1, self.num_states), dtype=tf.float32) + traj_cost = tf.zeros((self.batch_size), dtype=tf.float32) for _ in range(0, self.cem_outer_it): self.sampling_dist, Q, elite_Q, traj_cost, rollout_trajectory = self.update_distribution(s, Q, traj_cost, rollout_trajectory, self.sampling_dist, self.rng) diff --git a/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py b/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py index e56e999..fdd8b60 100644 --- a/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py +++ 
b/Optimizers/optimizer_cem_grad_bharadhwaj_tf.py @@ -27,7 +27,7 @@ def __init__( seed: int, mpc_horizon: int, cem_outer_it: int, - num_rollouts: int, + batch_size: int, cem_initial_action_stdev: float, cem_stdev_min: float, cem_best_k: int, @@ -48,7 +48,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -74,7 +74,7 @@ def __init__( # Initialization self.Q_tf = tf.Variable( - initial_value=tf.zeros([self.num_rollouts, self.mpc_horizon, self.num_control_inputs]), + initial_value=tf.zeros([self.batch_size, self.mpc_horizon, self.num_control_inputs]), trainable=True, dtype=tf.float32, ) @@ -82,7 +82,7 @@ def __init__( @CompileTF def predict_and_cost(self, s, elite_Q, Q_tf: tf.Variable, opt, rng: tf.random.Generator): - Q_sampled = self._sample_actions(rng, self.num_rollouts - self.cem_best_k) + Q_sampled = self._sample_actions(rng, self.batch_size - self.cem_best_k) Q = tf.concat([elite_Q, Q_sampled], axis=0) Q = tf.clip_by_value(Q, self.action_low, self.action_high) Q_tf.assign(Q) @@ -141,7 +141,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # tile s and convert inputs to tensor - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # generate random input sequence and clip to control limits diff --git a/Optimizers/optimizer_cem_naive_grad_tf.py b/Optimizers/optimizer_cem_naive_grad_tf.py index 9624994..cd46795 100644 --- a/Optimizers/optimizer_cem_naive_grad_tf.py +++ b/Optimizers/optimizer_cem_naive_grad_tf.py @@ -26,7 +26,7 @@ def __init__( seed: int, mpc_horizon: int, cem_outer_it: int, - num_rollouts: int, + batch_size: int, cem_initial_action_stdev: float, cem_stdev_min: float, cem_best_k: int, @@ -42,7 +42,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -62,8 +62,8 @@ def __init__( @CompileTF def predict_and_cost(self, s, rng, dist_mue, stdev): # generate random input sequence and clip to control limits - Q = tf.tile(dist_mue, [self.num_rollouts, 1, 1]) + rng.normal( - [self.num_rollouts, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) * stdev + Q = tf.tile(dist_mue, [self.batch_size, 1, 1]) + rng.normal( + [self.batch_size, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) * stdev Q = tf.clip_by_value(Q, self.action_low, self.action_high) # rollout the trajectories and record gradient with tf.GradientTape(watch_accessed_variables=False) as tape: @@ -95,7 +95,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # tile s and convert inputs to tensor - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) #cem steps updating distribution diff --git a/Optimizers/optimizer_cem_tf.py b/Optimizers/optimizer_cem_tf.py index 9c3a3ea..f51f3b7 100644 --- a/Optimizers/optimizer_cem_tf.py +++ b/Optimizers/optimizer_cem_tf.py @@ -25,7 +25,7 @@ def __init__( mpc_horizon: int, cem_outer_it: int, cem_initial_action_stdev: float, - num_rollouts: int, + batch_size: int, cem_stdev_min: float, cem_best_k: int, 
warmup: bool, @@ -40,7 +40,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -65,13 +65,13 @@ def predict_and_cost(self, s, Q): @CompileTF def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, rollout_trajectory: tf.Tensor, dist_mue: tf.Tensor, stdev: tf.Tensor, rng: tf.random.Generator): #generate random input sequence and clip to control limits - Q = tf.tile(dist_mue,(self.num_rollouts,1,1)) + tf.multiply(rng.normal( - shape=(self.num_rollouts, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32), stdev) + Q = tf.tile(dist_mue, (self.batch_size, 1, 1)) + tf.multiply(rng.normal( + shape=(self.batch_size, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32), stdev) Q = tf.clip_by_value(Q, self.action_low, self.action_high) #rollout the trajectories and get cost traj_cost, rollout_trajectory = self.predict_and_cost(s, Q) - rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.num_rollouts, self.mpc_horizon+1, self.num_states]) + rollout_trajectory = tf.ensure_shape(rollout_trajectory, [self.batch_size, self.mpc_horizon + 1, self.num_states]) #sort the costs and find best k costs sorted_cost = tf.argsort(traj_cost) @@ -87,11 +87,11 @@ def update_distribution(self, s: tf.Tensor, Q: tf.Tensor, traj_cost: tf.Tensor, def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) - Q = tf.zeros((self.num_rollouts, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) - rollout_trajectory = tf.zeros((self.num_rollouts, self.mpc_horizon+1, self.num_states), dtype=tf.float32) - traj_cost = tf.zeros((self.num_rollouts), dtype=tf.float32) + Q = tf.zeros((self.batch_size, self.mpc_horizon, self.num_control_inputs), dtype=tf.float32) + rollout_trajectory = tf.zeros((self.batch_size, self.mpc_horizon + 1, self.num_states), dtype=tf.float32) + traj_cost = tf.zeros((self.batch_size), dtype=tf.float32) iterations = self.warmup_iterations if self.warmup and self.count == 0 else self.cem_outer_it for _ in range(0, iterations): diff --git a/Optimizers/optimizer_gradient_tf.py b/Optimizers/optimizer_gradient_tf.py index a70346e..0533f81 100644 --- a/Optimizers/optimizer_gradient_tf.py +++ b/Optimizers/optimizer_gradient_tf.py @@ -23,7 +23,7 @@ def __init__( seed: int, mpc_horizon: int, gradient_steps: int, - num_rollouts: int, + batch_size: int, initial_action_stdev: float, learning_rate: float, adam_beta_1: float, @@ -43,7 +43,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -105,7 +105,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # Start all trajectories in current state - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -143,7 +143,7 @@ def step(self, s: np.ndarray, time=None): # Shift Q, Adam weights by one time step self.count += 1 Q_s = self.rng.uniform( - shape=[self.num_rollouts, 1, self.num_control_inputs], + 
shape=[self.batch_size, 1, self.num_control_inputs], minval=self.action_low, maxval=self.action_high, dtype=tf.float32, @@ -157,14 +157,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -175,7 +175,7 @@ def step(self, s: np.ndarray, time=None): def optimizer_reset(self): # generate random input sequence and clip to control limits Q = self.rng.uniform( - [self.num_rollouts, self.mpc_horizon, self.num_control_inputs], + [self.batch_size, self.mpc_horizon, self.num_control_inputs], self.action_low, self.action_high, dtype=tf.float32, diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 2ad0f06..51a96e3 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -1,4 +1,6 @@ from typing import Tuple + +from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary import numpy as np @@ -9,6 +11,7 @@ from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper from SI_Toolkit.Functions.TF.Compile import CompileAdaptive +log = get_logger(__name__) class optimizer_mppi(template_optimizer): supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} @@ -26,7 +29,7 @@ def __init__( R: float, LBD: float, mpc_horizon: int, - num_rollouts: int, + batch_size: int, NU: float, SQRTRHOINV: float, GAMMA: float, @@ -41,7 +44,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -51,12 +54,12 @@ def __init__( # MPPI parameters - self.cc_weight = cc_weight - self.R = self.lib.to_tensor(R, self.lib.float32) - self.LBD = LBD - self.NU = self.lib.to_tensor(NU, self.lib.float32) + self.cc_weight = self.lib.to_variable(cc_weight, self.lib.float32) + self.R = self.lib.to_variable(R, self.lib.float32) + self.LBD = self.lib.to_variable(LBD, self.lib.float32) + self.NU = self.lib.to_variable(NU, self.lib.float32) self._SQRTRHOINV = SQRTRHOINV - self.GAMMA = GAMMA + self.GAMMA = self.lib.to_variable(GAMMA, self.lib.float32) self.update_internal_state = self.update_internal_state_of_RNN # FIXME: There is one unnecessary operation in this function in case it is not an RNN. @@ -67,6 +70,9 @@ def __init__( self.Interpolator = Interpolator(self.mpc_horizon, period_interpolation_inducing_points, self.num_control_inputs, self.lib) + # here the predictor, cost computer, and optimizer are compiled to native instructions by tensorflow graphs and XLA JIT + # before this, we copy all the required configuration to these objects so that they can be later updated during runtime.
+ self.predict_and_cost = CompileAdaptive(self._predict_and_cost) self.predict_optimal_trajectory = CompileAdaptive(self._predict_optimal_trajectory) @@ -100,7 +106,8 @@ def mppi_correction_cost(self, u, delta_u): #total cost of the trajectory def get_mppi_trajectory_cost(self, state_horizon ,u, u_prev, delta_u): total_cost = self.cost_function.get_trajectory_cost(state_horizon,u, u_prev) - total_mppi_cost = total_cost + self.mppi_correction_cost(u, delta_u) + mppi_correction_cost =self.mppi_correction_cost(u, delta_u) + total_mppi_cost = total_cost +mppi_correction_cost return total_mppi_cost def reward_weighted_average(self, S, delta_u): @@ -114,14 +121,14 @@ def inizialize_pertubation(self, random_gen): stdev = self.SQRTRHODTINV delta_u = random_gen.normal( - [self.num_rollouts, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], + [self.batch_size, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=self.lib.float32) * stdev delta_u = self.Interpolator.interpolate(delta_u) return delta_u - def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): + def _predict_and_cost(self, s, u_nom, random_gen, u_old): """ Predict dynamics and compute costs of trajectories TODO add params and return @@ -129,11 +136,11 @@ def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): :returns: ?? """ - s = self.lib.tile(s, (self.num_rollouts, 1)) + s = self.lib.tile(s, (self.batch_size, 1)) # generate random input sequence and clip to control limits u_nom = self.lib.concat([u_nom[:, 1:, :], u_nom[:, -1:, :]], 1) delta_u = self.inizialize_pertubation(random_gen) - u_run = self.lib.tile(u_nom, (self.num_rollouts, 1, 1))+delta_u + u_run = self.lib.tile(u_nom, (self.batch_size, 1, 1)) + delta_u u_run = self.lib.clip(u_run, self.action_low, self.action_high) rollout_trajectory = self.predictor.predict_tf(s, u_run) traj_cost = self.get_mppi_trajectory_cost(rollout_trajectory, u_run, u_old, delta_u) @@ -143,7 +150,7 @@ def _predict_and_cost(self, s, u_nom, random_gen, u_old, **config): return self.mppi_output(u, u_nom, rollout_trajectory, traj_cost, u_run) def update_internal_state_of_RNN(self, s, u_nom): - u_tiled = self.lib.tile(u_nom[:, :1, :], (self.num_rollouts, 1, 1)) + u_tiled = self.lib.tile(u_nom[:, :1, :], (self.batch_size, 1, 1)) self.predictor.update(s=s, Q0=u_tiled) def _predict_optimal_trajectory(self, s, u_nom): @@ -160,7 +167,10 @@ def step(self, s: np.ndarray, time=None): self.u, self.u_nom, rollout_trajectory, traj_cost, u_run = self.predict_and_cost(s, self.u_nom, self.rng, self.u) self.u = self.lib.to_numpy(self.lib.squeeze(self.u)) - + + # print(f'mean traj cost={np.mean(traj_cost.numpy()):.2f}') # todo debug + + if self.optimizer_logging: self.logging_values["Q_logged"] = self.lib.to_numpy(u_run) self.logging_values["J_logged"] = self.lib.to_numpy(traj_cost) diff --git a/Optimizers/optimizer_mppi_optimize_tf.py b/Optimizers/optimizer_mppi_optimize_tf.py index f19a584..446fd9f 100644 --- a/Optimizers/optimizer_mppi_optimize_tf.py +++ b/Optimizers/optimizer_mppi_optimize_tf.py @@ -27,7 +27,7 @@ def __init__( R: float, LBD: float, mpc_horizon: int, - num_rollouts: int, + batch_size: int, NU: float, SQRTRHOINV: float, GAMMA: float, @@ -48,7 +48,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -110,7 +110,7 @@ def 
reward_weighted_average(self, S, delta_u): def inizialize_pertubation(self, random_gen): stdev = self.SQRTRHODTINV delta_u = random_gen.normal( - [self.num_rollouts, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], + [self.batch_size, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=tf.float32) * stdev delta_u = self.Interpolator.interpolate(delta_u) return delta_u @@ -119,7 +119,7 @@ def inizialize_pertubation(self, random_gen): def mppi_prior(self, s, u_nom, random_gen, u_old): # generate random input sequence and clip to control limits delta_u = self.inizialize_pertubation(random_gen) - u_run = tf.tile(u_nom, [self.num_rollouts, 1, 1]) + delta_u + u_run = tf.tile(u_nom, [self.batch_size, 1, 1]) + delta_u u_run = tf.clip_by_value(u_run, self.action_low, self.action_high) #predict trajectories rollout_trajectory = self.predictor.predict_tf(s, u_run) @@ -157,7 +157,7 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) #first retrieve suboptimal control sequence with mppi diff --git a/Optimizers/optimizer_mppi_var_tf.py b/Optimizers/optimizer_mppi_var_tf.py index 5c4410e..4417b73 100644 --- a/Optimizers/optimizer_mppi_var_tf.py +++ b/Optimizers/optimizer_mppi_var_tf.py @@ -28,7 +28,7 @@ def __init__( R: float, LBD_mc: float, mpc_horizon: int, - num_rollouts: int, + batch_size: int, NU_mc: float, SQRTRHOINV_mc: float, GAMMA: float, @@ -47,7 +47,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -94,7 +94,7 @@ def reward_weighted_average(self, S, delta_u): #initialize the pertubations def inizialize_pertubation(self, random_gen, nuvec): - delta_u = random_gen.normal([self.num_rollouts, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=tf.float32) * nuvec * self.SQRTRHODTINV + delta_u = random_gen.normal([self.batch_size, self.Interpolator.number_of_interpolation_inducing_points, self.num_control_inputs], dtype=tf.float32) * nuvec * self.SQRTRHODTINV delta_u = self.Interpolator.interpolate(delta_u) return delta_u @@ -105,7 +105,7 @@ def do_step(self, s, u_nom, random_gen, u_old, nuvec): tape.watch(nuvec) #watch variances on tape delta_u = self.inizialize_pertubation(random_gen, nuvec) #initialize pertubations #build real input and clip, preserving gradient - u_run = tf.tile(u_nom, [self.num_rollouts, 1, 1]) + delta_u + u_run = tf.tile(u_nom, [self.batch_size, 1, 1]) + delta_u u_run = tfp.math.clip_by_value_preserve_gradient(u_run, self.action_low, self.action_high) #rollout and cost rollout_trajectory = self.predictor.predict_tf(s, u_run) @@ -131,7 +131,7 @@ def do_step(self, s, u_nom, random_gen, u_old, nuvec): def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) self.u, self.u_nom, new_nuvec, u_run, traj_cost = self.do_step(s, self.u_nom, self.rng, self.u, self.nuvec) diff --git a/Optimizers/optimizer_random_action_tf.py 
b/Optimizers/optimizer_random_action_tf.py index b7f18e2..7055d93 100644 --- a/Optimizers/optimizer_random_action_tf.py +++ b/Optimizers/optimizer_random_action_tf.py @@ -22,7 +22,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, optimizer_logging: bool, ): super().__init__( @@ -33,7 +33,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -54,11 +54,11 @@ def step(self, s: np.ndarray, time=None): if self.optimizer_logging: self.logging_values = {"s_logged": s.copy()} # Start all trajectories in current state - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) Q = self.rng.uniform( - shape=[self.num_rollouts, self.mpc_horizon, self.num_control_inputs], + shape=[self.batch_size, self.mpc_horizon, self.num_control_inputs], minval=self.action_low, maxval=self.action_high, dtype=tf.float32, @@ -82,7 +82,7 @@ def step(self, s: np.ndarray, time=None): def optimizer_reset(self): # generate random input sequence and clip to control limits Q = self.rng.uniform( - shape=[self.num_rollouts, self.mpc_horizon, self.num_control_inputs], + shape=[self.batch_size, self.mpc_horizon, self.num_control_inputs], minval=self.action_low, maxval=self.action_high, dtype=tf.float32, diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index c11ae3e..233a4bb 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -31,7 +31,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -56,7 +56,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -200,7 +200,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -254,13 +254,13 @@ def step(self, s: np.ndarray, time=None): if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories epsilon_res = self.sample_actions( - self.rng, self.num_rollouts - self.opt_keep_k + self.rng, self.batch_size - self.opt_keep_k ) epsilon_keep = tf.gather(self.epsilon, best_idx, axis=0) # resorting according to costs self.epsilon = tf.concat([epsilon_res, epsilon_keep], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx, axis=0), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of adam: # For the trajectories which are kept, the weights are shifted for a warmstart @@ -317,10 +317,10 @@ def optimizer_reset(self): self.alpha = tf.constant(self.maximum_entropy_alpha, dtype=tf.float32) # Sample new initial guesses for trajectories - self.epsilon = self.sample_actions(self.rng, 
self.num_rollouts) + self.epsilon = self.sample_actions(self.rng, self.batch_size) self.count = 0 # Reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/Optimizers/optimizer_rpgd_ml_tf.py b/Optimizers/optimizer_rpgd_ml_tf.py index 515b083..fcc4374 100644 --- a/Optimizers/optimizer_rpgd_ml_tf.py +++ b/Optimizers/optimizer_rpgd_ml_tf.py @@ -31,7 +31,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -57,7 +57,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -229,7 +229,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -286,12 +286,12 @@ def step(self, s: np.ndarray, time=None): adam_weights = self.opt.get_weights() if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories - epsilon_resampled = self.sample_actions(self.rng, self.num_rollouts - self.opt_keep_k) + epsilon_resampled = self.sample_actions(self.rng, self.batch_size - self.opt_keep_k) epsilon_retained = tf.gather(self.epsilon, best_idx, axis=0) # resorting according to costs self.epsilon = tf.concat([epsilon_resampled, epsilon_retained], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx, axis=0), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of Adam: @@ -300,14 +300,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -334,7 +334,7 @@ def optimizer_reset(self): else: self.theta = tf.Variable(initial_theta, dtype=tf.float32) - initial_Q = tf.zeros([self.num_rollouts, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) + initial_Q = tf.zeros([self.batch_size, self.mpc_horizon, self.num_control_inputs], dtype=tf.float32) if hasattr(self, "Q"): self.Q.assign(initial_Q) else: @@ -343,10 +343,10 @@ def optimizer_reset(self): self.alpha = tf.constant(self.maximum_entropy_alpha, dtype=tf.float32) # Sample new initial guesses for trajectories - self.epsilon = self.sample_actions(self.rng, self.num_rollouts) + self.epsilon = self.sample_actions(self.rng, self.batch_size) self.count = 0 # Reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = 
tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/Optimizers/optimizer_rpgd_particle_tf.py b/Optimizers/optimizer_rpgd_particle_tf.py index 4585fe4..f7a3749 100644 --- a/Optimizers/optimizer_rpgd_particle_tf.py +++ b/Optimizers/optimizer_rpgd_particle_tf.py @@ -25,7 +25,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -49,7 +49,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -164,7 +164,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -214,13 +214,13 @@ def step(self, s: np.ndarray, time=None): if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories Qres = self.sample_actions( - self.rng, self.num_rollouts - self.opt_keep_k + self.rng, self.batch_size - self.opt_keep_k ) Q_keep = tf.gather(Qn, best_idx) # resorting according to costs Qn = tf.concat([Qres, Q_keep], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of adam: # For the trajectories which are kept, the weights are shifted for a warmstart @@ -242,14 +242,14 @@ def step(self, s: np.ndarray, time=None): # For the new trajectories they are reset to 0 w1 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] ) w2 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] @@ -264,14 +264,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -294,7 +294,7 @@ def optimizer_reset(self): # # end of unnecessary part # sample new initial guesses for trajectories - Qn = self.sample_actions(self.rng, self.num_rollouts) + Qn = self.sample_actions(self.rng, self.batch_size) if hasattr(self, "Q_tf"): self.Q_tf.assign(Qn) else: @@ -304,4 +304,4 @@ def optimizer_reset(self): # reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 20eaf56..36143b8 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -25,7 +25,7 @@ def __init__( computation_library: "type[ComputationLibrary]", seed: int, mpc_horizon: int, - num_rollouts: int, + batch_size: 
int, outer_its: int, sample_stdev: float, resamp_per: int, @@ -49,7 +49,7 @@ def __init__( control_limits=control_limits, optimizer_logging=optimizer_logging, seed=seed, - num_rollouts=num_rollouts, + batch_size=batch_size, mpc_horizon=mpc_horizon, computation_library=computation_library, ) @@ -166,7 +166,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values = {"s_logged": s.copy()} # tile inital state and convert inputs to tensorflow tensors - s = np.tile(s, tf.constant([self.num_rollouts, 1])) + s = np.tile(s, tf.constant([self.batch_size, 1])) s = tf.convert_to_tensor(s, dtype=tf.float32) # warm start setup @@ -216,13 +216,13 @@ def step(self, s: np.ndarray, time=None): if self.count % self.resamp_per == 0: # if it is time to resample, new random input sequences are drawn for the worst bunch of trajectories Qres = self.sample_actions( - self.rng, self.num_rollouts - self.opt_keep_k + self.rng, self.batch_size - self.opt_keep_k ) Q_keep = tf.gather(Qn, best_idx) # resorting according to costs Qn = tf.concat([Qres, Q_keep], axis=0) self.trajectory_ages = tf.concat([ tf.gather(self.trajectory_ages, best_idx), - tf.zeros(self.num_rollouts - self.opt_keep_k, dtype=tf.int32) + tf.zeros(self.batch_size - self.opt_keep_k, dtype=tf.int32) ], axis=0) # Updating the weights of adam: # For the trajectories which are kept, the weights are shifted for a warmstart @@ -244,14 +244,14 @@ def step(self, s: np.ndarray, time=None): # For the new trajectories they are reset to 0 w1 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] ) w2 = tf.zeros( [ - self.num_rollouts - self.opt_keep_k, + self.batch_size - self.opt_keep_k, self.mpc_horizon, self.num_control_inputs, ] @@ -266,14 +266,14 @@ def step(self, s: np.ndarray, time=None): w1 = tf.concat( [ adam_weights[1][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) w2 = tf.concat( [ adam_weights[2][:, 1:, :], - tf.zeros([self.num_rollouts, 1, self.num_control_inputs]), + tf.zeros([self.batch_size, 1, self.num_control_inputs]), ], axis=1, ) @@ -296,7 +296,7 @@ def optimizer_reset(self): # # end of unnecessary part # sample new initial guesses for trajectories - Qn = self.sample_actions(self.rng, self.num_rollouts) + Qn = self.sample_actions(self.rng, self.batch_size) if hasattr(self, "Q_tf"): self.Q_tf.assign(Qn) else: @@ -306,4 +306,4 @@ def optimizer_reset(self): # reset optimizer adam_weights = self.opt.get_weights() self.opt.set_weights([tf.zeros_like(el) for el in adam_weights]) - self.trajectory_ages: tf.Tensor = tf.zeros((self.num_rollouts), dtype=tf.int32) + self.trajectory_ages: tf.Tensor = tf.zeros((self.batch_size), dtype=tf.int32) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index edbc331..beedf15 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -11,7 +11,7 @@ from numpy.random import SFC64, Generator from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary -LOGGING_LEVEL = logging.INFO +LOGGING_LEVEL = logging.DEBUG # usually INFO is good class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 From 64148a325befc3bd3c18500344f3409c413ba0b3 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 12 Dec 
2022 08:52:34 +0100 Subject: [PATCH 07/38] now spin and balance both work! and so does changing the policy and changing the cost weights during runtime! Yay, finally! Trick was to make sure that all values are actually propagated to tf variables, e.g. numpy arrays, ints, string, not just float Changed logging format to start with level for better readability. added string type to computation_library.py --- Controllers/cartpole_trajectory_generator.py | 27 +++++++++++++------- Controllers/controller_mpc.py | 2 +- others/globals_and_utils.py | 6 ++--- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index a00559f..74595eb 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -1,13 +1,16 @@ # stub for genrating desired future trajectory of cartpole import numpy +import numpy as np from torch import TensorType from Control_Toolkit.Controllers import template_controller +from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import ComputationLibrary from CartPole import state_utilities period=1 +log=get_logger(__name__) class cartpole_trajectory_generator: """ Generates target state trajectory for the cartpole """ @@ -34,22 +37,28 @@ def step(self, time: float, horizon: int) -> TensorType: target_angleD=-target_angleD traj=numpy.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable traj[:]=self.lib.nan # set all states undetermined - aim='balance' - if aim=='spin': + policy=self.controller.cost_function_wrapper.cost_function.policy + if policy is None: + raise RuntimeError(f'set policy in config_cost_functions.yml') + + if policy == 'spin': traj[state_utilities.POSITION_IDX] = self.controller.target_position # traj[state_utilities.ANGLE_COS_IDX, :] = self.controller.target_equilibrium # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * self.controller.target_equilibrium traj[state_utilities.ANGLED_IDX, :] = 1000*self.controller.target_equilibrium # traj[state_utilities.POSITIOND_IDX, :] = 0 - elif aim=='balance': + elif policy == 'balance': traj[state_utilities.POSITION_IDX] = self.controller.target_position - # traj[state_utilities.ANGLE_COS_IDX, :] = self.controller.target_equilibrium - # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * (1-self.controller.target_equilibrium)/2 # either 0 for up and pi for down - # traj[state_utilities.ANGLED_IDX, :] = 0 - # traj[state_utilities.POSITIOND_IDX, :] = 0 + target_angle=self.lib.pi * (1-self.controller.target_equilibrium)/2 # either 0 for up and pi for down + traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) + traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + traj[state_utilities.ANGLE_IDX, :] = target_angle + traj[state_utilities.ANGLED_IDX, :] = 0 + traj[state_utilities.POSITIOND_IDX, :] = 0 + else: + log.error(f'cost policy "{policy}" is unknown') - traj=self.lib.to_variable(traj, self.lib.float32) + # traj=self.lib.to_variable(traj, self.lib.float32) return traj diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d85f42c..589c18d 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -93,7 +93,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # process changes to configs using new returned 
change list if not changes is None: for k,v in changes.items(): - if isinstance(v, (int, float)): + if isinstance(v, (int, float, str)): updated_attributes[k]=v for o in objs: # for each object in objs, update its attributes update_attributes(updated_attributes,o) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index beedf15..b6dcb2a 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -24,13 +24,13 @@ class CustomFormatter(logging.Formatter): bold_red = "\x1b[31;1m" reset = "\x1b[0m" # File "{file}", line {max(line, 1)}'.replace("\\", "/") - format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' + format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { logging.DEBUG: grey + format + reset, logging.INFO: cyan + format + reset, - logging.WARNING: yellow + format + reset, - logging.ERROR: red + format + reset, + logging.WARNING: red + format + reset, + logging.ERROR: bold_red + format + reset, logging.CRITICAL: bold_red + format + reset } From fef38b083ed2744128f47cc6057ef5f587d2fb0d Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 12 Dec 2022 09:52:36 +0100 Subject: [PATCH 08/38] got basic shimmy movement to work now. added helper vars to access config values in cartpole_trajectory_generator.py. added dt to params in cartpole_trajectory_generator.step() call. --- Controllers/cartpole_trajectory_generator.py | 48 ++++++++++++++------ Controllers/controller_mpc.py | 14 +++--- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index 74595eb..f7e7114 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -23,39 +23,57 @@ def __init__(self, lib:ComputationLibrary, controller:template_controller=None): self.lib = lib self.controller:template_controller=controller - def step(self, time: float, horizon: int) -> TensorType: + def step(self, time: float, horizon: int, dt:float) -> TensorType: """ Computes the desired future state trajectory at this time. :param time: the scalar time in seconds + :param horizon: the number of horizon steps + :param dt: the timestep in seconds :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN entries for don't care states, and otherwise the desired state values. + It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired state values. 
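To make the NaN don't-care convention concrete, here is a minimal standalone sketch (an illustration only, not part of the patch; the state layout, horizon, and waveform are placeholders):

    import numpy as np

    NUM_STATES, POSITION_IDX, POSITIOND_IDX = 6, 0, 5   # placeholder state index layout
    horizon, dt = 25, 0.02                              # placeholder horizon and timestep
    traj = np.full((NUM_STATES, horizon), np.nan)       # NaN marks don't-care states
    times = np.linspace(0.0, horizon * dt, num=horizon)
    traj[POSITION_IDX, :] = 0.1 * np.sin(2.0 * np.pi * times)        # desired cart position
    traj[POSITIOND_IDX, :] = np.gradient(traj[POSITION_IDX, :], dt)  # consistent cart velocity

A cost function can then penalize only the rows of traj that are not NaN.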
""" - target_angleD=1000 - if time%period Date: Mon, 12 Dec 2022 12:12:04 +0100 Subject: [PATCH 09/38] added cartonly trajectory and fixed bug that erased the target position and equilibrium from updated attributes before they were updated in controller --- Controllers/cartpole_trajectory_generator.py | 52 +++++++++++++------- Controllers/controller_mpc.py | 1 - 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index f7e7114..0929e89 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -38,42 +38,60 @@ def step(self, time: float, horizon: int, dt:float) -> TensorType: traj=np.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable traj[:]=self.lib.nan # set all states undetermined - cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml - controller=self.controller # use controller to access attributes set in config_optimizers + # cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml + # controller=self.controller # use controller to access attributes set in config_optimizers - policy=cost_function.policy + policy=self.controller.cost_function_wrapper.cost_function.policy if policy is None: - raise RuntimeError(f'set policy in config_cost_functions.yml') + raise RuntimeError(f'set policy in config_self.controller.cost_function_wrapper.cost_functions.yml') + + gui_target_position=self.controller.target_position # GUI slider position + gui_target_equilibrium=self.controller.target_equilibrium # GUI switch +1 or -1 to make pole target up or down position if policy == 'spin': # spin pole CW or CCW depending on target_equilibrium up or down - traj[state_utilities.POSITION_IDX] = controller.target_position - # traj[state_utilities.ANGLE_COS_IDX, :] = controller.target_equilibrium + traj[state_utilities.POSITION_IDX] = gui_target_position + # traj[state_utilities.ANGLE_COS_IDX, :] = gui_target_equilibrium # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * controller.target_equilibrium - traj[state_utilities.ANGLED_IDX, :] = 1000*controller.target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target + # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * gui_target_equilibrium + traj[state_utilities.ANGLED_IDX, :] = 1000*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target # traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'balance': # balance upright or down at desired cart position - traj[state_utilities.POSITION_IDX] = controller.target_position - target_angle=self.lib.pi * (1-controller.target_equilibrium)/2 # either 0 for up and pi for down + traj[state_utilities.POSITION_IDX] = gui_target_position + target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) traj[state_utilities.ANGLE_IDX, :] = target_angle traj[state_utilities.ANGLED_IDX, :] = 0 traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'shimmy': # cart follows a desired cart position shimmy while keeping pole up or down - shimmy_period=cost_function.shimmy_period # seconds - 
shimmy_amp=cost_function.shimmy_amp # meters + per=self.controller.cost_function_wrapper.cost_function.shimmy_per # seconds + amp=self.controller.cost_function_wrapper.cost_function.shimmy_amp # meters endtime=time+horizon*dt times=np.linspace(time,endtime,num=horizon) - shimmy=shimmy_amp*np.sin((2*np.pi/shimmy_period)*times) - shimmyd=np.gradient(shimmy,dt) - traj[state_utilities.POSITION_IDX] = controller.target_position+shimmy - target_angle=self.lib.pi * (1-controller.target_equilibrium)/2 # either 0 for up and pi for down + cartpos=amp*np.sin((2*np.pi/per)*times) + cartvel=np.gradient(cartpos,dt) + traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos + target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) traj[state_utilities.ANGLE_IDX, :] = target_angle # traj[state_utilities.ANGLED_IDX, :] = 0 - traj[state_utilities.POSITIOND_IDX, :] = shimmyd + traj[state_utilities.POSITIOND_IDX, :] = cartvel + elif policy == 'cartonly': # cart follows the trajectory, pole ignored + per=self.controller.cost_function_wrapper.cost_function.cartonly_per # seconds + amp=self.controller.cost_function_wrapper.cost_function.cartonly_amp # meters + endtime=time+horizon*dt + times=np.linspace(time,endtime,num=horizon) + from scipy.signal import sawtooth + cartpos=amp*sawtooth((2*np.pi/per)*times, width=.5) # .5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html + cartvel=np.gradient(cartpos,dt) + traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos + # target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down + # traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) + # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + # traj[state_utilities.ANGLE_IDX, :] = target_angle + # traj[state_utilities.ANGLED_IDX, :] = 0 + traj[state_utilities.POSITIOND_IDX, :] = cartvel else: log.error(f'cost policy "{policy}" is unknown') diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index e0b8e28..1d12f80 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -81,7 +81,6 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # log.debug(f'step time={time:.3f}s') # now we fill this dict with config file changes if there are any and update attributes in the controller, the cost function, and the optimizer - updated_attributes.clear() # detect any changes in config scalar values and pass to this controller or the cost function or optimizer # note that the cost function that has its attributes updated is the enclosed cost function of the wrapper! for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): From 03fa3af90c0a6514dcd46722324283926ec5c4d0 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 12 Dec 2022 20:23:00 +0100 Subject: [PATCH 10/38] passing current state to cartpole_trajectory_generator.py so it can extrapolate it, and added option for using terminal cost or entire trajectory cost in cartpole_trajectory_cost.py. added more params to config_cost_functions.yml. fiddled with logging colors. 
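A sketch of what the switchable terminal-versus-trajectory cost mentioned above could look like; the function name and tensor layout are hypothetical, since cartpole_trajectory_cost.py itself is not shown in this patch:

    import tensorflow as tf

    def trajectory_cost(stage_costs: tf.Tensor, terminal_cost_only: bool) -> tf.Tensor:
        # stage_costs: [batch_size, mpc_horizon], per-timestep cost of each rollout
        if terminal_cost_only:
            return stage_costs[:, -1]               # judge each rollout by its final state only
        return tf.reduce_sum(stage_costs, axis=1)   # accumulate cost over the whole horizon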
changed yaml loader to one that can deal with scientific format numbers (ruamel.yaml) --- Controllers/cartpole_trajectory_generator.py | 19 +++++++++++++------ Controllers/controller_mpc.py | 5 ++++- Cost_Functions/cost_function_wrapper.py | 2 +- others/globals_and_utils.py | 4 +++- requirements.txt | 3 ++- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index 0929e89..806a209 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -23,23 +23,24 @@ def __init__(self, lib:ComputationLibrary, controller:template_controller=None): self.lib = lib self.controller:template_controller=controller - def step(self, time: float, horizon: int, dt:float) -> TensorType: + def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorType: """ Computes the desired future state trajectory at this time. :param time: the scalar time in seconds :param horizon: the number of horizon steps :param dt: the timestep in seconds + :param state: the current state of the cartpole :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired state values. + It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired future state values. """ traj=np.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable traj[:]=self.lib.nan # set all states undetermined - # cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml - # controller=self.controller # use controller to access attributes set in config_optimizers + cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml + controller=self.controller # use controller to access attributes set in config_optimizers policy=self.controller.cost_function_wrapper.cost_function.policy if policy is None: @@ -52,8 +53,14 @@ def step(self, time: float, horizon: int, dt:float) -> TensorType: traj[state_utilities.POSITION_IDX] = gui_target_position # traj[state_utilities.ANGLE_COS_IDX, :] = gui_target_equilibrium # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - # traj[state_utilities.ANGLE_IDX, :] = self.lib.pi * gui_target_equilibrium - traj[state_utilities.ANGLED_IDX, :] = 1000*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target + endtime=horizon*dt + times=np.linspace(0,endtime,num=horizon) + s_per_rev_target=cost_function.spin_rev_period_sec + rad_per_s_target=2*np.pi/s_per_rev_target + rad_per_dt=rad_per_s_target*dt + current_angle=state[state_utilities.ANGLE_IDX] + traj[state_utilities.ANGLE_IDX, :] = current_angle+gui_target_equilibrium*times*rad_per_dt + # traj[state_utilities.ANGLED_IDX, :] = rad_per_s_target*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target # traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'balance': # balance upright or down at desired cart position traj[state_utilities.POSITION_IDX] = gui_target_position diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 1d12f80..d1ace9b 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -96,7 +96,10 @@ def step(self, s: np.ndarray, time=None, updated_attributes: 
"dict[str, TensorTy # following gets target_position, target_equilibrium, and target_trajectory passed to tensorflow. The trajectory is passed in # as updated_attributes, and is transferred to tensorflow by the update_attributes call - new_target_trajectory = self.target_trajectory_generator.step(time=time, horizon=self.optimizer.mpc_horizon, dt=gui_default_params.controller_update_interval) + new_target_trajectory = self.target_trajectory_generator.step(time=time, + horizon=self.optimizer.mpc_horizon, + dt=gui_default_params.controller_update_interval, + state=s) updated_attributes['target_trajectory'] = new_target_trajectory update_attributes(updated_attributes,self) diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index 39b6234..4e2a9a4 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -55,7 +55,7 @@ def configure( config=self.cost_function_config['CartPole'][self.cost_function_name] # todo hardcoded 'CartPole' has to go, not sure how to determine it, maybe from module folder? update_attributes(config, self.cost_function) - log.info(f'configured controller {controller.__class__} with cost function {self.cost_function.__class__}') + log.info(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index b6dcb2a..4270e56 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -22,12 +22,14 @@ class CustomFormatter(logging.Formatter): green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" + light_blue = "\x1b[1;36m" + blue = "\x1b[1;34m" reset = "\x1b[0m" # File "{file}", line {max(line, 1)}'.replace("\\", "/") format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { - logging.DEBUG: grey + format + reset, + logging.DEBUG: light_blue + format + reset, logging.INFO: cyan + format + reset, logging.WARNING: red + format + reset, logging.ERROR: bold_red + format + reset, diff --git a/requirements.txt b/requirements.txt index 9fa1784..fdde05b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ numpy torch torchvision gym -dictdiffer \ No newline at end of file +dictdiffer +ruamel.yaml \ No newline at end of file From ab0f34e6177a87a42959b9459e0e6fd8c9d0a4d9 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Tue, 13 Dec 2022 08:14:58 +0100 Subject: [PATCH 11/38] added MPPI papers to docstring --- Optimizers/optimizer_mppi.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 51a96e3..38a3ed3 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -14,6 +14,17 @@ log = get_logger(__name__) class optimizer_mppi(template_optimizer): + """ Model Predictive Path Integral optimizer, based on + + Drews, Paul, Grady Williams, Brian Goldfain, Evangelos A. Theodorou, and James M. Rehg. 2017. “Aggressive Deep Driving: Model Predictive Control with a CNN Cost Model.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1707.05303. + + Williams, G., P. Drews, B. Goldfain, J. M. Rehg, and E. A. Theodorou. 2016. 
“Aggressive Driving with Model Predictive Path Integral Control.” In 2016 IEEE International Conference on Robotics and Automation (ICRA), 1433–40. https://doi.org/10.1109/ICRA.2016.7487277. + + Williams, Grady, Andrew Aldrich, and Evangelos A. Theodorou. 2017. “Model Predictive Path Integral Control: From Theory to Parallel Computation.” Journal of Guidance, Control, and Dynamics: A Publication of the American Institute of Aeronautics and Astronautics Devoted to the Technology of Dynamics and Control 40 (2): 344–57. https://doi.org/10.2514/1.G001921. + + Williams, Grady, Eric Rombokas, and Tom Daniel. 2015. “GPU Based Path Integral Control with Learned Dynamics.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1503.00330. + + """ supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} def __init__( From 7ad7be12ccd2640e9c0644b0d48742dbf791d2d2 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Tue, 13 Dec 2022 09:10:12 +0100 Subject: [PATCH 12/38] added MPPI papers to docstring --- Optimizers/optimizer_mppi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 38a3ed3..c41b006 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -16,12 +16,16 @@ class optimizer_mppi(template_optimizer): """ Model Predictive Path Integral optimizer, based on - Drews, Paul, Grady Williams, Brian Goldfain, Evangelos A. Theodorou, and James M. Rehg. 2017. “Aggressive Deep Driving: Model Predictive Control with a CNN Cost Model.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1707.05303. + The equations and parameters are defined in the following: Williams, G., P. Drews, B. Goldfain, J. M. Rehg, and E. A. Theodorou. 2016. “Aggressive Driving with Model Predictive Path Integral Control.” In 2016 IEEE International Conference on Robotics and Automation (ICRA), 1433–40. https://doi.org/10.1109/ICRA.2016.7487277. + A longer paper with all the math is: + Williams, Grady, Andrew Aldrich, and Evangelos A. Theodorou. 2017. “Model Predictive Path Integral Control: From Theory to Parallel Computation.” Journal of Guidance, Control, and Dynamics: A Publication of the American Institute of Aeronautics and Astronautics Devoted to the Technology of Dynamics and Control 40 (2): 344–57. https://doi.org/10.2514/1.G001921. + The following paper uses an LWPR model to fly Crazyflie drones using another type of MPPI with minibatches of rollouts: + Williams, Grady, Eric Rombokas, and Tom Daniel. 2015. “GPU Based Path Integral Control with Learned Dynamics.” arXiv [cs.RO]. arXiv. http://arxiv.org/abs/1503.00330.
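The core update these papers share can be sketched as an exponentially reward-weighted average of sampled input perturbations. A simplified numpy illustration (not the toolkit's implementation; LBD in the configs plays the role of the temperature lambda):

    import numpy as np

    def mppi_update(u_nom, delta_u, traj_costs, lbd=100.0):
        # u_nom: [horizon, num_inputs] nominal input sequence
        # delta_u: [batch_size, horizon, num_inputs] sampled perturbations
        # traj_costs: [batch_size] total cost of each perturbed rollout
        rho = np.min(traj_costs)                      # baseline subtraction for numerical stability
        weights = np.exp(-(traj_costs - rho) / lbd)   # low-cost rollouts get high weight
        weights = weights / np.sum(weights)
        return u_nom + np.einsum('b,bhi->hi', weights, delta_u)  # weighted average of perturbations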
""" From 86c8d6802176168d840c13079dd40423ccf85414 Mon Sep 17 00:00:00 2001 From: heetmeyer Date: Wed, 14 Dec 2022 15:46:13 +0100 Subject: [PATCH 13/38] Rename num_rollouts -> batch_size in config templates --- .../config_controllers.yml | 2 -- .../config_optimizers.yml | 28 +++++++++---------- Controllers/__init__.py | 5 ++-- Optimizers/optimizer_rpgd_me_tf.py | 2 +- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/Control_Toolkit_ASF_Template/config_controllers.yml b/Control_Toolkit_ASF_Template/config_controllers.yml index a603ba2..2d3df73 100644 --- a/Control_Toolkit_ASF_Template/config_controllers.yml +++ b/Control_Toolkit_ASF_Template/config_controllers.yml @@ -11,7 +11,6 @@ mpc: controller_logging: true do-mpc-discrete: mpc_horizon: 50 # steps - num_rollouts: 1 # Initial positions position_init: 0.0 positionD_init: 0.0 @@ -21,7 +20,6 @@ do-mpc-discrete: do-mpc: seed: null # If null, random seed based on datetime is used mpc_horizon: 50 # steps - num_rollouts: 1 p_Q: 0.00 # Perturbation factors: Change of output from optimal # Random change of cost function by factor p_position: 0.0 diff --git a/Control_Toolkit_ASF_Template/config_optimizers.yml b/Control_Toolkit_ASF_Template/config_optimizers.yml index d3571fe..485d06b 100644 --- a/Control_Toolkit_ASF_Template/config_optimizers.yml +++ b/Control_Toolkit_ASF_Template/config_optimizers.yml @@ -7,7 +7,7 @@ cem-tf: mpc_horizon: 40 # steps cem_outer_it: 3 #how many outer iterations to use cem_initial_action_stdev: 0.5 - num_rollouts: 200 #how many rollouts per outer cem iteration + batch_size: 200 #how many rollouts per outer cem iteration cem_stdev_min: 0.01 cem_best_k: 40 warmup: false @@ -16,7 +16,7 @@ cem-gmm-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 40 # steps cem_outer_it: 3 #how many outer iterations to use - num_rollouts: 200 #how many rollouts per outer cem iteration + batch_size: 200 #how many rollouts per outer cem iteration cem_stdev_min: 0.01 cem_initial_action_stdev: 0.5 cem_best_k: 40 @@ -24,7 +24,7 @@ cem-naive-grad-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 35 # steps cem_outer_it: 1 # how many outer iterations to use - num_rollouts: 200 # how many rollouts per outer cem iteration + batch_size: 200 # how many rollouts per outer cem iteration cem_stdev_min: 0.1 cem_initial_action_stdev: 0.5 cem_best_k: 40 @@ -37,7 +37,7 @@ cem-grad-bharadhwaj-tf: adam_beta_1: 0.9 adam_beta_2: 0.999 adam_epsilon: 1.0e-08 - num_rollouts: 32 + batch_size: 32 cem_best_k: 8 cem_outer_it: 2 cem_initial_action_stdev: 2 @@ -54,7 +54,7 @@ gradient-tf: adam_epsilon: 1.0e-07 rtol: 1.0e-3 gradient_steps: 5 - num_rollouts: 40 + batch_size: 40 initial_action_stdev: 0.5 gradmax_clip: 5 warmup: false @@ -67,7 +67,7 @@ mppi-optimize-tf: adam_epsilon: 1.0e-7 #default: 1.0e-7 gradmax_clip: 1000 mpc_horizon: 35 # steps - num_rollouts: 400 # Number of Monte Carlo samples + batch_size: 400 # Number of Monte Carlo samples cc_weight: 1.0 R: 1.0 # How much to punish Q LBD: 100.0 # Cost parameter lambda @@ -86,7 +86,7 @@ dist-adam-resamp2-tf: adam_epsilon: 1.0e-08 gradmax_clip: 5 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 2 resamp_per: 10 @@ -103,7 +103,7 @@ rpgd-tf: adam_epsilon: 1.0e-08 gradmax_clip: 5 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 2 resamp_per: 10 @@ -121,7 +121,7 @@ rpgd-me-tf: maximum_entropy_alpha: 0.0 gradmax_clip: 10 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 0 outer_its: 25 
resamp_per: 1 @@ -140,7 +140,7 @@ rpgd-ml-tf: maximum_entropy_alpha: 0.1 gradmax_clip: 10 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 5 resamp_per: 1 @@ -157,7 +157,7 @@ rpgd-particle-tf: adam_epsilon: 1.0e-08 gradmax_clip: 10 rtol: 1.0e-3 - num_rollouts: 32 + batch_size: 32 opt_keep_k: 8 outer_its: 5 resamp_per: 1 @@ -167,7 +167,7 @@ rpgd-particle-tf: mppi-var-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 35 # steps - num_rollouts: 400 # Number of Monte Carlo samples + batch_size: 400 # Number of Monte Carlo samples period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated cc_weight: 1.0 R: 1.0 # How much to punish Q @@ -183,7 +183,7 @@ mppi-var-tf: mppi: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) mpc_horizon: 35 # steps - num_rollouts: 3500 # Number of Monte Carlo samples + batch_size: 3500 # Number of Monte Carlo samples cc_weight: 1.0 R: 1.0 # How much to punish Q LBD: 100.0 # Cost parameter lambda @@ -194,4 +194,4 @@ mppi: random-action-tf: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) mpc_horizon: 35 # steps - num_rollouts: 320 + batch_size: 320 diff --git a/Controllers/__init__.py b/Controllers/__init__.py index a447167..64fa78f 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -8,9 +8,8 @@ from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) -from others.globals_and_utils import load_or_reload_config_if_modified +from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) logger = get_logger(__name__) """ @@ -126,7 +125,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[Te """ ### Any computations in order to retrieve the current control. Such as: ## If the environment's target positions etc. change, copy the new attributes over to this controller so the cost function knows about it: - # self.update_attributes(updated_attributes) + # update_attributes(updated_attributes,self) ## Use some sort of optimization procedure to get your control, e.g. 
# u = self.optimizer.step(s, time) ## Use the following call to populate the self.logs dictionary with savevars, such as: diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index 233a4bb..0f3b44b 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -139,7 +139,7 @@ def grad_step( ): # rollout trajectories and retrieve cost with tf.GradientTape(watch_accessed_variables=False) as tape: - # theta = tf.tile(tf.expand_dims(theta, 0), (self.num_rollouts, 1)) + # theta = tf.tile(tf.expand_dims(theta, 0), (self.batch_size, 1)) tape.watch(theta) Q = self.zeta(theta, epsilon) traj_cost, _ = self.predict_and_cost(s, Q) From 3ade5d072169630afe74117d9dfe4b5fc3cf7b46 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 18 Dec 2022 11:38:08 +0100 Subject: [PATCH 14/38] add comment --- Controllers/controller_mpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d1ace9b..d607df1 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -84,7 +84,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # detect any changes in config scalar values and pass to this controller or the cost function or optimizer # note that the cost function that has its attributes updated is the enclosed cost function of the wrapper! for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): - (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) + (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) # all the config files are currently assumed to be in Control_Toolkit_ASF folder # process changes to configs using new returned change list if not changes is None: for k,v in changes.items(): From 56d3e516e0830dbcdb4f4528870c55dcabe55c8f Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 18 Dec 2022 11:41:07 +0100 Subject: [PATCH 15/38] local changes, all minor except for trajectory cost that is in flux --- Controllers/cartpole_trajectory_generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py index 806a209..ed20c12 100644 --- a/Controllers/cartpole_trajectory_generator.py +++ b/Controllers/cartpole_trajectory_generator.py @@ -66,8 +66,8 @@ def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorT traj[state_utilities.POSITION_IDX] = gui_target_position target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) - traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) - traj[state_utilities.ANGLE_IDX, :] = target_angle + # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + # traj[state_utilities.ANGLE_IDX, :] = target_angle traj[state_utilities.ANGLED_IDX, :] = 0 traj[state_utilities.POSITIOND_IDX, :] = 0 elif policy == 'shimmy': # cart follows a desired cart position shimmy while keeping pole up or down @@ -90,7 +90,7 @@ def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorT endtime=time+horizon*dt times=np.linspace(time,endtime,num=horizon) from scipy.signal import sawtooth - 
cartpos=amp*sawtooth((2*np.pi/per)*times, width=cost_function.cartonly_duty_cycle) # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html cartvel=np.gradient(cartpos,dt) traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos # target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down # traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) # traj[state_utilities.ANGLE_IDX, :] = target_angle # traj[state_utilities.ANGLED_IDX, :] = 0 traj[state_utilities.POSITIOND_IDX, :] = cartvel From 67ba59997d4b6ce823f533de416c077a2d494793 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sat, 24 Dec 2022 07:42:53 -0800 Subject: [PATCH 16/38] renamed s to state for clarity in many of the classes. added prefs to cartpole gui for initial position and angle. increased num_rollouts to 1000, decreased horizon to 25 time steps, changed cost function for spin to pure cart angle speed. Now spin, balance, and shimmy all work quite well. moved cartpole_trajectory_generator back outside compiled TF code for ease of development and debugging; it is cheap so this is OK. added more logging of what gets compiled. added time to arguments for the base controller so that contained methods can access current time. added ruamel.yaml to requirements (this is the yaml parser that can handle scientific notation numbers) --- .../Controllers/controller_barebone.py | 2 +- .../Controllers/controller_do_mpc.py | 12 +- .../Controllers/controller_do_mpc_discrete.py | 12 +- .../Controllers/controller_lqr.py | 4 +- Controllers/__init__.py | 4 +- Controllers/cartpole_trajectory_generator.py | 107 ------------------ Controllers/controller_mpc.py | 45 +++++--- .../controller_neural_imitator_pytorch.py | 4 +- Controllers/controller_neural_imitator_tf.py | 4 +- Cost_Functions/__init__.py | 11 +- Cost_Functions/cost_function_wrapper.py | 5 +- Optimizers/optimizer_mppi.py | 62 ++++++---- others/globals_and_utils.py | 4 +- 13 files changed, 103 insertions(+), 173 deletions(-) delete mode 100644 Controllers/cartpole_trajectory_generator.py diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py index 29c6add..b25092c 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_barebone.py @@ -32,7 +32,7 @@ def configure(self): # u = 0.0 pass - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): # The controller has to adapt when environment-related attributes such as target positions change # Updated targets etc.
are passed as a dictionary updated_attributes diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py index 7d01b1e..c2431fa 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc.py @@ -136,16 +136,16 @@ def tvp_fun(self, t_ind): return self.tvp_template - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - s = cartpole_state_vector_to_namespace(s) + state = cartpole_state_vector_to_namespace(state) - self.x0['s.position'] = s.position - self.x0['s.positionD'] = s.positionD + self.x0['s.position'] = state.position + self.x0['s.positionD'] = state.positionD - self.x0['s.angle'] = s.angle - self.x0['s.angleD'] = s.angleD + self.x0['s.angle'] = state.angle + self.x0['s.angleD'] = state.angleD self.tvp_template['_tvp', :, 'target_position'] = self.target_position diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py index c2e5171..5e23315 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_do_mpc_discrete.py @@ -148,16 +148,16 @@ def configure(self): def tvp_fun(self, t_ind): return self.tvp_template - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - s = cartpole_state_vector_to_namespace(s) + state = cartpole_state_vector_to_namespace(state) - self.x0['s.position'] = s.position - self.x0['s.positionD'] = s.positionD + self.x0['s.position'] = state.position + self.x0['s.positionD'] = state.positionD - self.x0['s.angle'] = s.angle - self.x0['s.angleD'] = s.angleD + self.x0['s.angle'] = state.angle + self.x0['s.angleD'] = state.angleD self.tvp_template['_tvp', :, 'target_position'] = self.target_position diff --git a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py index 6aef009..3b895a5 100644 --- a/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py +++ b/Control_Toolkit_ASF_Template/Controllers/controller_lqr.py @@ -80,11 +80,11 @@ def configure(self): self.X = X self.eigVals = eigVals - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) state = np.array( - [[s[POSITION_IDX] - self.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]]) + [[state[POSITION_IDX] - self.target_position], [state[POSITIOND_IDX]], [state[ANGLE_IDX]], [state[ANGLED_IDX]]]) Q = np.dot(-self.K, state).item() diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 64fa78f..99ed1b1 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -112,11 +112,11 @@ def configure(self, **kwargs): pass @abstractmethod - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): + def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, 
Union[TensorType,float]]" = dict()): """ Execute one timestep of control. - :param s: the state array, dimensions are TODO add dimension to help users + :param state: the state array, dimensions are TODO add dimension to help users :param time: the time in seconds :param updated_attributes: dict of updated attributes diff --git a/Controllers/cartpole_trajectory_generator.py b/Controllers/cartpole_trajectory_generator.py deleted file mode 100644 index ed20c12..0000000 --- a/Controllers/cartpole_trajectory_generator.py +++ /dev/null @@ -1,107 +0,0 @@ -# stub for genrating desired future trajectory of cartpole -import numpy -import numpy as np -from torch import TensorType - -from Control_Toolkit.Controllers import template_controller -from Control_Toolkit.others.globals_and_utils import get_logger -from SI_Toolkit.computation_library import ComputationLibrary -from CartPole import state_utilities - -period=1 - -log=get_logger(__name__) - -class cartpole_trajectory_generator: - """ Generates target state trajectory for the cartpole """ - def __init__(self, lib:ComputationLibrary, controller:template_controller=None): - """ Construct the trajectory generator. - - :param lib: the computation library, e.g. tensorflow - :param horizon: the MPC horizon in timesteps - """ - self.lib = lib - self.controller:template_controller=controller - - def step(self, time: float, horizon: int, dt:float, state:np.ndarray) -> TensorType: - """ Computes the desired future state trajectory at this time. - - :param time: the scalar time in seconds - :param horizon: the number of horizon steps - :param dt: the timestep in seconds - :param state: the current state of the cartpole - - :returns: the target state trajectory of cartpole. - It should be a Tensor with NaN as at least first entries for don't care states, and otherwise the desired future state values. 
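For reference, the spin policy in the deleted listing below extrapolates the current pole angle forward at a constant spin rate over the horizon; a distilled sketch of that idea (argument names are hypothetical):

    import numpy as np

    def spin_angle_targets(current_angle, direction, rev_period_s, horizon, dt):
        # direction: +1 or -1, from the GUI target-equilibrium switch
        times = np.linspace(0.0, horizon * dt, num=horizon)  # seconds ahead of now
        rad_per_s = 2.0 * np.pi / rev_period_s               # desired angular rate
        return current_angle + direction * rad_per_s * times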
- - """ - - traj=np.zeros((state_utilities.NUM_STATES, horizon)) # must be numpy here because tensor is immutable - traj[:]=self.lib.nan # set all states undetermined - - cost_function=self.controller.cost_function_wrapper.cost_function # use cost_function to access attributes (fields) set in config_cost_functions.yml - controller=self.controller # use controller to access attributes set in config_optimizers - - policy=self.controller.cost_function_wrapper.cost_function.policy - if policy is None: - raise RuntimeError(f'set policy in config_self.controller.cost_function_wrapper.cost_functions.yml') - - gui_target_position=self.controller.target_position # GUI slider position - gui_target_equilibrium=self.controller.target_equilibrium # GUI switch +1 or -1 to make pole target up or down position - - if policy == 'spin': # spin pole CW or CCW depending on target_equilibrium up or down - traj[state_utilities.POSITION_IDX] = gui_target_position - # traj[state_utilities.ANGLE_COS_IDX, :] = gui_target_equilibrium - # traj[state_utilities.ANGLE_SIN_IDX, :] = 0 - endtime=horizon*dt - times=np.linspace(0,endtime,num=horizon) - s_per_rev_target=cost_function.spin_rev_period_sec - rad_per_s_target=2*np.pi/s_per_rev_target - rad_per_dt=rad_per_s_target*dt - current_angle=state[state_utilities.ANGLE_IDX] - traj[state_utilities.ANGLE_IDX, :] = current_angle+gui_target_equilibrium*times*rad_per_dt - # traj[state_utilities.ANGLED_IDX, :] = rad_per_s_target*gui_target_equilibrium # 1000 rad/s is arbitrary, not sure if this is best target - # traj[state_utilities.POSITIOND_IDX, :] = 0 - elif policy == 'balance': # balance upright or down at desired cart position - traj[state_utilities.POSITION_IDX] = gui_target_position - target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down - traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) - # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) - # traj[state_utilities.ANGLE_IDX, :] = target_angle - traj[state_utilities.ANGLED_IDX, :] = 0 - traj[state_utilities.POSITIOND_IDX, :] = 0 - elif policy == 'shimmy': # cart follows a desired cart position shimmy while keeping pole up or down - per=self.controller.cost_function_wrapper.cost_function.shimmy_per # seconds - amp=self.controller.cost_function_wrapper.cost_function.shimmy_amp # meters - endtime=time+horizon*dt - times=np.linspace(time,endtime,num=horizon) - cartpos=amp*np.sin((2*np.pi/per)*times) - cartvel=np.gradient(cartpos,dt) - traj[state_utilities.POSITION_IDX] = gui_target_position+cartpos - target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down - traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle) - traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) - traj[state_utilities.ANGLE_IDX, :] = target_angle - # traj[state_utilities.ANGLED_IDX, :] = 0 - traj[state_utilities.POSITIOND_IDX, :] = cartvel - elif policy == 'cartonly': # cart follows the trajectory, pole ignored - per=self.controller.cost_function_wrapper.cost_function.cartonly_per # seconds - amp=self.controller.cost_function_wrapper.cost_function.cartonly_amp # meters - endtime=time+horizon*dt - times=np.linspace(time,endtime,num=horizon) - from scipy.signal import sawtooth - cartpos=amp*sawtooth((2*np.pi/per)*times, width=cost_function.cartonly_duty_cycle) # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html - cartvel=np.gradient(cartpos,dt) - traj[state_utilities.POSITION_IDX] = 
gui_target_position+cartpos
-        # target_angle=self.lib.pi * (1-gui_target_equilibrium)/2 # either 0 for up and pi for down
-        # traj[state_utilities.ANGLE_COS_IDX, :] = np.cos(target_angle)
-        # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle)
-        # traj[state_utilities.ANGLE_IDX, :] = target_angle
-        # traj[state_utilities.ANGLED_IDX, :] = 0
-        traj[state_utilities.POSITIOND_IDX, :] = cartvel
-    else:
-        log.error(f'cost policy "{policy}" is unknown')
-
-    # traj=self.lib.to_variable(traj, self.lib.float32)
-
-    return traj
diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py
index d607df1..2a549f0 100644
--- a/Controllers/controller_mpc.py
+++ b/Controllers/controller_mpc.py
@@ -1,6 +1,7 @@
 import os
 from typing import Optional
 
+from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import generate_cartpole_trajectory
 from GUI import gui_default_params
 from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper
 
@@ -15,7 +16,6 @@
 
 from torch import inference_mode
 
-from Control_Toolkit.Controllers.cartpole_trajectory_generator import cartpole_trajectory_generator
 from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes
 
 config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader)
@@ -68,21 +68,43 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification:
             predictor_specification=predictor_specification
         )
 
-        # make a target position trajectory generator
-        self.target_trajectory_generator = cartpole_trajectory_generator(lib=self.computation_library, controller=self)
-
         if self.lib.lib == 'Pytorch':
             self.step = inference_mode()(self.step)
         else:
             self.step = self.step
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+    def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, TensorType]" = {}):
+        """ Compute one step of control.
+
+        :param state: the current state as 1d state vector
+        :param time: the current time in seconds
+        :param updated_attributes: a dict of values to update tensorflow assignment values in the cost function
+
+        :returns: the control vector (for cartpole, a scalar cart acceleration)
+
+        """
         # log.debug(f'step time={time:.3f}s')
+
+        # following gets target_position, target_equilibrium, and target_trajectory passed to tensorflow. The trajectory is passed in
+        # as updated_attributes, and is transferred to tensorflow by the update_attributes call
+        cost_function=self.cost_function_wrapper.cost_function
+        update_attributes(updated_attributes,cost_function) # update target_position and target_equilibrium in cost function to use
+        updated_attributes.clear()
+        target_trajectory = generate_cartpole_trajectory(time=time, state=state,controller=self, cost_function=self.cost_function_wrapper.cost_function)
+        updated_attributes['target_trajectory']=target_trajectory
+        update_attributes(updated_attributes,cost_function) # update
+        updated_attributes.clear()
+
+        # now we fill this dict with config file changes if there are any and update attributes in the controller, the cost function, and the optimizer
        # detect any changes in config scalar values and pass to this controller or the cost function or optimizer
+        # note that the cost function that has its attributes updated is the enclosed cost function of the wrapper!
+        # note the following mapping for config files
+        # config_controllers.yml -> self (i.e. 
controller_mpc) + # config_cost_functions.yml -> self.cost_function_wrapper.cost_function + # config_optimizers.yml -> self.optimizer AND self.predictor_wrapper.predictor for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) # all the config files are currently assumed to be in Control_Toolkit_ASF folder # process changes to configs using new returned change list @@ -94,17 +116,8 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy update_attributes(updated_attributes,o) log.debug(f'updated {objs} with scalar updated_attributes {updated_attributes}') - # following gets target_position, target_equilibrium, and target_trajectory passed to tensorflow. The trajectory is passed in - # as updated_attributes, and is transferred to tensorflow by the update_attributes call - new_target_trajectory = self.target_trajectory_generator.step(time=time, - horizon=self.optimizer.mpc_horizon, - dt=gui_default_params.controller_update_interval, - state=s) - updated_attributes['target_trajectory'] = new_target_trajectory - update_attributes(updated_attributes,self) - - # log.info(f'targetposition={self.target_position}, equil={self.target_equilibrium}') - u = self.optimizer.step(s, time) + + u = self.optimizer.step(state, time) self.update_logs(self.optimizer.logging_values) return u diff --git a/Controllers/controller_neural_imitator_pytorch.py b/Controllers/controller_neural_imitator_pytorch.py index 56438a5..0e96ad5 100644 --- a/Controllers/controller_neural_imitator_pytorch.py +++ b/Controllers/controller_neural_imitator_pytorch.py @@ -45,10 +45,10 @@ def configure(self): self.net.reset() self.net.eval() - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - net_input = s[ + net_input = state[ ..., [STATE_INDICES.get(key) for key in self.net_info.inputs[:-1]] ] # -1 is a fix to exclude target position net_input = np.append(net_input, self.target_position) diff --git a/Controllers/controller_neural_imitator_tf.py b/Controllers/controller_neural_imitator_tf.py index 70855f1..20da62c 100644 --- a/Controllers/controller_neural_imitator_tf.py +++ b/Controllers/controller_neural_imitator_tf.py @@ -40,10 +40,10 @@ def configure(self): self.evaluate_net = CompileAdaptive(self._evaluate_net) - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): update_attributes(updated_attributes,self) - net_input = s[ + net_input = state[ ..., [STATE_INDICES.get(key) for key in self.net_info.inputs[:-1]] ] # -1 is a fix to exclude target position net_input = np.append(net_input, self.target_position) diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 3c4731c..a9ed600 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -38,7 +38,7 @@ def get_terminal_cost(self, terminal_states: TensorType) -> TensorType: """ raise NotImplementedError("To be implemented in subclass.") - def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType) -> TensorType: + def 
get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType, time:float) -> TensorType:
         """Compute all stage costs of a batch of states and control inputs.
         One "stage" is one step in the MPC horizon.
@@ -53,9 +53,7 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input:
         """
         raise NotImplementedError("To be implemented in subclass.")
 
-    def get_trajectory_cost(
-        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None
-    ) -> TensorType:
+    def get_trajectory_cost(self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, time:float=None) -> TensorType:
         """Helper function which computes a batch of the summed cost of a trajectory.
         Can be overwritten in a subclass, e.g. if weighted sum is required.
         The batch dimension is used to compute for multiple rollouts in parallel.
@@ -66,10 +64,13 @@ def get_trajectory_cost(
         :type inputs: TensorType
         :param previous_input: The most recent actually applied control, defaults to None
         :type previous_input: TensorType, optional
+        :param time: the time in seconds
+        :type time: float
+
         :return: The summed cost of the trajectory. Has shape [batch_size].
         :rtype: TensorType
         """
-        stage_cost = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input)  # Select all but last state of the horizon
+        stage_cost = self.get_stage_cost(states=state_horizon[:, :-1, :], inputs=inputs, previous_input=previous_input, time=time)  # Select all but last state of the horizon
         total_cost = self.lib.sum(stage_cost, 1)  # Sum across the MPC horizon dimension
         total_cost = total_cost + self.get_terminal_cost(state_horizon[:, -1, :])
         return total_cost
diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py
index 4e2a9a4..aea21b3 100644
--- a/Cost_Functions/cost_function_wrapper.py
+++ b/Cost_Functions/cost_function_wrapper.py
@@ -78,10 +78,9 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input:
         return self.cost_function.get_stage_cost(states, inputs, previous_input)
 
     def get_trajectory_cost(
-        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None
-    ):
+        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None, time:float=None):
         """Refer to :func:`the base cost function `"""
-        return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input)
+        return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input, time=time)
 
     def copy(self):
         """
diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py
index c41b006..711f8b8 100644
--- a/Optimizers/optimizer_mppi.py
+++ b/Optimizers/optimizer_mppi.py
@@ -1,7 +1,8 @@
 from typing import Tuple
 
 from Control_Toolkit.others.globals_and_utils import get_logger
-from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary
+from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary, \
+    TensorType
 
 import numpy as np
 
@@ -86,7 +87,6 @@
         self.Interpolator = Interpolator(self.mpc_horizon, period_interpolation_inducing_points, self.num_control_inputs, self.lib)
 
         # here the predictor, cost computer, and optimizer are compiled to native instructions by tensorflow graphs and XLA JIT
-        # before this, we copy all the required configuration to these objects so that they can be later updated during runtime.
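         # (CompileAdaptive, imported from SI_Toolkit.Functions.TF.Compile, presumably applies tf.function/XLA
         # compilation when the computation library is TensorFlow, and leaves the wrapped function as a plain
         # Python callable otherwise; it uses self.lib to recognize the active library, as noted in the controller base class)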
self.predict_and_cost = CompileAdaptive(self._predict_and_cost) self.predict_optimal_trajectory = CompileAdaptive(self._predict_optimal_trajectory) @@ -115,13 +115,23 @@ def check_dimensions_s(self, s): return s #mppi correction - def mppi_correction_cost(self, u, delta_u): + def mppi_correction_cost(self, u, delta_u, time=None): return self.lib.sum(self.cc_weight * (0.5 * (1 - 1.0 / self.NU) * self.R * (delta_u ** 2) + self.R * u * delta_u + 0.5 * self.R * (u ** 2)), (1, 2)) #total cost of the trajectory - def get_mppi_trajectory_cost(self, state_horizon ,u, u_prev, delta_u): - total_cost = self.cost_function.get_trajectory_cost(state_horizon,u, u_prev) - mppi_correction_cost =self.mppi_correction_cost(u, delta_u) + def get_mppi_trajectory_cost(self, state_horizon ,u, u_prev, delta_u, time:float=None): + """ Compute the total trajectory costs for all the rollouts + + :param state_horizon: the states as [rollouts,timesteps,states] + :param u: the control as ??? TODO + :param u_prev: the previous control input + :param delta_u: change in control input, TODO passed in for efficiency? + :param time: the time in seconds + + :returns: the total mppi cost for each rollout, i.e. 1d-vector of costs per rollout + """ + total_cost = self.cost_function.get_trajectory_cost(state_horizon,u, u_prev,time=time) + mppi_correction_cost =self.mppi_correction_cost(u, delta_u, time=time) total_mppi_cost = total_cost +mppi_correction_cost return total_mppi_cost @@ -143,25 +153,28 @@ def inizialize_pertubation(self, random_gen): return delta_u - def _predict_and_cost(self, s, u_nom, random_gen, u_old): + def _predict_and_cost(self, state:TensorType, u_nom:TensorType, random_gen, u_old:TensorType, time:float=None): """ Predict dynamics and compute costs of trajectories - TODO add params and return - :param ?? + :param state: the current state of system, dimensions are [rollouts, timesteps, states] + :param u_nom: the nominal control input + :param random_gen: the random generator + :param u_old: previous control input + :param time: time in seconds - :returns: ?? + :returns: u, u_nom: the new control input TODO what are u and u_nom? 
""" - s = self.lib.tile(s, (self.batch_size, 1)) + state = self.lib.tile(state, (self.batch_size, 1)) # generate random input sequence and clip to control limits u_nom = self.lib.concat([u_nom[:, 1:, :], u_nom[:, -1:, :]], 1) delta_u = self.inizialize_pertubation(random_gen) u_run = self.lib.tile(u_nom, (self.batch_size, 1, 1)) + delta_u u_run = self.lib.clip(u_run, self.action_low, self.action_high) - rollout_trajectory = self.predictor.predict_tf(s, u_run) - traj_cost = self.get_mppi_trajectory_cost(rollout_trajectory, u_run, u_old, delta_u) + rollout_trajectory = self.predictor.predict_tf(state, u_run, time=time) + traj_cost = self.get_mppi_trajectory_cost(rollout_trajectory, u_run, u_old, delta_u, time=time) u_nom = self.lib.clip(u_nom + self.reward_weighted_average(traj_cost, delta_u), self.action_low, self.action_high) u = u_nom[0, 0, :] - self.update_internal_state(s, u_nom) + self.update_internal_state(state, u_nom) return self.mppi_output(u, u_nom, rollout_trajectory, traj_cost, u_run) def update_internal_state_of_RNN(self, s, u_nom): @@ -174,13 +187,22 @@ def _predict_optimal_trajectory(self, s, u_nom): return optimal_trajectory #step function to find control - def step(self, s: np.ndarray, time=None): + def step(self, state: np.ndarray, time=None): + """ Does one timestep of control + + :param state: the current state + :param time: the current time in seconds + + :returns: u, the new control input to system + """ if self.optimizer_logging: - self.logging_values = {"s_logged": s.copy()} - s = self.lib.to_tensor(s, self.lib.float32) - s = self.check_dimensions_s(s) + self.logging_values = {"s_logged": state.copy()} + state = self.lib.to_tensor(state, self.lib.float32) + state = self.check_dimensions_s(state) + + tf_time=self.lib.to_tensor(time,self.lib.float32) # must pass scalar tensor for time to prevent recompiling tensorflow functions over and over - self.u, self.u_nom, rollout_trajectory, traj_cost, u_run = self.predict_and_cost(s, self.u_nom, self.rng, self.u) + self.u, self.u_nom, rollout_trajectory, traj_cost, u_run = self.predict_and_cost(state, self.u_nom, self.rng, self.u, time=tf_time) self.u = self.lib.to_numpy(self.lib.squeeze(self.u)) # print(f'mean traj cost={np.mean(traj_cost.numpy()):.2f}') # todo debug @@ -193,7 +215,7 @@ def step(self, s: np.ndarray, time=None): self.logging_values["u_logged"] = self.u if False: - self.optimal_trajectory = self.lib.to_numpy(self.predict_optimal_trajectory(s, self.u_nom)) + self.optimal_trajectory = self.lib.to_numpy(self.predict_optimal_trajectory(state, self.u_nom)) return self.u diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 4270e56..3a2b12d 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -246,4 +246,6 @@ def get_optimizer_name(optimizer_names=None, optimizer_name=None, optimizer_idx= else: optimizer_name = optimizer_names[optimizer_idx] - return optimizer_name, optimizer_idx \ No newline at end of file + return optimizer_name, optimizer_idx + + From 1f324f4135f9733c84c55ee1db1de5560c3ce54c Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Mon, 26 Dec 2022 04:53:12 -0800 Subject: [PATCH 17/38] added dancer that reads CSV file to specify sequence of 'steps' (behaviors). 
--- Controllers/controller_mpc.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 2a549f0..2489b6b 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -1,7 +1,7 @@ import os from typing import Optional -from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import generate_cartpole_trajectory +from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import cartpole_trajectory_generator from GUI import gui_default_params from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper @@ -25,7 +25,20 @@ class controller_mpc(template_controller): _has_optimizer = True - + + def __init__( + self, + dt: float, + environment_name: str, + num_states: int, + num_control_inputs: int, + control_limits: "Tuple[np.ndarray, np.ndarray]", + initial_environment_attributes: "dict[str, TensorType]", + ): + super().__init__(dt, environment_name, num_states, num_control_inputs, control_limits, + initial_environment_attributes) + self.cartpole_trajectory_generator = cartpole_trajectory_generator() + def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) @@ -92,7 +105,9 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str cost_function=self.cost_function_wrapper.cost_function update_attributes(updated_attributes,cost_function) # update target_position and target_equilibrium in cost function to use updated_attributes.clear() - target_trajectory = generate_cartpole_trajectory(time=time, state=state,controller=self, cost_function=self.cost_function_wrapper.cost_function) + target_trajectory = \ + self.cartpole_trajectory_generator.\ + generate_cartpole_trajectory(time=time, state=state, controller=self, cost_function=self.cost_function_wrapper.cost_function) updated_attributes['target_trajectory']=target_trajectory update_attributes(updated_attributes,cost_function) # update updated_attributes.clear() From 0b543a01df6adaa424cd6d8bce66a9a8929b72cb Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sat, 28 Jan 2023 12:36:55 +0100 Subject: [PATCH 18/38] fixed import of CompileTF to point to SI_Toolkit --- Optimizers/optimizer_rpgd_tf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 36143b8..82051de 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -1,11 +1,13 @@ from typing import Tuple + +from SI_Toolkit.Functions.TF.Compile import CompileTF from SI_Toolkit.computation_library import ComputationLibrary, TensorFlowLibrary import numpy as np import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import get_logger from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper From c0ee6205af886ec540c0391b341f67fe1af82f9c Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Fri, 3 Feb 2023 16:31:58 +0100 Subject: [PATCH 19/38] merged from Tobi_Dance and added some loggers --- .../config_optimizers.yml | 3 -- Controllers/__init__.py | 9 +++--- 
Controllers/controller_mpc.py | 5 +-- Optimizers/optimizer_mppi.py | 2 -- Optimizers/optimizer_mppi_optimize_tf.py | 2 -- Optimizers/optimizer_mppi_var_tf.py | 2 -- others/globals_and_utils.py | 32 ++++++++++++------- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/Control_Toolkit_ASF_Template/config_optimizers.yml b/Control_Toolkit_ASF_Template/config_optimizers.yml index 6786ce8..32ff5aa 100644 --- a/Control_Toolkit_ASF_Template/config_optimizers.yml +++ b/Control_Toolkit_ASF_Template/config_optimizers.yml @@ -73,7 +73,6 @@ mppi-optimize-tf: LBD: 100.0 # Cost parameter lambda NU: 1000.0 # Exploration variance SQRTRHOINV: 0.02 - GAMMA: 1.00 # Future cost discount period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated optim_steps: 10 dist-adam-resamp2-tf: @@ -175,7 +174,6 @@ mppi-var-tf: LBD_mc: 10.0 # Cost parameter lambda SQRTRHOINV_mc: 0.002 # Sampling variance NU_mc: 20.0 # Exploration variance - GAMMA: 1.00 # Future cost discount LR: 1000 # Learning rate for adaption of variance, !!! Set to 0 to retrieve a mppi version in accordance with mppi paper STDEV_min: 0.01 # Maximal variance for sampling STDEV_max: 10 # Minimal sampling variance for sampling @@ -189,7 +187,6 @@ mppi-tf: LBD: 100.0 # Cost parameter lambda NU: 1000.0 # Exploration variance SQRTRHOINV: 0.03 # Sampling variance - GAMMA: 1.00 # Future cost discount period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated random-action-tf: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index d8753d0..4f31edf 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -1,5 +1,6 @@ import os from abc import ABC, abstractmethod +from pathlib import Path from typing import Tuple import numpy as np @@ -38,10 +39,10 @@ def __init__( initial_environment_attributes: "dict[str, TensorType]", ): # Load controller config and select the entry for the current controller - config_controllers = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")), - Loader=yaml.FullLoader - ) + f=os.path.join("Control_Toolkit_ASF", "config_controllers.yml") + fp=Path(f) + logger.debug(f'loading controller config from "{fp.absolute()}"') + config_controllers = yaml.load(open(f), Loader=yaml.FullLoader) # self.controller_name is inferred from the class name, which is the class being instantiated # Example: If you create a controller_mpc, this controller_template.__init__ will be called # but the class name will be controller_mpc, not template_controller. 
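As context for the config loading above: the controller's entry in config_controllers.yml is keyed by the class name of the instantiated controller. A minimal sketch of that lookup (the exact key-derivation helper is an assumption for illustration; only the class-name inference is stated in the code):

    import os
    import yaml

    def select_controller_config(controller) -> dict:
        # load the application-specific controller config
        config_controllers = yaml.load(
            open(os.path.join("Control_Toolkit_ASF", "config_controllers.yml")),
            Loader=yaml.FullLoader)
        # e.g. an instance of controller_mpc selects the "mpc" entry (assumed naming convention)
        name = type(controller).__name__.replace("controller_", "")
        return config_controllers[name]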
diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index d6cd4ae..e3bd585 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -25,15 +25,16 @@ class controller_mpc(template_controller): def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - logger.info(f"Using optimizer {optimizer_name} specified in controller config file") + logger.info(f'Using optimizer "{optimizer_name}" specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - logger.info(f"Using predictor {predictor_specification} specified in controller config file") + logger.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') config_optimizer = config_optimizers[optimizer_name] # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) + logger.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') self.cost_function = CostFunctionWrapper() self.cost_function.configure(self, cost_function_specification=cost_function_specification) diff --git a/Optimizers/optimizer_mppi.py b/Optimizers/optimizer_mppi.py index 81e48c2..c55b14d 100644 --- a/Optimizers/optimizer_mppi.py +++ b/Optimizers/optimizer_mppi.py @@ -29,7 +29,6 @@ def __init__( num_rollouts: int, NU: float, SQRTRHOINV: float, - GAMMA: float, period_interpolation_inducing_points: int, optimizer_logging: bool, ): @@ -56,7 +55,6 @@ def __init__( self.LBD = LBD self.NU = self.lib.to_tensor(NU, self.lib.float32) self._SQRTRHOINV = SQRTRHOINV - self.GAMMA = GAMMA self.update_internal_state = self.update_internal_state_of_RNN # FIXME: There is one unnecessary operation in this function in case it is not an RNN. 
diff --git a/Optimizers/optimizer_mppi_optimize_tf.py b/Optimizers/optimizer_mppi_optimize_tf.py index f19a584..180eee0 100644 --- a/Optimizers/optimizer_mppi_optimize_tf.py +++ b/Optimizers/optimizer_mppi_optimize_tf.py @@ -30,7 +30,6 @@ def __init__( num_rollouts: int, NU: float, SQRTRHOINV: float, - GAMMA: float, gradmax_clip: float, optim_steps: int, mppi_LR: float, @@ -61,7 +60,6 @@ def __init__( # MPPI parameters self.NU = NU self._SQRTRHOINV = SQRTRHOINV - self.GAMMA = GAMMA # Optimization params self.gradmax_clip = tf.constant(gradmax_clip, dtype = tf.float32) diff --git a/Optimizers/optimizer_mppi_var_tf.py b/Optimizers/optimizer_mppi_var_tf.py index 5c4410e..3edd3c3 100644 --- a/Optimizers/optimizer_mppi_var_tf.py +++ b/Optimizers/optimizer_mppi_var_tf.py @@ -31,7 +31,6 @@ def __init__( num_rollouts: int, NU_mc: float, SQRTRHOINV_mc: float, - GAMMA: float, LR: float, max_grad_norm: float, STDEV_min: float, @@ -57,7 +56,6 @@ def __init__( self.R = R self.LBD = LBD_mc self.NU = NU_mc - self.GAMMA = GAMMA self.mppi_lr = LR self.stdev_min = STDEV_min self.stdev_max = STDEV_max diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 13ac619..fa15c38 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -12,36 +12,44 @@ from SI_Toolkit.Functions.TF.Compile import CompileTF, CompileAdaptive from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary -LOGGING_LEVEL = logging.INFO - - +LOGGING_LEVEL = logging.DEBUG # usually INFO is good class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" + # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 grey = "\x1b[38;21m" yellow = "\x1b[33;21m" + cyan = "\x1b[1;36m" # dark green + green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" + light_blue = "\x1b[1;36m" + blue = "\x1b[1;34m" reset = "\x1b[0m" - format = ( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)" - ) + # File "{file}", line {max(line, 1)}'.replace("\\", "/") + format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { - logging.DEBUG: grey + format + reset, - logging.INFO: grey + format + reset, - logging.WARNING: yellow + format + reset, - logging.ERROR: red + format + reset, - logging.CRITICAL: bold_red + format + reset, + logging.DEBUG: light_blue + format + reset, + logging.INFO: cyan + format + reset, + logging.WARNING: red + format + reset, + logging.ERROR: bold_red + format + reset, + logging.CRITICAL: bold_red + format + reset } def format(self, record): log_fmt = self.FORMATS.get(record.levelno) formatter = logging.Formatter(log_fmt) - return formatter.format(record) + return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links def get_logger(name): + """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. + + :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + + :returns: the logger. 
+ """ # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logger = logging.getLogger(name) logger.setLevel(LOGGING_LEVEL) From ee9ca38f92b208c11f84601f58f257c845e4a424 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Mon, 6 Feb 2023 16:19:19 +0100 Subject: [PATCH 20/38] moved get_logger to own file in SI_Toolkit Started merging from remote/origin/Tobi_Dance --- Controllers/__init__.py | 13 +++--- Controllers/controller_mpc.py | 13 +++--- Cost_Functions/__init__.py | 56 +++++++++++++++++------- Cost_Functions/cost_function_wrapper.py | 55 +++++++++++++++++------ Optimizers/optimizer_rpgd_me_tf.py | 5 ++- Optimizers/optimizer_rpgd_ml_tf.py | 5 ++- Optimizers/optimizer_rpgd_particle_tf.py | 5 ++- Optimizers/optimizer_rpgd_tf.py | 5 ++- others/environment.py | 2 +- others/globals_and_utils.py | 50 +-------------------- 10 files changed, 112 insertions(+), 97 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 4f31edf..db61e4f 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -5,13 +5,14 @@ import numpy as np import yaml -from Control_Toolkit.others.globals_and_utils import get_logger from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) +from get_logger import get_logger +log = get_logger(__name__) + config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) -logger = get_logger(__name__) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: @@ -41,7 +42,7 @@ def __init__( # Load controller config and select the entry for the current controller f=os.path.join("Control_Toolkit_ASF", "config_controllers.yml") fp=Path(f) - logger.debug(f'loading controller config from "{fp.absolute()}"') + log.debug(f'loading controller config from "{fp.absolute()}"') config_controllers = yaml.load(open(f), Loader=yaml.FullLoader) # self.controller_name is inferred from the class name, which is the class being instantiated # Example: If you create a controller_mpc, this controller_template.__init__ will be called @@ -54,7 +55,7 @@ def __init__( if computation_library_name: # Assign computation library from config - logger.info(f"Found library {computation_library_name} for MPC controller.") + log.info(f"Found library {computation_library_name} for MPC controller.") if "tensorflow" in computation_library_name.lower(): self._computation_library = TensorFlowLibrary elif "pytorch" in computation_library_name.lower(): @@ -68,7 +69,7 @@ def __init__( if not issubclass(self.computation_library, ComputationLibrary): raise ValueError(f"{self.__class__.__name__} does not have a default computation library set. You have to define one in this controller's config.") else: - logger.info(f"No computation library specified in controller config. Using default {self.computation_library} for class.") + log.info(f"No computation library specified in controller config. Using default {self.computation_library} for class.") self.lib = self.computation_library # Shortcut to make easy using functions from computation library, this is also used by CompileAdaptive to recognize library # Environment-related parameters @@ -124,7 +125,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # Optionally: A method called after an experiment. # May be used to print some statistics about controller performance (e.g. number of iter. 
to converge) def controller_report(self): - logger.info("No controller report implemented for this controller. Stopping without report.") + log.info("No controller report implemented for this controller. Stopping without report.") pass # Optionally: reset the controller after an experiment diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index e3bd585..493b3ac 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -9,14 +9,17 @@ from Control_Toolkit.Optimizers import template_optimizer from SI_Toolkit.computation_library import TensorType -from Control_Toolkit.others.globals_and_utils import get_logger, import_optimizer_by_name +from Control_Toolkit.others.globals_and_utils import import_optimizer_by_name from torch import inference_mode +from get_logger import get_logger +log = get_logger(__name__) + + config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) -logger = get_logger(__name__) class controller_mpc(template_controller): @@ -25,16 +28,16 @@ class controller_mpc(template_controller): def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - logger.info(f'Using optimizer "{optimizer_name}" specified in controller config file') + log.info(f'Using optimizer "{optimizer_name}" specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - logger.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') + log.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') config_optimizer = config_optimizers[optimizer_name] # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) - logger.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') + log.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') self.cost_function = CostFunctionWrapper() self.cost_function.configure(self, cost_function_specification=cost_function_specification) diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index a98da9d..67bcf0d 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -1,18 +1,34 @@ from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType from Control_Toolkit.Controllers import template_controller -from Control_Toolkit.others.globals_and_utils import get_logger - -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class cost_function_base: + """ Base cost function for all MPC systems + """ # Default: Class supports all libs to compute costs supported_computation_libraries = {NumpyLibrary, TensorFlowLibrary, PyTorchLibrary} - - def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]") -> None: - self.controller = controller + # Define default values used for cost normalization + MIN_COST = -1.0 + MAX_COST = 0.0 + COST_RANGE = MAX_COST - MIN_COST + + def __init__(self, 
controller: template_controller, ComputationLib: "type[ComputationLibrary]", config:dict=None) -> None:
+        """ Makes a new cost function
+
+        :param controller: the controller
+        :param ComputationLib: the library, e.g. numpy, tensorflow
+        :param config: the dict of configuration for this cost function. The caller can modify the config to change behavior during runtime.
+
+        """
+
+        self.lib:Optional[ComputationLibrary] = None
+        self.controller:template_controller = controller
+        self.config:dict=config
         self.set_computation_library(ComputationLib)
-
+        logger.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}')
+
     def get_terminal_cost(self, terminal_states: TensorType) -> TensorType:
         """Compute a batch of terminal costs for a batch of terminal states.
 
@@ -21,11 +37,13 @@ def get_terminal_cost(self, terminal_states: TensorType) -> TensorType:
         :return: The terminal costs. Has shape [batch_size]
         :rtype: TensorType
         """
-        raise NotImplementedError("To be implemented in subclass.")
+        # Default behavior: Return a zero cost scalar per sample of batch
+        return self.lib.zeros_like(terminal_states)[:,:1]  # Shape: (batch_size x 1)
 
-    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType) -> TensorType:
+    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType, time:float) -> TensorType:
         """Compute all stage costs of a batch of states and control inputs.
         One "stage" is one step in the MPC horizon.
+        Stage costs are shifted so that they are <= 0. Reason: reward = -cost is then >= 0 and therefore easier to interpret.
 
         :param states: Has shape [batch_size, mpc_horizon, num_states]
         :type states: TensorType
@@ -36,11 +54,15 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input:
         :return: The stage costs. Has shape [batch_size, mpc_horizon]
         :rtype: TensorType
         """
+        stage_costs = self._get_stage_cost(states, inputs, previous_input)  # compute the raw stage costs in the subclass
+        return stage_costs - self.MAX_COST
+        # Could also normalize to [-1, 0]:
+        # (stage_costs - self.MIN_COST) / self.COST_RANGE - 1
+
+    def _get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType) -> TensorType:
         raise NotImplementedError("To be implemented in subclass.")
 
-    def get_trajectory_cost(
-        self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None
-    ) -> TensorType:
+    def get_trajectory_cost(self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, time:float=None) -> TensorType:
         """Helper function which computes a batch of the summed cost of a trajectory.
         Can be overwritten in a subclass, e.g. if weighted sum is required.
         The batch dimension is used to compute for multiple rollouts in parallel.
@@ -51,12 +73,15 @@ def get_trajectory_cost(
         :type inputs: TensorType
         :param previous_input: The most recent actually applied control, defaults to None
         :type previous_input: TensorType, optional
+        :param time: the time in seconds
+        :type time: float
+
        :return: The summed cost of the trajectory. Has shape [batch_size]. 
:rtype: TensorType """ - stage_cost = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) # Select all but last state of the horizon - total_cost = self.lib.sum(stage_cost, 1) # Sum across the MPC horizon dimension - total_cost = total_cost + self.get_terminal_cost(state_horizon[:, -1, :]) + stage_costs = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) # Select all but last state of the horizon + terminal_cost = self.lib.reshape(self.get_terminal_cost(state_horizon[:, -1, :]), (-1, 1)) + total_cost = self.lib.mean(self.lib.concat([stage_costs, terminal_cost], 1), 1) # Average across the MPC horizon dimension return total_cost def set_computation_library(self, ComputationLib: "type[ComputationLibrary]"): @@ -64,3 +89,4 @@ def set_computation_library(self, ComputationLib: "type[ComputationLibrary]"): if not ComputationLib in self.supported_computation_libraries: raise ValueError(f"The cost function {self.__class__.__name__} does not support {ComputationLib.__name__}") self.lib = ComputationLib + diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index d0a7ae5..e192019 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -7,26 +7,50 @@ from Control_Toolkit.Controllers import template_controller from Control_Toolkit.Cost_Functions import cost_function_base +from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes +from get_logger import get_logger +log=get_logger(__name__) -# cost_function config -cost_function_config = yaml.load( - open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"), - Loader=yaml.FullLoader, -) +class CostFunctionWrapper: + """ + Wrapper class for cost functions. + It allows the creation of an instance with deferred specification which specific class containing cost functions is to be used. + Usage: + 1) Instantiate this wrapper in controller. + 2) Call `configure` with a reference to the controller instance and the name of the cost function, once this is known to the controller. -class CostFunctionWrapper: + You can do both steps + """ def __init__(self): self.cost_function = None - self.cost_function_name_default: str = cost_function_config[ - "cost_function_name_default" - ] + # cost_function config + (self.cost_function_config, _) = load_or_reload_config_if_modified( + os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),search_path=['CartPoleSimulation']) + + self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default + self.lib = None # filled by configure(), needed to update TF variables + + log.info(f'default cost function name is {self.cost_function_name_default}') def configure( self, controller: template_controller, - cost_function_specification=None, + cost_function_specification: str=None, ): + """ + Configures the cost function. TODO This lazy constructor is needed why? + + :param controller: the controller that uses this cost function + :param cost_function_specification: the string name of the cost function class, to construct the class and find the config values in config_cost_functions.yml + """ + """Configure this wrapper as part of a controller. + + :param controller: Reference to the controller instance this wrapper is used by + :type controller: template_controller + :param cost_function_specification: Name of cost function class within Control_Toolkit_ASF.Cost_Functions.<> package. 
If not specified, the 'default' one is used. + :type cost_function_specification: str, optional + """ environment_name = controller.environment_name computation_library = controller.computation_library # Use library dictated by controller @@ -42,6 +66,12 @@ def configure( cost_function_module, self.cost_function_name )(controller, computation_library) + self.lib=self.cost_function.lib + + config=self.cost_function_config['CartPole'][self.cost_function_name] # todo hardcoded 'CartPole' has to go, not sure how to determine it, maybe from module folder? + update_attributes(config, self.cost_function) + log.info(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') + def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None ): @@ -63,10 +93,9 @@ def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: return self.cost_function.get_stage_cost(states, inputs, previous_input) def get_trajectory_cost( - self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None - ): + self, state_horizon: TensorType, inputs: TensorType, previous_input: TensorType = None, config:dict=None, time:float=None): """Refer to :func:`the base cost function `""" - return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input) + return self.cost_function.get_trajectory_cost(state_horizon, inputs, previous_input, time=time) def copy(self): """ diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index 7b38d08..df834a4 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -6,11 +6,12 @@ import tensorflow_probability as tfp from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_me_tf(template_optimizer): diff --git a/Optimizers/optimizer_rpgd_ml_tf.py b/Optimizers/optimizer_rpgd_ml_tf.py index 2611ed0..69f031f 100644 --- a/Optimizers/optimizer_rpgd_ml_tf.py +++ b/Optimizers/optimizer_rpgd_ml_tf.py @@ -6,11 +6,12 @@ import tensorflow_probability as tfp from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_ml_tf(template_optimizer): diff --git a/Optimizers/optimizer_rpgd_particle_tf.py b/Optimizers/optimizer_rpgd_particle_tf.py index b1c7531..e4a2b65 100644 --- a/Optimizers/optimizer_rpgd_particle_tf.py +++ b/Optimizers/optimizer_rpgd_particle_tf.py @@ -5,11 +5,12 @@ import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from 
Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_particle_tf(template_optimizer): diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 3357857..fdc4578 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -5,11 +5,12 @@ import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF, get_logger +from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -logger = get_logger(__name__) +from get_logger import get_logger +log = get_logger(__name__) class optimizer_rpgd_tf(template_optimizer): diff --git a/others/environment.py b/others/environment.py index 899ff1f..515c927 100644 --- a/others/environment.py +++ b/others/environment.py @@ -5,9 +5,9 @@ import torch from SI_Toolkit.computation_library import ComputationLibrary, TensorType -from Control_Toolkit.others.globals_and_utils import get_logger from gym.spaces import Box +from get_logger import get_logger log = get_logger(__name__) diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index fa15c38..f7c6f24 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -11,55 +11,7 @@ from numpy.random import SFC64, Generator from SI_Toolkit.Functions.TF.Compile import CompileTF, CompileAdaptive from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary - -LOGGING_LEVEL = logging.DEBUG # usually INFO is good -class CustomFormatter(logging.Formatter): - """Logging Formatter to add colors and count warning / errors""" - # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 - - grey = "\x1b[38;21m" - yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # dark green - green = "\x1b[31;21m" # dark green - red = "\x1b[31;21m" - bold_red = "\x1b[31;1m" - light_blue = "\x1b[1;36m" - blue = "\x1b[1;34m" - reset = "\x1b[0m" - # File "{file}", line {max(line, 1)}'.replace("\\", "/") - format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' - - FORMATS = { - logging.DEBUG: light_blue + format + reset, - logging.INFO: cyan + format + reset, - logging.WARNING: red + format + reset, - logging.ERROR: bold_red + format + reset, - logging.CRITICAL: bold_red + format + reset - } - - def format(self, record): - log_fmt = self.FORMATS.get(record.levelno) - formatter = logging.Formatter(log_fmt) - return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links - - -def get_logger(name): - """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. - - :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. - - :returns: the logger. 
- """ - # logging.basicConfig(stream=sys.stdout, level=logging.INFO) - logger = logging.getLogger(name) - logger.setLevel(LOGGING_LEVEL) - # create console handler - ch = logging.StreamHandler() - ch.setFormatter(CustomFormatter()) - logger.addHandler(ch) - return logger - - +from get_logger import get_logger log = get_logger(__name__) From 69b886cda37363f8a77db3d031986bce8e163d91 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 7 Feb 2023 07:27:42 +0100 Subject: [PATCH 21/38] added search path for running from physical-cartpole. fixed logger->log renaming --- Controllers/controller_mpc.py | 2 +- Cost_Functions/__init__.py | 2 +- Cost_Functions/cost_function_wrapper.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index e09a7b7..7c1a94e 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -127,7 +127,7 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str # config_cost_functions.yml -> self.cost_function_wrapper.cost_function # config_optimizers.yml -> self.optimizer AND self.predictor_wrapper.predictor for (objs,config) in (((self,),'config_controllers.yml'), ((self.cost_function_wrapper.cost_function,), 'config_cost_functions.yml'), ((self.optimizer,self.predictor_wrapper.predictor), 'config_optimizers.yml')): - (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config)) # all the config files are currently assumed to be in Control_Toolkit_ASF folder + (config,changes)=load_or_reload_config_if_modified(os.path.join('Control_Toolkit_ASF',config), search_path=['CartPoleSimulation']) # all the config files are currently assumed to be in Control_Toolkit_ASF folder # process changes to configs using new returned change list if not changes is None: for k,v in changes.items(): diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 78a7546..8908115 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -29,7 +29,7 @@ def __init__(self, controller: template_controller, ComputationLib: "type[Comput self.controller:template_controller = controller self.config:dict=config self.set_computation_library(ComputationLib) - logger.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}') + log.info(f'constructed {self} with controller {controller} computation library {ComputationLib} and config {config}') def get_terminal_cost(self, terminal_states: TensorType) -> TensorType: """Compute a batch of terminal costs for a batch of terminal states. 
diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index cff0ece..e1c0fd4 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -26,7 +26,7 @@ def __init__(self): self.cost_function = None # cost_function config (self.cost_function_config, _) = load_or_reload_config_if_modified( - os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")) + os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), search_path=['CartPoleSimulation']) # if run from physical-cartpole, need to find it relative to CartPoleSimulation submodule self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default self.lib = None # filled by configure(), needed to update TF variables From 4ce131e34114312c996ecaddf93338e3dd565aa7 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 7 Feb 2023 08:51:42 +0100 Subject: [PATCH 22/38] update path to config_cost_functions.yml --- Controllers/__init__.py | 2 +- Controllers/controller_mpc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 138ebbb..4b7f854 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -14,7 +14,7 @@ from get_logger import get_logger log = get_logger(__name__) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 7c1a94e..cecd016 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -23,7 +23,7 @@ config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) class controller_mpc(template_controller): From 32c0a50b82f6a30d4195fe0641175c0f09b08563 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Wed, 8 Feb 2023 12:40:58 +0100 Subject: [PATCH 23/38] move get_logger.py to Control_Toolkit so that it can be used by physical-cartpole repo. Fix some cartpole parameter import. Fix rename of config_cost_functions.yml. Add timer.py and yes_or_no.py utility classes. 
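Since every module in this series now converges on the same logging pattern, a minimal usage sketch (mirroring the docstring of others/get_logger.py introduced below):

    from Control_Toolkit.others.get_logger import get_logger
    log = get_logger(__name__)  # colored, per-module logger with pycharm-clickable file/line links

    log.info('controller configured')
    log.warning('this prints in a warning color, with file and line number appended')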
--- Controllers/__init__.py | 7 +- Controllers/controller_mpc.py | 5 +- Cost_Functions/__init__.py | 2 +- Cost_Functions/cost_function_wrapper.py | 5 +- Optimizers/optimizer_rpgd_me_tf.py | 2 +- Optimizers/optimizer_rpgd_ml_tf.py | 2 +- Optimizers/optimizer_rpgd_particle_tf.py | 2 +- Optimizers/optimizer_rpgd_tf.py | 4 +- others/environment.py | 4 +- others/get_logger.py | 50 +++++++++ others/globals_and_utils.py | 3 +- others/timer.py | 129 +++++++++++++++++++++++ others/yes_or_no.py | 58 ++++++++++ 13 files changed, 250 insertions(+), 23 deletions(-) create mode 100644 others/get_logger.py create mode 100644 others/timer.py create mode 100644 others/yes_or_no.py diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 138ebbb..ccb7340 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -1,7 +1,6 @@ import os from abc import ABC, abstractmethod from pathlib import Path -from typing import Tuple from typing import Tuple, Union import numpy as np @@ -9,12 +8,12 @@ from SI_Toolkit.computation_library import (ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType) -from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes +from others.globals_and_utils import load_or_reload_config_if_modified -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) """ For a controller to be found and imported by CartPoleGUI/DataGenerator it must: diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 7c1a94e..4ebd136 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -2,7 +2,6 @@ from typing import Optional from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_trajectory_generator import cartpole_trajectory_generator -from GUI import gui_default_params from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper import numpy as np @@ -18,12 +17,12 @@ from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) config_optimizers = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_optimizers.yml")), Loader=yaml.FullLoader) -config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml")), Loader=yaml.FullLoader) +config_cost_function = yaml.load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")), Loader=yaml.FullLoader) class controller_mpc(template_controller): diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 8908115..4637629 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -3,7 +3,7 @@ from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, PyTorchLibrary, TensorFlowLibrary, TensorType from Control_Toolkit.Controllers import template_controller -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) class cost_function_base: diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index e1c0fd4..6f4e746 100644 --- a/Cost_Functions/cost_function_wrapper.py 
+++ b/Cost_Functions/cost_function_wrapper.py @@ -1,14 +1,11 @@ from importlib import import_module import os from SI_Toolkit.computation_library import TensorType -import yaml -from copy import deepcopy as dcp -from types import MappingProxyType from Control_Toolkit.Controllers import template_controller from Control_Toolkit.Cost_Functions import cost_function_base from others.globals_and_utils import load_or_reload_config_if_modified, update_attributes -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log=get_logger(__name__) class CostFunctionWrapper: diff --git a/Optimizers/optimizer_rpgd_me_tf.py b/Optimizers/optimizer_rpgd_me_tf.py index 68c80ae..e352781 100644 --- a/Optimizers/optimizer_rpgd_me_tf.py +++ b/Optimizers/optimizer_rpgd_me_tf.py @@ -10,7 +10,7 @@ from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/Optimizers/optimizer_rpgd_ml_tf.py b/Optimizers/optimizer_rpgd_ml_tf.py index f82af1c..c966f99 100644 --- a/Optimizers/optimizer_rpgd_ml_tf.py +++ b/Optimizers/optimizer_rpgd_ml_tf.py @@ -10,7 +10,7 @@ from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/Optimizers/optimizer_rpgd_particle_tf.py b/Optimizers/optimizer_rpgd_particle_tf.py index d560c72..c7f388a 100644 --- a/Optimizers/optimizer_rpgd_particle_tf.py +++ b/Optimizers/optimizer_rpgd_particle_tf.py @@ -9,7 +9,7 @@ from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 3b5a0e7..8e04d2f 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -1,18 +1,16 @@ from typing import Tuple -from SI_Toolkit.Functions.TF.Compile import CompileTF from SI_Toolkit.computation_library import ComputationLibrary, TensorFlowLibrary import numpy as np import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import get_logger from Control_Toolkit.others.globals_and_utils import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/others/environment.py b/others/environment.py index 3196e05..9dbef5c 100644 --- a/others/environment.py +++ b/others/environment.py @@ -1,13 +1,11 @@ from typing import Any, Optional, Tuple, Union import numpy as np -import tensorflow as tf -import torch from SI_Toolkit.computation_library import ComputationLibrary, TensorType from gymnasium.spaces import Box -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/others/get_logger.py b/others/get_logger.py new file mode 100644 index 0000000..ce468fe --- 
/dev/null +++ b/others/get_logger.py @@ -0,0 +1,50 @@ +import logging +# general logger for all control/si_toolkit users. Produces nice output format with live hyperlinks for pycharm users +# to use it, just call log=get_logger(__name__) at the top of your python file + +LOGGING_LEVEL = logging.DEBUG # usually INFO is good +class CustomFormatter(logging.Formatter): + """Logging Formatter to add colors and count warning / errors""" + # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 + + grey = "\x1b[38;21m" + yellow = "\x1b[33;21m" + cyan = "\x1b[1;36m" # dark green + green = "\x1b[31;21m" # dark green + red = "\x1b[31;21m" + bold_red = "\x1b[31;1m" + light_blue = "\x1b[1;36m" + blue = "\x1b[1;34m" + reset = "\x1b[0m" + # File "{file}", line {max(line, 1)}'.replace("\\", "/") + format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' + + FORMATS = { + logging.DEBUG: light_blue + format + reset, + logging.INFO: cyan + format + reset, + logging.WARNING: red + format + reset, + logging.ERROR: bold_red + format + reset, + logging.CRITICAL: bold_red + format + reset + } + + def format(self, record): + log_fmt = self.FORMATS.get(record.levelno) + formatter = logging.Formatter(log_fmt) + return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links + + +def get_logger(name): + """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. + + :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + + :returns: the logger. + """ + # logging.basicConfig(stream=sys.stdout, level=logging.INFO) + logger = logging.getLogger(name) + logger.setLevel(LOGGING_LEVEL) + # create console handler + ch = logging.StreamHandler() + ch.setFormatter(CustomFormatter()) + logger.addHandler(ch) + return logger \ No newline at end of file diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 678e0f2..87fe0c9 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -1,5 +1,4 @@ import glob -import logging import os from datetime import datetime from importlib import import_module @@ -10,7 +9,7 @@ import torch from numpy.random import SFC64, Generator from SI_Toolkit.computation_library import ComputationLibrary, NumpyLibrary, TensorFlowLibrary, PyTorchLibrary -from get_logger import get_logger +from Control_Toolkit.others.get_logger import get_logger log = get_logger(__name__) diff --git a/others/timer.py b/others/timer.py new file mode 100644 index 0000000..b83b6a9 --- /dev/null +++ b/others/timer.py @@ -0,0 +1,129 @@ +import atexit + +# a useful timer class to measure execution time statistics + +from engineering_notation import EngNumber as eng # only from pip +from matplotlib import pyplot as plt +import numpy as np +import time + +from get_logger import get_logger +log=get_logger(__name__) + +timers = {} +times = {} +class timer: + def __init__(self, timer_name='', delay=None, show_hist=False, numpy_file=None): + """ Make a timer() in a _with_ statement for a block of code. + The timer is started when the block is entered and stopped when exited. + + The accumulated timer statistics are automatically printed on exit with atexit (see end of this file). + + The timer _must_ be used in a with statement, e.g. 
+ + while True: + timestr = time.strftime("%Y%m%d-%H%M") + with timer('overall consumer loop', numpy_file=f'{DATA_FOLDER}/consumer-frame-rate-{timestr}.npy', show_hist=True): + with timer('receive UDP'): + # do something + + timers can be nested + + + :param timer_name: the str by which this timer is repeatedly called and which it is named when summary is printed on exit + :param delay: set this to a value to simply accumulate this externally determined interval + :param show_hist: whether to plot a histogram with pyplot + :param numpy_file: optional numpy file path + """ + self.timer_name = timer_name + self.show_hist = show_hist + self.numpy_file = numpy_file + self.delay=delay + + if self.timer_name not in timers.keys(): + timers[self.timer_name] = self + if self.timer_name not in times.keys(): + times[self.timer_name]=[] + + def __enter__(self): + if self.delay is None: + self.start = time.time() + return self + + def __exit__(self, *args): + if self.delay is None: + self.end = time.time() + self.interval = self.end - self.start # measured in seconds + else: + self.interval=self.delay + times[self.timer_name].append(self.interval) + + def print_timing_info(self, logger=None): + """ Prints the timing information accumulated for this timer + + :param logger: write to the supplied logger, otherwise use the built-in logger + """ + if len(times[self.timer_name])==0: + log.error(f'Timer {self.timer_name} has no statistics; was it used without a "with" statement?') + return + a = np.array(times[self.timer_name]) + timing_mean = np.mean(a) # todo use built in print method for timer + timing_std = np.std(a) + timing_median = np.median(a) + timing_min = np.min(a) + timing_max = np.max(a) + s='{} n={}: {}s +/- {}s (median {}s, min {}s max {}s)'.format(self.timer_name, len(a), + eng(timing_mean), eng(timing_std), + eng(timing_median), eng(timing_min), + eng(timing_max)) + + if logger is not None: + logger.info(s) + else: + log.info(s) + +def print_timing_info(): + for k,v in times.items(): # k is the name, v is the list of times + a = np.array(v) + timing_mean = np.mean(a) + timing_std = np.std(a) + timing_median = np.median(a) + timing_min = np.min(a) + timing_max = np.max(a) + log.info('== Timing statistics from all timers ==\n{} n={}: {}s +/- {}s (median {}s, min {}s max {}s)'.format(k, len(a), + eng(timing_mean), eng(timing_std), + eng(timing_median), eng(timing_min), + eng(timing_max))) + if timers[k].numpy_file is not None: + try: + log.info(f'saving timing data for {k} in numpy file {timers[k].numpy_file}') + log.info('there are {} times'.format(len(a))) + np.save(timers[k].numpy_file, a) + except Exception as e: + log.error(f'could not save numpy file {timers[k].numpy_file}; caught {e}') + + if timers[k].show_hist: + + def plot_loghist(x, bins): + hist, bins = np.histogram(x, bins=bins) # histogram x linearly + if len(bins)<2 or bins[0]<=0: + log.error(f'cannot plot histogram since bins={bins}') + return + logbins = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), len(bins)) # use resulting bin ends to get log bins + plt.hist(x, bins=logbins) # now again histogram x, but with the log-spaced bins, and plot this histogram + plt.xscale('log') + + dt = np.clip(a,1e-6, None) + # logbins = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), len(bins)) + try: + plot_loghist(dt,bins=100) + plt.xlabel('interval[s]') + plt.ylabel('frequency') + plt.title(k) + plt.show() + except Exception as e: + log.error(f'could not plot histogram: got {e}') + + +# this will print all the timer values upon termination of any
program that imported this file +atexit.register(print_timing_info) diff --git a/others/yes_or_no.py b/others/yes_or_no.py new file mode 100644 index 0000000..14a53ca --- /dev/null +++ b/others/yes_or_no.py @@ -0,0 +1,58 @@ +import signal +def alarm_handler(signum, frame): + raise TimeoutError +def input_with_timeout(prompt, timeout=30): + """ get input with timeout + + :param prompt: the prompt to print + :param timeout: timeout in seconds, or None to disable + + :returns: the input + :raises: TimeoutError if times out + """ + # set signal handler + if timeout is not None: + signal.signal(signal.SIGALRM, alarm_handler) + signal.alarm(timeout) # produce SIGALRM in `timeout` seconds + try: + time.sleep(.5) # get input to be printed after logging + return input(prompt) + except TimeoutError as to: + raise to + finally: + if timeout is not None: + signal.alarm(0) # cancel alarm + +def yes_or_no(question, default='y', timeout=None): + """ Get y/n answer with default choice and optional timeout + + :param question: prompt + :param default: the default choice, i.e. 'y' or 'n' + :param timeout: the timeout in seconds, default is None + + :returns: True or False + """ + if default is not None and (default!='y' and default!='n'): + log.error(f'bad option for default: {default}') + quit(1) + y='Y' if default=='y' else 'y' + n='N' if default=='n' else 'n' + while "the answer is invalid": + try: + to_str='' if timeout is None or os.name=='nt' else f'(Timeout {default} in {timeout}s)' + if os.name=='nt': + log.warning('cannot use timeout signal on windows') + time.sleep(.1) # make the warning come out first + reply=str(input(f'{question} {to_str} ({y}/{n}): ')).lower().strip() + else: + reply = str(input_with_timeout(f'{question} {to_str} ({y}/{n}): ',timeout=timeout)).lower().strip() + except TimeoutError: + log.warning(f'timeout expired, returning default={default} answer') + reply='' + if len(reply)==0 or reply=='': + return True if default=='y' else False + elif reply[0].lower() == 'y': + return True + if reply[0].lower() == 'n': + return False + From f0ec1d115892f57e875b0a8f8d63bcc5728f31b3 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Fri, 10 Feb 2023 06:31:59 +0100 Subject: [PATCH 24/38] cartpole_dancer.py starts to work. Music starts and stops, some steps seem to work. mppi parameters copied from development. cost weights adjusted. controller update interval increased to 25ms from 20ms to match simulator and actual achieved rate with dancer mppi. mppi rollouts reduced to 700 to speed up control. PhysicalCartPoleDriver.py has a class variable holding the instance, for reference from other classes. Compile.py raises exception when either option is undefined. --- Controllers/__init__.py | 2 +- others/get_logger.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index ccb7340..b660eb1 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -124,7 +124,7 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str Execute one timestep of control. :param state: the state array, dimensions are TODO add dimension to help users - :param time: the time in seconds + :param time: the time now in seconds :param updated_attributes: dict of updated attributes :returns: the next control action u e.g. a normed control input in the range [-1,1] TODO is this correct?
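For orientation, the step() contract documented in the hunk above implies a driver loop of the following shape. This is a minimal illustrative sketch, not code from this repository: DummyController and the toy plant update are hypothetical stand-ins that only mimic the step(state, time) -> u interface.

import numpy as np

# Hypothetical stand-in with the same step() contract as template_controller.step:
# step(state, time) returns a normed control input in the range [-1, 1].
class DummyController:
    def step(self, state: np.ndarray, time: float = None, updated_attributes: dict = {}) -> float:
        return float(np.clip(-0.5 * state[0], -1.0, 1.0))  # toy proportional law on cart position

controller = DummyController()
state = np.zeros(6)  # toy state vector
dt = 0.02            # 20 ms control period, the value used elsewhere in these patches
for k in range(5):
    u = controller.step(state, time=k * dt)  # one control step at the current time
    state[0] += dt * u                       # toy plant update, purely illustrative
    print(f't={k * dt:.2f}s u={u:+.3f}')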
diff --git a/others/get_logger.py b/others/get_logger.py index ce468fe..8973ea3 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -9,7 +9,7 @@ class CustomFormatter(logging.Formatter): grey = "\x1b[38;21m" yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # dark green + cyan = "\x1b[1;36m" # green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" @@ -20,7 +20,7 @@ class CustomFormatter(logging.Formatter): format = '[%(levelname)s]: %(asctime)s - %(name)s - %(message)s (File "%(pathname)s", line %(lineno)d, in %(funcName)s)' FORMATS = { - logging.DEBUG: light_blue + format + reset, + logging.DEBUG: grey + format + reset, logging.INFO: cyan + format + reset, logging.WARNING: red + format + reset, logging.ERROR: bold_red + format + reset, From 5d13e63e48c47fefc45b63cf4136c25776632e54 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Sat, 11 Feb 2023 14:01:59 +0100 Subject: [PATCH 25/38] improved control slightly by adding back more cost terms to provide some damping. But balancing still not working very well. Cannot follow cart position well. Gets into endless repeats of trying to swing up. Fixed increment and decrement of the target position. Reduced control period back to 20ms from 25ms. added a controller reset to controller_mpc.py that stops the dance song. added a return to center function in PhysicalCartPoleDriver.py with key "C" --- Controllers/controller_mpc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 4ebd136..1eb61ca 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -143,4 +143,6 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str def controller_reset(self): self.optimizer.optimizer_reset() + self.cartpole_trajectory_generator.reset() + \ No newline at end of file From e2162fdae3d341536fd6c144a1ff2598a2982773 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Sun, 12 Feb 2023 11:37:00 +0100 Subject: [PATCH 26/38] added some docstrings, but they are not very informative. Setting up to measure model mismatch in controller_mpc.py; not implemented yet. --- Controllers/__init__.py | 2 ++ Controllers/controller_mpc.py | 8 ++++++-- README.md | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index b660eb1..245a7e0 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -196,6 +196,8 @@ def get_outputs(self) -> "dict[str, np.ndarray]": } def update_logs(self, logging_values: "dict[str, TensorType]") -> None: + """ Appends controller logging information to memory in self.logs if self.controller_logging is enabled, according to self.save_vars + """ if self.controller_logging: for name, var in zip( self.save_vars, [logging_values.get(var_name, None) for var_name in self.save_vars] diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 1eb61ca..9ef1ffb 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -92,7 +92,7 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: self.step = self.step - def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, TensorType]" = {})->float: """ Compute one step of control.
:param state: the current state as 1d state vector @@ -136,8 +136,12 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str update_attributes(updated_attributes,o) log.debug(f'updated {objs} with scalar updated_attributes {updated_attributes}') - + # obtain the next control action from the optimizer u = self.optimizer.step(state, time) + + # for analysis of model mismatch, obtain the prediction of next state with this control action + + self.update_logs(self.optimizer.logging_values) return u diff --git a/README.md b/README.md index 6d712ac..d7d2abe 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,8 @@ The toolkit provides a uniform interface to log values in the controller. These The `controller_mpc.step` method takes the `optimizer.logging_values` dictionary and copies it to its `controller_mpc.logs` dictionary in each step. The `template_controller` has two related attributes: `controller_logging` and `save_vars`. If the former is `true`, then the controller populates the fields of `save_vars` in the `template_controller.logs` dictionary with values if your controller calls `update_logs` within the `step` method. +The resulting dict (stored in RAM during runtime) is written out to disk on exit. + ## Examples of Application-Specific Controllers From 518a00f3cfb51cc3981619d41e0655f769e0c7c6 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Mon, 13 Feb 2023 17:52:08 +0100 Subject: [PATCH 27/38] added prediction and target trajectory to logging to allow model mismatch error measurement. updated jupyter script to use filedialog to select csv. reverted the mysterious lowpass filter in the cart target position so that the position is changed instantaneously. added partially working M key command to cartpole_trajectory_generator.py to allow switching dance step from keyboard. added keyboard_input() and print_keyboard_help() to template_controller and implemented it in controller_mpc.py and cartpole_trajectory_generator.py. added easygui to requirements.txt. added warning to __init__.py in SI_Toolkit to warn about slow tensorflow eager mode. moved is_physical_cartpole_runnning_and_control_enabled to CartPole.__init__. --- Controllers/__init__.py | 10 ++++++++++ Controllers/controller_mpc.py | 14 ++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index 245a7e0..f5c0e2f 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -206,3 +206,13 @@ def update_logs(self, logging_values: "dict[str, TensorType]") -> None: self.logs[name].append( var.numpy().copy() if hasattr(var, "numpy") else var.copy() ) + + def keyboard_input(self, c): + """ process keyboard input character. Default implementation does nothing. 
:param c: character + """ + pass + + def print_keyboard_help(self): + """ Print help for keyboard input""" + pass diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 9ef1ffb..7128051 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -40,7 +40,7 @@ def __init__( ): super().__init__(dt, environment_name, num_states, num_control_inputs, control_limits, initial_environment_attributes) - self.cartpole_trajectory_generator = cartpole_trajectory_generator() + self.cartpole_trajectory_generator:cartpole_trajectory_generator = cartpole_trajectory_generator() def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: @@ -149,4 +149,14 @@ def controller_reset(self): self.optimizer.optimizer_reset() self.cartpole_trajectory_generator.reset() - \ No newline at end of file + def keyboard_input(self, c): + try: + self.cartpole_trajectory_generator.keyboard_input(c) + except AttributeError: + pass + + def print_keyboard_help(self): + try: + self.cartpole_trajectory_generator.print_keyboard_help() + except AttributeError: + pass From 9190c2aa5b2ae1373073f38b45f6625852bda7cc Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 14 Feb 2023 17:44:20 +0100 Subject: [PATCH 28/38] add computation of pole natural frequency to p_globals.py. remove terminal cost in cartpole_dancer_cost.py to move track edge barrier cost inside stage costs function where it is easier to manage. comment out terminal cost computation in Control_Toolkit - note this will break cost functions that use it. --- Cost_Functions/__init__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Cost_Functions/__init__.py b/Cost_Functions/__init__.py index 4637629..2a26ea6 100644 --- a/Cost_Functions/__init__.py +++ b/Cost_Functions/__init__.py @@ -81,9 +81,14 @@ def get_trajectory_cost(self, state_horizon: TensorType, inputs: TensorType, pre :return: The summed cost of the trajectory. Has shape [batch_size]. :rtype: TensorType """ - stage_costs = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) # Select all but last state of the horizon - terminal_cost = self.lib.reshape(self.get_terminal_cost(state_horizon[:, -1, :]), (-1, 1)) - total_cost = self.lib.mean(self.lib.concat([stage_costs, terminal_cost], 1), 1) # Average across the MPC horizon dimension + # Select all but last state of the horizon + # stage costs have dimension [num_rollouts, horizon, states] + stage_costs = self.get_stage_cost(state_horizon[:, :-1, :], inputs, previous_input) + # select last states of horizon for all rollouts + # compute the cost of these terminal states, result is ??? + # terminal_cost = self.lib.reshape(self.get_terminal_cost(state_horizon[:, -1, :]), (-1, 1)) + # total_cost = self.lib.mean(self.lib.concat([stage_costs, terminal_cost], 1), 1) + total_cost = self.lib.mean(stage_costs, 1) # Average across the MPC horizon dimension to leave a 1-d vector of num_rollouts dimension return total_cost def set_computation_library(self, ComputationLib: "type[ComputationLibrary]"): From 6b8a67673b4ff5c3a95677e3dd51fa7a116bd0a7 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Thu, 16 Feb 2023 14:43:47 +0100 Subject: [PATCH 29/38] added 'cartwheel' step to cartpole_trajectory_generator.py. commented out some noisy log.debug.
made horizon same as physical cartpole --- Controllers/controller_mpc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 7128051..8100efa 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -45,8 +45,7 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: if optimizer_name in {None, ""}: optimizer_name = str(self.config_controller["optimizer"]) - log.info(f'Using optimizer "{optimizer_name}" specified in controller config file') - log.info(f"Using optimizer {optimizer_name} specified in controller config file") + log.info(f'Using optimizer \'{optimizer_name}\' specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) log.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') From 039d385b71602071d84af3d8436b5325ad71dad7 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Sun, 19 Feb 2023 11:05:17 +0100 Subject: [PATCH 30/38] added warning for cart calibration. --- others/yes_or_no.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/others/yes_or_no.py b/others/yes_or_no.py index 14a53ca..bf2aa32 100644 --- a/others/yes_or_no.py +++ b/others/yes_or_no.py @@ -1,4 +1,8 @@ +# useful utilities to ask questions at the console terminal with default answer and timeout + import signal +import time + def alarm_handler(signum, frame): raise TimeoutError def input_with_timeout(prompt, timeout=30): From ed2c1180807ebe6b8abafa47ae030566d42a2488 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sun, 19 Feb 2023 20:50:18 +0100 Subject: [PATCH 31/38] fixed some logic and reduced some loggers to debug level. Updated satisfaction CSV to use markers from audition and added more cartwheels --- Cost_Functions/cost_function_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index 6f4e746..134842f 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -75,7 +75,7 @@ def configure( config=self.cost_function_config['CartPole'][self.cost_function_name] # todo hardcoded 'CartPole' has to go, not sure how to determine it, maybe from module folder? update_attributes(config, self.cost_function) - log.info(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') + log.debug(f'configured controller {controller.__class__.__name__} with cost function {self.cost_function.__class__}') def update_cost_function_name_from_specification( self, environment_name: str, cost_function_specification: str = None From a16b45420ca15cf829d7ba647c0c9fa588d34cff Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Mon, 20 Feb 2023 10:59:19 +0100 Subject: [PATCH 32/38] fixed shimmy math. fixed import of CompileTF in optimizer_rpgd_tf.py.
other small code changes and comments --- Optimizers/optimizer_rpgd_tf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 8e04d2f..1a2429a 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -6,7 +6,7 @@ import tensorflow as tf from Control_Toolkit.Cost_Functions.cost_function_wrapper import CostFunctionWrapper from Control_Toolkit.Optimizers import template_optimizer -from Control_Toolkit.others.globals_and_utils import CompileTF +from SI_Toolkit.Functions.TF.Compile import CompileTF from Control_Toolkit.others.Interpolator import Interpolator from SI_Toolkit.Predictors.predictor_wrapper import PredictorWrapper @@ -15,6 +15,7 @@ class optimizer_rpgd_tf(template_optimizer): + """ The tensorflow (tf) version of the Resampling Parallel Gradient Descent (RPGD) optimizer reported in the ICRA paper """ supported_computation_libraries = {TensorFlowLibrary} def __init__( From ea33a0024f7fe20b15aca31129ac4312d7bbe725 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 21 Feb 2023 14:04:39 +0100 Subject: [PATCH 33/38] improved console reporting of current objective and logging output so that it is shown in live console display instead of scrolling away. fixed cartonly trajectory so that cartonly works with sawtooth much better. changed PhysicalCartPoleDriver.py logger so that the handler logs with \r like other status indicators. added another exception handler in globals_and_utils.py to handle AttributeErrors in config files. Optimized shimmy amplitudes in satisfaction --- Controllers/controller_mpc.py | 2 +- others/get_logger.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 8100efa..1d8d65f 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -40,7 +40,7 @@ def __init__( ): super().__init__(dt, environment_name, num_states, num_control_inputs, control_limits, initial_environment_attributes) - self.cartpole_trajectory_generator:cartpole_trajectory_generator = cartpole_trajectory_generator() + self.cartpole_trajectory_generator:cartpole_trajectory_generator = cartpole_trajectory_generator() # TODO awkward to add this specific cost function generator here to generic controller_mpc def configure(self, optimizer_name: Optional[str]=None, predictor_specification: Optional[str]=None): if optimizer_name in {None, ""}: diff --git a/others/get_logger.py b/others/get_logger.py index 8973ea3..5cd5049 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -33,10 +33,11 @@ def format(self, record): return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links -def get_logger(name): +def get_logger(name=None): """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. + If no name is supplied, it defaults to the root logger, which may turn on a LOT of undesired logging output. :returns: the logger.
""" From 6e8ef5157f29206a265522db14f26da377df6653 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 21 Feb 2023 15:33:40 +0100 Subject: [PATCH 34/38] improved logging output to make debug logger light gray, include filename in console logger for logging during control --- others/get_logger.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/others/get_logger.py b/others/get_logger.py index 5cd5049..1d9d916 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -7,9 +7,11 @@ class CustomFormatter(logging.Formatter): """Logging Formatter to add colors and count warning / errors""" # see https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output/7995762#7995762 - grey = "\x1b[38;21m" + # \x1b[ (ESC[) is the CSI introductory sequence for ANSI https://en.wikipedia.org/wiki/ANSI_escape_code + # The control sequence CSI n m, named Select Graphic Rendition (SGR), sets display attributes. + grey = "\x1b[2;37m" # 2 faint, 37 gray yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # + cyan = "\x1b[1;36m" # 1 bold 36 cyan green = "\x1b[31;21m" # dark green red = "\x1b[31;21m" bold_red = "\x1b[31;1m" @@ -33,16 +35,15 @@ def format(self, record): return formatter.format(record).replace("\\", "/") #replace \ with / for pycharm links -def get_logger(name=None): +def get_logger(name='ControlToolkit'): """ Use get_logger to define a logger with useful color output and info and warning turned on according to the global LOGGING_LEVEL. - :param name: the name of this logger. Use __name__ to give it the name of the module that instantiates it. - If no name is supplied, it defaults to root logger which may turn on a LOT of undesired logging output. + :param name: ignored -- all loggers here have the name 'ControlToolkit' so that all can be affected uniformly :returns: the logger. """ # logging.basicConfig(stream=sys.stdout, level=logging.INFO) - logger = logging.getLogger(name) + logger = logging.getLogger('ControlToolkit') # tobi changed so all have same name so we can uniformly affect all of them logger.setLevel(LOGGING_LEVEL) # create console handler ch = logging.StreamHandler() From e894dfce1a2594a18f2cf4f4476f367885ab0250 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Wed, 22 Feb 2023 08:46:26 +0100 Subject: [PATCH 35/38] fixed get_logger.py that now uses a single logger name to only add the handler once --- others/get_logger.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/others/get_logger.py b/others/get_logger.py index 1d9d916..6546b15 100644 --- a/others/get_logger.py +++ b/others/get_logger.py @@ -1,6 +1,7 @@ import logging # general logger for all control/si_toolkit users. Produces nice output format with live hyperlinks for pycharm users -# to use it, just call log=get_logger(__name__) at the top of your python file +# to use it, just call log=get_logger() at the top of your python file +# all these loggers share the same logger name 'Control_Toolkit' LOGGING_LEVEL = logging.DEBUG # usually INFO is good class CustomFormatter(logging.Formatter): @@ -11,9 +12,9 @@ class CustomFormatter(logging.Formatter): # The control sequence CSI n m, named Select Graphic Rendition (SGR), sets display attributes. 
grey = "\x1b[2;37m" # 2 faint, 37 gray yellow = "\x1b[33;21m" - cyan = "\x1b[1;36m" # 1 bold 36 cyan + cyan = "\x1b[0;36m" # 0 normal 36 cyan green = "\x1b[31;21m" # dark green - red = "\x1b[31;21m" + red = "\x1b[31;21m" # bold red bold_red = "\x1b[31;1m" light_blue = "\x1b[1;36m" blue = "\x1b[1;34m" @@ -45,8 +46,9 @@ def get_logger(name='ControlToolkit'): # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logger = logging.getLogger('ControlToolkit') # tobi changed so all have same name so we can uniformly affect all of them logger.setLevel(LOGGING_LEVEL) - # create console handler - ch = logging.StreamHandler() - ch.setFormatter(CustomFormatter()) - logger.addHandler(ch) + # create console handler if this logger does not have handler yet + if len(logger.handlers)==0: + ch = logging.StreamHandler() + ch.setFormatter(CustomFormatter()) + logger.addHandler(ch) return logger \ No newline at end of file From 3489a44996c524b33ffaa0a8c431ac79246852a5 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Thu, 23 Feb 2023 20:08:21 +0100 Subject: [PATCH 36/38] reduced chatter in logging --- Controllers/controller_mpc.py | 4 ++-- Cost_Functions/cost_function_wrapper.py | 4 ++-- others/globals_and_utils.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index 1d8d65f..db1571b 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -48,13 +48,13 @@ def configure(self, optimizer_name: Optional[str]=None, predictor_specification: log.info(f'Using optimizer \'{optimizer_name}\' specified in controller config file') if predictor_specification in {None, ""}: predictor_specification: Optional[str] = self.config_controller.get("predictor_specification", None) - log.info(f'Using predictor_specification="{predictor_specification}" specified in controller config file') + log.info(f"Using predictor_specification='{predictor_specification}' specified in controller config file") config_optimizer = config_optimizers[optimizer_name] # Create cost function cost_function_specification = self.config_controller.get("cost_function_specification", None) - log.info(f'using cost_function_specification="{cost_function_specification}" in config {self.config_controller}') + log.info(f'using cost_function_specification=\'{cost_function_specification}\' in config {self.config_controller}') self.cost_function_wrapper = CostFunctionWrapper() self.cost_function_wrapper.configure(self, cost_function_specification=cost_function_specification) diff --git a/Cost_Functions/cost_function_wrapper.py b/Cost_Functions/cost_function_wrapper.py index 134842f..b484190 100644 --- a/Cost_Functions/cost_function_wrapper.py +++ b/Cost_Functions/cost_function_wrapper.py @@ -28,7 +28,7 @@ def __init__(self): self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default self.lib = None # filled by configure(), needed to update TF variables - log.info(f'default cost function name is {self.cost_function_name_default}') + log.debug(f'default cost function name is {self.cost_function_name_default}') # cost_function config (self.cost_function_config, _) = load_or_reload_config_if_modified( os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),search_path=['CartPoleSimulation']) @@ -36,7 +36,7 @@ def __init__(self): self.cost_function_name_default: str = self.cost_function_config.cost_function_name_default self.lib = None # filled by configure(), needed to update TF variables - log.info(f'default cost 
function name is {self.cost_function_name_default}') + log.debug(f'default cost function name is {self.cost_function_name_default}') def configure( self, diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 87fe0c9..7cf8b9d 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -100,7 +100,7 @@ def import_controller_by_name(controller_name: str) -> type: assert len(controller_relative_paths) == 1, f"Controller {controller_full_name} must be in a unique location. {len(controller_relative_paths)} found." controller_relative_path = controller_relative_paths[0] - log.info(f"Importing controller from {controller_relative_path}") + log.debug(f"Importing controller from {controller_relative_path}") Controller: type = getattr(import_module(controller_relative_path.replace(".py", "").replace(os.sep, ".")), controller_full_name) return Controller From a1a36db4aa8dac2a81f51c62913d47fc4ee82a57 Mon Sep 17 00:00:00 2001 From: Tobi Delbruck Date: Tue, 28 Feb 2023 11:42:07 +0100 Subject: [PATCH 37/38] major changes to cartpole_dancer_cost and cartpole_trajectory_generator for compatibility with RPGD gradient computation. update_attributes in globals_and_utils.py supports assignment of tf.Variable and now raises exception if we try to .assign an immutable Tensor. Balance step now uses int +1 or -1 for desired pole up or down, not 'up' or 'down'. computation_library.py now casts variable to numpy() from tensorflow only if it is a Tensor type. --- Controllers/controller_mpc.py | 6 +----- Optimizers/optimizer_rpgd_tf.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Controllers/controller_mpc.py b/Controllers/controller_mpc.py index db1571b..056b6d8 100644 --- a/Controllers/controller_mpc.py +++ b/Controllers/controller_mpc.py @@ -109,12 +109,8 @@ def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str cost_function=self.cost_function_wrapper.cost_function update_attributes(updated_attributes,cost_function) # update target_position and target_equilibrium in cost function to use updated_attributes.clear() - target_trajectory = \ - self.cartpole_trajectory_generator.\ + self.cartpole_trajectory_generator.\ generate_cartpole_trajectory(time=time, state=state, controller=self, cost_function=self.cost_function_wrapper.cost_function) - updated_attributes['target_trajectory']=target_trajectory - update_attributes(updated_attributes,cost_function) # update - updated_attributes.clear() # now we fill this dict with config file changes if there are any and update attributes in the controller, the cost function, and the optimizer # detect any changes in config scalar values and pass to this controller or the cost function or optimizer diff --git a/Optimizers/optimizer_rpgd_tf.py b/Optimizers/optimizer_rpgd_tf.py index 1a2429a..3e1a2cf 100644 --- a/Optimizers/optimizer_rpgd_tf.py +++ b/Optimizers/optimizer_rpgd_tf.py @@ -78,7 +78,7 @@ def __init__( self.Interpolator = Interpolator(self.mpc_horizon, period_interpolation_inducing_points, self.num_control_inputs, self.lib) - self.opt = tf.keras.optimizers.Adam( + self.opt:tf.keras.optimizers.Adam = tf.keras.optimizers.Adam( learning_rate=learning_rate, beta_1=adam_beta_1, beta_2=adam_beta_2, From 807c46de404c542c4a074b99645be8e7c8397be0 Mon Sep 17 00:00:00 2001 From: tobidelbruck Date: Sat, 13 May 2023 14:52:02 +0200 Subject: [PATCH 38/38] initial commit of Shreyan's code for energy-based controller for cartpole balancing --- Controllers/__init__.py | 8 ++++++++
others/globals_and_utils.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Controllers/__init__.py b/Controllers/__init__.py index f5c0e2f..c078b49 100644 --- a/Controllers/__init__.py +++ b/Controllers/__init__.py @@ -118,6 +118,14 @@ def configure(self, **kwargs): # In your controller, implement any additional initialization steps here pass + def update_attributes(self, updated_attributes: "dict[str,TensorType]"): + for property, new_value in updated_attributes.items(): + try: + attr = getattr(self.variable_parameters, property) + self.computation_library.assign(attr, self.lib.to_tensor(new_value, attr.dtype)) + except: + setattr(self.variable_parameters, property, new_value) + @abstractmethod def step(self, state: np.ndarray, time:float=None, updated_attributes: "dict[str, Union[TensorType,float]]" = dict()): """ diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py index 7cf8b9d..57a7914 100644 --- a/others/globals_and_utils.py +++ b/others/globals_and_utils.py @@ -159,7 +159,7 @@ def get_controller_name(controller_names=None, controller_name=None, controller_ try: controller_idx = controller_names.index(controller_name) except ValueError: - print('{} is not in list. \n In list are: {}'.format(controller_name, controller_names)) + log.error('{} is not in list. \n In list are: {}'.format(controller_name, controller_names)) return None else: controller_name = controller_names[controller_idx]
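The update_attributes method added in this final patch first attempts an in-place assignment into an existing computation-library variable and only falls back to a plain setattr. Below is a minimal standalone sketch of the same pattern, assuming TensorFlow; the Params container is a hypothetical stand-in for self.variable_parameters, not a class from this repository, and the sketch narrows the patch's bare except to the exceptions that assignment can realistically raise.

import tensorflow as tf

class Params:  # hypothetical stand-in for self.variable_parameters
    target_position = tf.Variable(0.0, dtype=tf.float32)

def update_attributes(params, updated_attributes: dict):
    for name, new_value in updated_attributes.items():
        try:
            attr = getattr(params, name)
            # in-place assign keeps the same tf.Variable object, so compiled
            # graphs that captured it will see the new value
            attr.assign(tf.cast(new_value, attr.dtype))
        except (AttributeError, TypeError, ValueError):
            # unknown attribute or not an assignable Variable: just (re)bind it
            setattr(params, name, new_value)

p = Params()
update_attributes(p, {'target_position': 0.25, 'target_equilibrium': 1})
print(float(p.target_position), p.target_equilibrium)  # prints: 0.25 1

Assigning in place rather than rebinding matters because tf.function-compiled code captures the Variable object itself; rebinding the attribute would leave compiled graphs reading the stale value.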