kscalelabs · budzianowski · Oct 31, 2024 · Oct 29, 2024 · Oct 29, 2024 · Oct 30, 2024
diff --git a/examples/original_walking.pt b/examples/original_walking.pt
diff --git a/examples/walking_pro.onnx b/examples/walking_pro.onnx
diff --git a/examples/walking_pro.pt b/examples/walking_pro.pt
diff --git a/policy_1.pt b/policy_1.pt
diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
@@ -102,11 +102,17 @@ def post_physics_step(self):
         # TODO(pfb30) - debug this
         origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1)
         origin = quat_conjugate(origin)
-        self.base_quat[:] = quat_mul(origin, self.root_states[:, 3:7])
-        self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
 
-        self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
-        self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+        if self.imu_indices:
+            self.base_quat = quat_mul(origin, self.rigid_state[:, self.imu_indices, 3:7])
+            self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10])
+            self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13])
+        else:
+            self.base_quat = self.root_states[:, 3:7]
+            self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
+            self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+
+        self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
         self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec)
 
         self._post_physics_step_callback()
@@ -190,7 +196,10 @@ def reset_idx(self, env_ids):
         # TODO(pfb30) - debug this
         origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1)
         origin = quat_conjugate(origin)
-        self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.root_states[env_ids, 3:7])
+        if self.imu_indices:
+            self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.rigid_state[env_ids, self.imu_indices, 3:7])
+        else:
+            self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.root_states[env_ids, 3:7])
 
         self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
         self.projected_gravity[env_ids] = quat_rotate_inverse(self.base_quat[env_ids], self.gravity_vec[env_ids])
@@ -363,7 +372,6 @@ def _compute_torques(self, actions):
         torques = p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - d_gains * self.dof_vel
 
         res = torch.clip(torques, -self.torque_limits, self.torque_limits)
-
         return res
 
     def _reset_dofs(self, env_ids):
@@ -481,17 +489,24 @@ def _init_buffers(self):
         self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
         self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0]
         self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1]
-        # self.base_quat = self.root_states[:, 3:7]
+
+        self.rigid_state = gymtorch.wrap_tensor(rigid_body_state)  # .view(self.num_envs, -1, 13)
+        self.rigid_state = self.rigid_state.view(self.num_envs, -1, 13)
         # TODO(pfb30): debug this
+        # self.base_quat = self.root_states[:, 3:7]
         origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1)
         origin = quat_conjugate(origin)
-        self.base_quat = quat_mul(origin, self.root_states[:, 3:7])
+
+        if self.imu_indices:
+            self.base_quat = quat_mul(origin, self.rigid_state[:, self.imu_indices, 3:7])
+        else:
+            self.base_quat = quat_mul(origin, self.root_states[:, 3:7])
+
         self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
 
         self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view(
             self.num_envs, -1, 3
         )  # shape: num_envs, num_bodies, xyz axis
-        self.rigid_state = gymtorch.wrap_tensor(rigid_body_state).view(self.num_envs, -1, 13)
 
         # initialize some data used later on
         self.common_step_counter = 0
@@ -537,8 +552,13 @@ def _init_buffers(self):
         self.last_contacts = torch.zeros(
             self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device, requires_grad=False
         )
-        self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
-        self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+        if self.imu_indices:
+            self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10])
+            self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13])
+        else:
+            self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
+            self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+
         self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec)
         if self.cfg.terrain.measure_heights:
             self.height_points = self._init_height_points()
@@ -592,6 +612,7 @@ def _prepare_reward_function(self):
         # prepare list of functions
         self.reward_functions = []
         self.reward_names = []
+
         for name, scale in self.reward_scales.items():
             if name == "termination":
                 continue
@@ -696,6 +717,7 @@ def _create_envs(self):
         self.num_dofs = len(self.dof_names)
         feet_names = [s for s in body_names if s in self.cfg.asset.foot_name]
         knee_names = [s for s in body_names if s in self.cfg.asset.knee_name]
+
         penalized_contact_names = []
         for name in self.cfg.asset.penalize_contacts_on:
             penalized_contact_names.extend([s for s in body_names if name in s])
@@ -753,8 +775,7 @@ def _create_envs(self):
         print("feet", self.feet_indices)
         print(f"Processed body properties for env {i}:")
         for j, prop in enumerate(body_props):
-            print(f"  Body {j} mass: {prop.mass}")
-            # prop.mass = prop.mass * 1e7
+            print(f" Body {j} mass: {prop.mass}")
             print(f" Body {j} inertia: {prop.inertia.x}, {prop.inertia.y}, {prop.inertia.z}")
 
         self.knee_indices = torch.zeros(len(knee_names), dtype=torch.long, device=self.device, requires_grad=False)
@@ -763,6 +784,12 @@ def _create_envs(self):
                 self.envs[0], self.actor_handles[0], knee_names[i]
             )
 
+        self.imu_indices = self.gym.find_actor_rigid_body_handle(
+            self.envs[0], self.actor_handles[0], self.cfg.asset.imu_name
+        )
+        if self.imu_indices == -1:
+            self.imu_indices = None
+
         self.penalised_contact_indices = torch.zeros(
             len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False
         )

diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
@@ -124,7 +124,10 @@ class safety:
     class asset:
         file = ""
         name = "legged_robot"  # actor name
-        foot_name = "None"  # name of the feet bodies, used to index body state and contact force tensors
+        foot_name = ["None"]  # name of the feet bodies, used to index body state and contact force tensors
+        knee_name = ["None"]
+        imu_name = None
+
         penalize_contacts_on = []
         terminate_after_contacts_on = []
         disable_gravity = False

diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py
@@ -7,7 +7,7 @@
 )
 from sim.resources.stompypro.joints import Robot
 
-NUM_JOINTS = len(Robot.all_joints())  # 12
+NUM_JOINTS = len(Robot.all_joints())
 
 
 class StompyProCfg(LeggedRobotCfg):
@@ -39,6 +39,7 @@ class asset(LeggedRobotCfg.asset):
 
         foot_name = ["L_foot", "R_foot"]
         knee_name = ["L_calf", "R_calf"]
+        imu_name = "imu_link"
 
         termination_height = 0.2
         default_feet_height = 0.0
@@ -50,8 +51,8 @@ class asset(LeggedRobotCfg.asset):
         fix_base_link = False
 
     class terrain(LeggedRobotCfg.terrain):
-        mesh_type = "plane"
-        # mesh_type = 'trimesh'
+        # mesh_type = "plane"
+        mesh_type = "trimesh"
         curriculum = False
         # rough terrain only:
         measure_heights = False
@@ -161,9 +162,9 @@ class rewards:
 
         class scales:
             # reference motion tracking
-            joint_pos = 1.6
-            feet_clearance = 1.6
-            feet_contact_number = 1.5
+            joint_pos = 1.9
+            feet_clearance = 1.7
+            feet_contact_number = 1.7
             # gait
             feet_air_time = 1.6
             foot_slip = -0.05
@@ -172,8 +173,8 @@ class scales:
             # contact
             feet_contact_forces = -0.01
             # vel tracking
-            tracking_lin_vel = 1.2
-            tracking_ang_vel = 1.3
+            tracking_lin_vel = 1.0
+            tracking_ang_vel = 1.0
             vel_mismatch_exp = 0.5  # lin_z; ang x,y
             low_speed = 0.2
             track_vel_hard = 0.5

diff --git a/sim/play.py b/sim/play.py
@@ -1,12 +1,10 @@
+# mypy: ignore-errors
 """Play a trained policy in the environment.
 
 Run:
     python sim/play.py --task g1 --log_h5
     python sim/play.py --task stompymini --log_h5
 """
-
-# mypy: ignore-errors
-
 import argparse
 import copy
 import logging
@@ -46,7 +44,10 @@ def play(args: argparse.Namespace) -> None:
     # override some parameters for testing
     env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
     env_cfg.sim.max_gpu_contact_pairs = 2**10
-    env_cfg.terrain.mesh_type = "plane"
+    if args.trimesh:
+        env_cfg.terrain.mesh_type = "trimesh"
+    else:
+        env_cfg.terrain.mesh_type = "plane"
     env_cfg.terrain.num_rows = 5
     env_cfg.terrain.num_cols = 5
     env_cfg.terrain.curriculum = False
@@ -79,7 +80,6 @@ def play(args: argparse.Namespace) -> None:
 
     # export policy as a onnx module (used to run it on web)
     if args.export_onnx:
-        breakpoint()
         path = ppo_runner.alg.actor_critic
         convert_model_to_onnx(path, ActorCfg(), save_path="policy.onnx")
         print("Exported policy as onnx to: ", path)
@@ -155,7 +155,6 @@ def play(args: argparse.Namespace) -> None:
         actions = policy(obs.detach())
         if args.log_h5:
             dset_actions[t] = actions.detach().numpy()
-
         if args.fix_command:
             env.commands[:, 0] = 0.5
             env.commands[:, 1] = 0.0

diff --git a/sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompypro/robot_fixed.urdf
@@ -1,7 +1,7 @@
 <robot name="robot">
   <link name="base">
     <inertial>
-      <origin rpy="0.0 0.0 0.0" xyz="0.0 0.0 0.0" />
+      <origin rpy="0 0.0 0.0" xyz="0.0 0.0 0.0" />
       <mass value="0.001" />
       <inertia ixx="0.0001" ixy="0.0" ixz="0.0" iyy="0.0001" iyz="0.0" izz="0.0001" />
       </inertial>
@@ -10,7 +10,30 @@
     <origin rpy="0 0 0" xyz="0 0 0.0" />
     <parent link="base" />
     <child link="trunk" />
-    </joint>
+    </joint> 
+  <!-- IMU pfb30 - fix the placement -->
+  <link name="imu_link">
+      <visual>
+      <geometry>
+        <box size="0.1 0.1 0.1"/>
+      </geometry>
+      <material name="red">
+        <color rgba="1 0 0 1"/>
+      </material>
+    </visual>
+  </link>
+  <joint
+    name="imu_joint"
+    type="fixed"
+    dont_collapse="true">
+    <origin
+      xyz="0.0 0.0 0.55"
+      rpy="0 0 0" />
+    <parent
+      link="base" />
+    <child
+      link="imu_link" />
+  </joint>
   <link name="trunk">
     <inertial>
       <origin rpy="0.0 0.0 0.0" xyz="-0.00051930492 0.00030343937 0.21140495" />

diff --git a/sim/resources/stompypro/robot_fixed.xml b/sim/resources/stompypro/robot_fixed.xml
@@ -34,7 +34,7 @@
       <freejoint name="root"/>
       <camera name="front" pos="0 -3 1" xyaxes="1 0 0 0 1 2" mode="trackcom"/>
       <camera name="side" pos="-2.893 -1.330 0.757" xyaxes="0.405 -0.914 0.000 0.419 0.186 0.889" mode="trackcom"/>
-      <site name="imu" size="0.01" pos="0 0 0"/>
+      <site name="imu" size="0.01" pos="0 0 .55"/>
       <geom type="mesh" mesh="trunk" class="visualgeom"/>
       <geom type="mesh" rgba="1 0.5 0.75 1" mesh="trunk" contype="0" conaffinity="0" group="1" density="0"/>
       <body name="L_buttock" pos="0 0.1577 0">

diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
@@ -232,6 +232,12 @@ def get_args() -> argparse.Namespace:
             "default": False,
             "help": "Enable HDF5 logging",
         },
+        {
+            "name": "--trimesh",
+            "action": "store_true",
+            "default": False,
+            "help": "Use trimesh terrain",
+        },
     ]
     # parse arguments
     args = gymutil.parse_arguments(description="RL Policy", custom_parameters=custom_parameters)