Basic Manipulation
Note
Please first complete Basics and Build Gym-style Interface before continuing this tutorial.
In this tutorial, you will learn the following:

- Implement a basic manipulation environment (block lifting)
The full code of the environment can be downloaded here: lift.py
Setup
Based on the SapienEnv implemented in the previous example (Build Gym-style Interface), we can create LiftEnv. The simulation world consists of the ground, a table top, a cube to lift, and a Panda robot arm.
class LiftEnv(SapienEnv):
    def __init__(self):
        self.init_qpos = [0, 0.19634954084936207, 0.0, -2.617993877991494,
                          0.0, 2.941592653589793, 0.7853981633974483, 0, 0]
        self.table_height = 0.8
        super().__init__(control_freq=20, timestep=0.01)

        self.robot = self.get_articulation('panda')
        self.end_effector = self.robot.get_links()[8]
        self.dof = self.robot.dof
        assert self.dof == 9, 'Panda should have 9 DoF'
        self.active_joints = self.robot.get_active_joints()
        self.cube = self.get_actor('cube')

        # The arm is controlled by the internal velocity drive
        for joint in self.active_joints[:5]:
            joint.set_drive_property(stiffness=0, damping=4.8)
        for joint in self.active_joints[5:7]:
            joint.set_drive_property(stiffness=0, damping=0.72)
        # The gripper will be controlled directly by the torque

        # Observation: qpos (9) + qvel (9) + cube pose (7) + cube-to-ee (3) = 28
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=[self.dof * 2 + 10], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=[self.dof], dtype=np.float32)
    # ---------------------------------------------------------------------------- #
    # Simulation world
    # ---------------------------------------------------------------------------- #
    def _build_world(self):
        physical_material = self._scene.create_physical_material(1.0, 1.0, 0.0)
        self._scene.default_physical_material = physical_material
        self._scene.add_ground(0.0)
        # table top
        builder = self._scene.create_actor_builder()
        builder.add_box_collision(half_size=[0.4, 0.4, 0.025])
        builder.add_box_visual(half_size=[0.4, 0.4, 0.025])
        table = builder.build_kinematic(name='table')
        table.set_pose(Pose([0, 0, self.table_height - 0.025]))
        # cube
        builder = self._scene.create_actor_builder()
        builder.add_box_collision(half_size=[0.02, 0.02, 0.02])
        builder.add_box_visual(half_size=[0.02, 0.02, 0.02], color=[1, 0, 0])
        cube = builder.build(name='cube')
        cube.set_pose(Pose([0, 0, self.table_height + 0.02]))
        # robot
        loader = self._scene.create_urdf_loader()
        loader.fix_root_link = True
        robot = loader.load('../assets/robot/panda/panda.urdf')
        robot.set_name('panda')
        robot.set_root_pose(Pose([-0.16 - 0.4, 0, self.table_height]))
        robot.set_qpos(self.init_qpos)
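The end-effector used by LiftEnv is simply self.robot.get_links()[8]. The link order is determined by the URDF, so if you adapt this example to another robot, it helps to print the links once and pick the right index. A small sanity-check sketch (robot is the articulation loaded above):

for index, link in enumerate(robot.get_links()):
    print(index, link.get_name())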
We use internal velocity drives to control the 7 joints of the arm; the 2 gripper fingers are controlled directly by torques.
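With zero stiffness, such a drive behaves as a velocity servo: at every simulation step it applies a force proportional to the velocity error. The sketch below is only a conceptual model of what the physics engine computes internally, not code you need to write:

def conceptual_drive_force(damping, target_velocity, current_velocity):
    # stiffness = 0, so only the velocity error contributes; a larger
    # damping value makes the joint track its target more aggressively
    return damping * (target_velocity - current_velocity)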
Task Definition
Next, let’s define the task of block lifting. The goal is to lift the cube at least 2cm above the table top.
def step(self, action):
    # Use internal velocity drive to control the arm
    for idx in range(7):
        self.active_joints[idx].set_drive_velocity_target(action[idx])

    # Control the gripper directly by torque;
    # compute_passive_force compensates for gravity and Coriolis forces
    qf = self.robot.compute_passive_force(True, True, False)
    qf[-2:] += action[-2:]
    self.robot.set_qf(qf)

    for i in range(self.control_freq):
        self._scene.step()

    obs = self._get_obs()
    reward = self._get_reward()
    done = self.cube.get_pose().p[2] > self.table_height + 0.04
    if done:
        reward += 100.0
    return obs, reward, done, {}
def reset(self):
    self.robot.set_qpos(self.init_qpos)
    # Randomize the initial position of the cube on the table top
    self.cube.set_pose(Pose(
        [np.random.randn() * 0.05, np.random.randn() * 0.05, self.table_height + 0.02]))
    self._scene.step()
    return self._get_obs()
def _get_obs(self):
    qpos = self.robot.get_qpos()
    qvel = self.robot.get_qvel()
    cube_pose = self.cube.get_pose()
    ee_pose = self.end_effector.get_pose()
    cube_to_ee = ee_pose.p - cube_pose.p
    return np.hstack([qpos, qvel, cube_pose.p, cube_pose.q, cube_to_ee])
def _get_reward(self):
    # reaching reward: 1 at zero distance, decays rapidly as the
    # end-effector moves away from the cube
    cube_pose = self.cube.get_pose()
    ee_pose = self.end_effector.get_pose()
    distance = np.linalg.norm(ee_pose.p - cube_pose.p)
    reaching_reward = 1 - np.tanh(10.0 * distance)
    # lifting reward: 0 when the cube rests on the table,
    # 1 when it is lifted by 2cm
    lifting_reward = max(
        0, cube_pose.p[2] - self.table_height - 0.02) / 0.02
    return reaching_reward + lifting_reward
The action is the concatenation of the target velocities of the 7 arm joints and the torques applied to the 2 gripper fingers. The observation is the concatenation of the joint positions, the joint velocities, the pose of the cube, and the displacement from the cube to the end-effector (the link at index 8). The total reward is the sum of the reaching reward and the lifting reward.
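To build intuition for the reaching term, note that 1 - tanh(10 * d) equals 1 when the distance d is zero and decays quickly, so it mainly shapes the final approach. A quick check:

import numpy as np

for d in [0.0, 0.05, 0.1, 0.2]:
    print(f'{d:.2f} -> {1 - np.tanh(10.0 * d):.3f}')
# 0.00 -> 1.000
# 0.05 -> 0.538
# 0.10 -> 0.238
# 0.20 -> 0.036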
Note
The definitions of the action, observation, and reward are usually designed heuristically.
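To verify that the environment is wired up correctly, you can roll out a random policy. Below is a minimal usage sketch; it assumes lift.py is importable and that the SapienEnv base class provides render(), as in the previous tutorial.

from lift import LiftEnv

env = LiftEnv()
obs = env.reset()
for _ in range(200):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        break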