From 0d6ec557b1eaae0053e354f7ffe99fd0b69606c6 Mon Sep 17 00:00:00 2001
From: runjerry
Date: Mon, 11 Jan 2021 12:57:08 -0800
Subject: [PATCH] add PETS environments and reward functions

---
 alf/environments/gym_pets/__init__.py         |  30 +++
 alf/environments/gym_pets/envs/__init__.py    |  18 ++
 .../gym_pets/envs/assets/cartpole.xml         |  35 ++++
 .../gym_pets/envs/assets/half_cheetah.xml     |  95 +++++++++
 .../gym_pets/envs/assets/pusher.xml           | 101 ++++++++++
 .../gym_pets/envs/assets/reacher3d.xml        | 154 ++++++++++++++
 alf/environments/gym_pets/envs/cartpole.py    |  72 +++++++
 .../gym_pets/envs/half_cheetah.py             |  64 ++++++
 alf/environments/gym_pets/envs/pusher.py      |  76 +++++++
 alf/environments/gym_pets/envs/reacher.py     |  97 +++++++++
 alf/examples/mbrl_reward_functions.py         | 188 ++++++++++++++++++
 alf/utils/common.py                           |  21 ++
 12 files changed, 951 insertions(+)
 create mode 100644 alf/environments/gym_pets/__init__.py
 create mode 100644 alf/environments/gym_pets/envs/__init__.py
 create mode 100644 alf/environments/gym_pets/envs/assets/cartpole.xml
 create mode 100644 alf/environments/gym_pets/envs/assets/half_cheetah.xml
 create mode 100644 alf/environments/gym_pets/envs/assets/pusher.xml
 create mode 100644 alf/environments/gym_pets/envs/assets/reacher3d.xml
 create mode 100644 alf/environments/gym_pets/envs/cartpole.py
 create mode 100644 alf/environments/gym_pets/envs/half_cheetah.py
 create mode 100644 alf/environments/gym_pets/envs/pusher.py
 create mode 100644 alf/environments/gym_pets/envs/reacher.py
 create mode 100644 alf/examples/mbrl_reward_functions.py

diff --git a/alf/environments/gym_pets/__init__.py b/alf/environments/gym_pets/__init__.py
new file mode 100644
index 000000000..f87e745d0
--- /dev/null
+++ b/alf/environments/gym_pets/__init__.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from gym.envs.registration import register
+
+register(
+    id='MBRLCartpole-v0',
+    entry_point='alf.environments.gym_pets.envs:CartpoleEnv')
+
+register(
+    id='MBRLReacher3D-v0',
+    entry_point='alf.environments.gym_pets.envs:Reacher3DEnv')
+
+register(
+    id='MBRLPusher-v0', entry_point='alf.environments.gym_pets.envs:PusherEnv')
+
+register(
+    id='MBRLHalfCheetah-v0',
+    entry_point='alf.environments.gym_pets.envs:HalfCheetahEnv')
diff --git a/alf/environments/gym_pets/envs/__init__.py b/alf/environments/gym_pets/envs/__init__.py
new file mode 100644
index 000000000..c659fd52c
--- /dev/null
+++ b/alf/environments/gym_pets/envs/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .cartpole import CartpoleEnv
+from .half_cheetah import HalfCheetahEnv
+from .pusher import PusherEnv
+from .reacher import Reacher3DEnv
diff --git a/alf/environments/gym_pets/envs/assets/cartpole.xml b/alf/environments/gym_pets/envs/assets/cartpole.xml
new file mode 100644
index 000000000..284a58c9a
--- /dev/null
+++ b/alf/environments/gym_pets/envs/assets/cartpole.xml
@@ -0,0 +1,35 @@
diff --git a/alf/environments/gym_pets/envs/assets/half_cheetah.xml b/alf/environments/gym_pets/envs/assets/half_cheetah.xml
new file mode 100644
index 000000000..40a1cb62c
--- /dev/null
+++ b/alf/environments/gym_pets/envs/assets/half_cheetah.xml
@@ -0,0 +1,95 @@
diff --git a/alf/environments/gym_pets/envs/assets/pusher.xml b/alf/environments/gym_pets/envs/assets/pusher.xml
new file mode 100644
index 000000000..9e81b01a6
--- /dev/null
+++ b/alf/environments/gym_pets/envs/assets/pusher.xml
@@ -0,0 +1,101 @@
diff --git a/alf/environments/gym_pets/envs/assets/reacher3d.xml b/alf/environments/gym_pets/envs/assets/reacher3d.xml
new file mode 100644
index 000000000..a51c71b93
--- /dev/null
+++ b/alf/environments/gym_pets/envs/assets/reacher3d.xml
@@ -0,0 +1,154 @@
diff --git a/alf/environments/gym_pets/envs/cartpole.py b/alf/environments/gym_pets/envs/cartpole.py
new file mode 100644
index 000000000..191ea7b61
--- /dev/null
+++ b/alf/environments/gym_pets/envs/cartpole.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+import os
+
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class CartpoleEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    PENDULUM_LENGTH = 0.6
+
+    def __init__(self):
+        utils.EzPickle.__init__(self)
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        mujoco_env.MujocoEnv.__init__(
+            self, '%s/assets/cartpole.xml' % dir_path, 2)
+
+    def step(self, a):
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+
+        cost_lscale = CartpoleEnv.PENDULUM_LENGTH
+        reward = np.exp(
+            -np.sum(
+                np.square(self._get_ee_pos(ob) -
+                          np.array([0.0, CartpoleEnv.PENDULUM_LENGTH]))) /
+            (cost_lscale**2))
+        reward -= 0.01 * np.sum(np.square(a))
+
+        done = False
+        return ob, reward, done, {}
+
+    def reset_model(self):
+        qpos = self.init_qpos + np.random.normal(
+            0, 0.1, np.shape(self.init_qpos))
+        qvel = self.init_qvel + np.random.normal(
+            0, 0.1, np.shape(self.init_qvel))
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([self.data.qpos, self.data.qvel]).ravel()
+
+    @staticmethod
+    def _get_ee_pos(x):
+        x0, theta = x[0], x[1]
+        return np.array([
+            x0 - CartpoleEnv.PENDULUM_LENGTH * np.sin(theta),
+            -CartpoleEnv.PENDULUM_LENGTH * np.cos(theta)
+        ])
+
+    def viewer_setup(self):
+        v = self.viewer
+        v.cam.trackbodyid = 0
+        v.cam.distance = v.model.stat.extent
diff --git a/alf/environments/gym_pets/envs/half_cheetah.py b/alf/environments/gym_pets/envs/half_cheetah.py
new file mode 100644
index 000000000..7a3a58b06
--- /dev/null
+++ b/alf/environments/gym_pets/envs/half_cheetah.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+import os
+
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        self.prev_qpos = None
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        mujoco_env.MujocoEnv.__init__(
+            self, '%s/assets/half_cheetah.xml' % dir_path, 5)
+        utils.EzPickle.__init__(self)
+
+    def step(self, action):
+        self.prev_qpos = np.copy(self.data.qpos.flat)
+        self.do_simulation(action, self.frame_skip)
+        ob = self._get_obs()
+
+        reward_ctrl = -0.1 * np.square(action).sum()
+        reward_run = ob[0] - 0.0 * np.square(ob[2])
+        reward = reward_run + reward_ctrl
+
+        done = False
+        return ob, reward, done, {}
+
+    def _get_obs(self):
+        return np.concatenate([
+            (self.data.qpos.flat[:1] - self.prev_qpos[:1]) / self.dt,
+            self.data.qpos.flat[1:],
+            self.data.qvel.flat,
+        ])
+
+    def reset_model(self):
+        qpos = self.init_qpos + np.random.normal(
+            loc=0, scale=0.001, size=self.model.nq)
+        qvel = self.init_qvel + np.random.normal(
+            loc=0, scale=0.001, size=self.model.nv)
+        self.set_state(qpos, qvel)
+        self.prev_qpos = np.copy(self.data.qpos.flat)
+        return self._get_obs()
+
+    def viewer_setup(self):
+        self.viewer.cam.distance = self.model.stat.extent * 0.25
+        self.viewer.cam.elevation = -55
diff --git a/alf/environments/gym_pets/envs/pusher.py b/alf/environments/gym_pets/envs/pusher.py
new file mode 100644
index 000000000..c382bec2c
--- /dev/null
+++ b/alf/environments/gym_pets/envs/pusher.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+import os
+
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        mujoco_env.MujocoEnv.__init__(
+            self, '%s/assets/pusher.xml' % dir_path, 4)
+        utils.EzPickle.__init__(self)
+        self.reset_model()
+
+    def step(self, a):
+        obj_pos = self.get_body_com("object")
+        vec_1 = obj_pos - self.get_body_com("tips_arm")
+        vec_2 = obj_pos - self.get_body_com("goal")
+
+        reward_near = -np.sum(np.abs(vec_1))
+        reward_dist = -np.sum(np.abs(vec_2))
+        reward_ctrl = -np.square(a).sum()
+        reward = 1.25 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
+
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        done = False
+        return ob, reward, done, {}
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = -1
+        self.viewer.cam.distance = 4.0
+
+    def reset_model(self):
+        qpos = self.init_qpos
+
+        self.goal_pos = np.asarray([0, 0])
+        self.cylinder_pos = np.array([-0.25, 0.15]) + np.random.normal(
+            0, 0.025, [2])
+
+        qpos[-4:-2] = self.cylinder_pos
+        qpos[-2:] = self.goal_pos
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.005, high=0.005, size=self.model.nv)
+        qvel[-4:] = 0
+        self.set_state(qpos, qvel)
+        self.ac_goal_pos = self.get_body_com("goal")
+
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.data.qpos.flat[:7],
+            self.data.qvel.flat[:7],
+            self.get_body_com("tips_arm"),
+            self.get_body_com("object"),
+        ])
diff --git a/alf/environments/gym_pets/envs/reacher.py b/alf/environments/gym_pets/envs/reacher.py
new file mode 100644
index 000000000..764e7f313
--- /dev/null
+++ b/alf/environments/gym_pets/envs/reacher.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+from __future__ import absolute_import
+
+import os
+
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class Reacher3DEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    def __init__(self):
+        self.viewer = None
+        utils.EzPickle.__init__(self)
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        self.goal = np.zeros(3)
+        mujoco_env.MujocoEnv.__init__(
+            self, os.path.join(dir_path, 'assets/reacher3d.xml'), 2)
+
+    def step(self, a):
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        reward = -np.sum(np.square(self.get_EE_pos(ob[None]) - self.goal))
+        reward -= 0.01 * np.square(a).sum()
+        done = False
+        return ob, reward, done, dict(reward_dist=0, reward_ctrl=0)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 1
+        self.viewer.cam.distance = 2.5
+        self.viewer.cam.elevation = -30
+        self.viewer.cam.azimuth = 270
+
+    def reset_model(self):
+        qpos, qvel = np.copy(self.init_qpos), np.copy(self.init_qvel)
+        qpos[-3:] += np.random.normal(loc=0, scale=0.1, size=[3])
+        qvel[-3:] = 0
+        self.goal = qpos[-3:]
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.data.qpos.flat,
+            self.data.qvel.flat[:-3],
+        ])
+
+    def get_EE_pos(self, states):
+        theta1, theta2, theta3, theta4, theta5, theta6, theta7 = \
+            states[:, :1], states[:, 1:2], states[:, 2:3], states[:, 3:4], \
+            states[:, 4:5], states[:, 5:6], states[:, 6:]
+
+        rot_axis = np.concatenate([
+            np.cos(theta2) * np.cos(theta1),
+            np.cos(theta2) * np.sin(theta1), -np.sin(theta2)
+        ], axis=1)
+        rot_perp_axis = np.concatenate(
+            [-np.sin(theta1), np.cos(theta1),
+             np.zeros(theta1.shape)], axis=1)
+        cur_end = np.concatenate([
+            0.1 * np.cos(theta1) + 0.4 * np.cos(theta1) * np.cos(theta2),
+            0.1 * np.sin(theta1) + 0.4 * np.sin(theta1) * np.cos(theta2) -
+            0.188, -0.4 * np.sin(theta2)
+        ], axis=1)
+
+        for length, hinge, roll in [(0.321, theta4, theta3),
+                                    (0.16828, theta6, theta5)]:
+            perp_all_axis = np.cross(rot_axis, rot_perp_axis)
+            x = np.cos(hinge) * rot_axis
+            y = np.sin(hinge) * np.sin(roll) * rot_perp_axis
+            z = -np.sin(hinge) * np.cos(roll) * perp_all_axis
+            new_rot_axis = x + y + z
+            new_rot_perp_axis = np.cross(new_rot_axis, rot_axis)
+            new_rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30] = \
+                rot_perp_axis[np.linalg.norm(new_rot_perp_axis, axis=1) < 1e-30]
+            new_rot_perp_axis /= np.linalg.norm(
+                new_rot_perp_axis, axis=1, keepdims=True)
+            rot_axis, rot_perp_axis, cur_end = \
+                new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis
+
+        return cur_end
diff --git a/alf/examples/mbrl_reward_functions.py b/alf/examples/mbrl_reward_functions.py
new file mode 100644
index 000000000..a2a23b65d
--- /dev/null
+++ b/alf/examples/mbrl_reward_functions.py
@@ -0,0 +1,188 @@
+# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gin
+import torch
+
+from alf.utils import common
+# Implement the respective reward functions for desired environments here.
+
+
+@gin.configurable
+def reward_function_for_pendulum(obs, action):
+    """Function for computing reward for gym Pendulum environment. It takes
+    as input:
+    (1) observation (Tensor of shape [batch_size, observation_dim])
+    (2) action (Tensor of shape [batch_size, num_actions])
+    and returns a reward Tensor of shape [batch_size].
+    """
+
+    def _observation_cost(obs):
+        c_theta, s_theta, d_theta = obs[..., :1], obs[..., 1:2], obs[..., 2:3]
+        theta = torch.atan2(s_theta, c_theta)
+        cost = theta**2 + 0.1 * d_theta**2
+        cost = torch.sum(cost, dim=1)
+        cost = torch.where(
+            torch.isnan(cost), 1e6 * torch.ones_like(cost), cost)
+        return cost
+
+    def _action_cost(action):
+        return 0.001 * torch.sum(action**2, dim=-1)
+
+    cost = _observation_cost(obs) + _action_cost(action)
+    # negative cost as reward
+    reward = -cost
+    return reward
+
+
+@gin.configurable
+def reward_function_for_cartpole(obs, action):
+    """Function for computing reward for gym CartPole environment. It takes
+    as input:
+    (1) observation (Tensor of shape [batch_size, observation_dim])
+    (2) action (Tensor of shape [batch_size, num_actions])
+    and returns a reward Tensor of shape [batch_size].
+    """
+
+    def _observation_cost(obs):
+        x0, theta = obs[..., :1], obs[..., 1:2]
+        ee_pos = torch.cat(
+            (x0 - 0.6 * torch.sin(theta), -0.6 * torch.cos(theta)), dim=-1)
+        cost = (ee_pos - torch.as_tensor([.0, .6]))**2
+        # Negated exponential of the squared end-effector distance to the
+        # upright position, mirroring the CartpoleEnv reward exp(-d^2 / l^2).
+        cost = -torch.exp(-torch.sum(cost, dim=-1) / (0.6**2))
+        cost = torch.where(
+            torch.isnan(cost), 1e6 * torch.ones_like(cost), cost)
+
+        return cost
+
+    def _action_cost(action):
+        cost = 0.01 * torch.sum(action**2, dim=-1)
+        return cost
+
+    cost = _observation_cost(obs) + _action_cost(action)
+    reward = -cost
+    return reward
+
+
+@gin.configurable
+def reward_function_for_halfcheetah(obs, action):
+    """Function for computing reward for gym HalfCheetah environment. It takes
+    as input:
+    (1) observation (Tensor of shape [batch_size, observation_dim])
+    (2) action (Tensor of shape [batch_size, num_actions])
+    and returns a reward Tensor of shape [batch_size].
+    """
+
+    def _observation_cost(obs):
+        cost = -obs[..., 0]
+        return cost
+
+    def _action_cost(action):
+        cost = 0.1 * torch.sum(action**2, dim=-1)
+        return cost
+
+    cost = _observation_cost(obs) + _action_cost(action)
+    reward = -cost
+    return reward
+
+
+@gin.configurable
+def reward_function_for_pusher(obs, action):
+    """Function for computing reward for gym Pusher environment. It takes
+    as input:
+    (1) observation (Tensor of shape [batch_size, observation_dim])
+    (2) action (Tensor of shape [batch_size, num_actions])
+    and returns a reward Tensor of shape [batch_size].
+    """
+
+    def _observation_cost(obs):
+        to_w, og_w = 0.5, 1.25
+        tip_pos, obj_pos = obs[..., 14:17], obs[..., 17:20]
+        tip_obj_dist = torch.sum(torch.abs(tip_pos - obj_pos), dim=-1)
+        obj_goal_dist = torch.sum(
+            torch.abs(common.get_gym_env_attr('ac_goal_pos') - obj_pos),
+            dim=-1)
+        cost = to_w * tip_obj_dist + og_w * obj_goal_dist
+        cost = torch.where(
+            torch.isnan(cost), 1e6 * torch.ones_like(cost), cost)
+
+        return cost
+
+    def _action_cost(action):
+        cost = 0.1 * torch.sum(action**2, dim=-1)
+        return cost
+
+    cost = _observation_cost(obs) + _action_cost(action)
+    reward = -cost
+    return reward
+
+
+@gin.configurable
+def reward_function_for_reacher(obs, action):
+    """Function for computing reward for gym Reacher3D environment. It takes
+    as input:
+    (1) observation (Tensor of shape [batch_size, observation_dim])
+    (2) action (Tensor of shape [batch_size, num_actions])
+    and returns a reward Tensor of shape [batch_size].
+    """
+
+    def _observation_cost(obs):
+        theta1, theta2, theta3, theta4, theta5, theta6, theta7 = \
+            obs[..., :1], obs[..., 1:2], obs[..., 2:3], obs[..., 3:4], \
+            obs[..., 4:5], obs[..., 5:6], obs[..., 6:]
+        rot_axis = torch.cat(
+            (torch.cos(theta2) * torch.cos(theta1),
+             torch.cos(theta2) * torch.sin(theta1), -torch.sin(theta2)),
+            dim=-1)
+        rot_perp_axis = torch.cat(
+            (-torch.sin(theta1), torch.cos(theta1), torch.zeros_like(theta1)),
+            dim=-1)
+        cur_end = torch.cat((
+            0.1 * torch.cos(theta1) + 0.4 * torch.cos(theta1) * torch.cos(theta2),
+            0.1 * torch.sin(theta1) + 0.4 * torch.sin(theta1) * torch.cos(theta2) \
+            - 0.188,
+            -0.4 * torch.sin(theta2)), dim=-1)
+
+        for length, hinge, roll in [(0.321, theta4, theta3),
+                                    (0.16828, theta6, theta5)]:
+            perp_all_axis = torch.cross(rot_axis, rot_perp_axis)
+            x = torch.cos(hinge) * rot_axis
+            y = torch.sin(hinge) * torch.sin(roll) * rot_perp_axis
+            z = -torch.sin(hinge) * torch.cos(roll) * perp_all_axis
+            new_rot_axis = x + y + z
+            new_rot_perp_axis = torch.cross(new_rot_axis, rot_axis)
+            tmp_rot_perp_axis = torch.where(
+                torch.lt(torch.norm(new_rot_perp_axis, dim=-1), 1e-30),
+                rot_perp_axis.permute(-1,
+                                      *list(range(rot_perp_axis.ndim - 1))),
+                new_rot_perp_axis.permute(
+                    -1, *list(range(new_rot_perp_axis.ndim - 1))))
+            new_rot_perp_axis = tmp_rot_perp_axis.permute(
+                *list(range(1, tmp_rot_perp_axis.ndim)), 0)
+            new_rot_perp_axis /= torch.norm(
+                new_rot_perp_axis, dim=-1, keepdim=True)
+            rot_axis, rot_perp_axis, cur_end = \
+                new_rot_axis, new_rot_perp_axis, cur_end + length * new_rot_axis
+
+        cost = torch.sum(
+            torch.square(cur_end - common.get_gym_env_attr('goal')), dim=-1)
+        return cost
+
+    def _action_cost(action):
+        cost = 0.01 * torch.sum(action**2, dim=-1)
+        return cost
+
+    cost = _observation_cost(obs) + _action_cost(action)
+    reward = -cost
+    return reward
diff --git a/alf/utils/common.py b/alf/utils/common.py
index 47f2cb60c..126a84855 100644
--- a/alf/utils/common.py
+++ b/alf/utils/common.py
@@ -33,6 +33,7 @@
 from typing import Callable
 
 import alf
+from alf.environments.parallel_environment import ParallelAlfEnvironment
 import alf.nest as nest
 from alf.tensor_specs import TensorSpec, BoundedTensorSpec
 from alf.utils.spec_utils import zeros_from_spec as zero_tensor_from_nested_spec
@@ -577,6 +578,26 @@ def get_vocab_size():
     return 0
 
 
+@gin.configurable
+def get_gym_env_attr(attr):
+    """Get specific attr of gym env wrapped in the global environment. Used for
+    customized gym environments.
+
+    Args:
+        attr (str): the attribute of the gym env.
+
+    Returns:
+        gym_env.attr
+    """
+    assert _env
+    if isinstance(_env, ParallelAlfEnvironment):
+        gym_env = _env.envs[0].gym
+    else:
+        gym_env = _env._env.gym
+    assert hasattr(gym_env, attr)
+    return torch.as_tensor(getattr(gym_env, attr), dtype=torch.float32)
+
+
 @gin.configurable
 def active_action_target_entropy(active_action_portion=0.2, min_entropy=0.3):
     """Automatically compute target entropy given the action spec. Currently
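
A minimal usage sketch of the pieces added in this patch, assuming MuJoCo and
mujoco-py are installed. Importing alf.environments.gym_pets runs the register()
calls above, and the reward functions operate on batched tensors, e.g. on
model-predicted observations and candidate actions during planning:

    import gym
    import torch

    import alf.environments.gym_pets  # registers the MBRL* environment ids
    from alf.examples.mbrl_reward_functions import reward_function_for_cartpole

    env = gym.make('MBRLCartpole-v0')
    obs = env.reset()  # qpos (2) followed by qvel (2)

    # Batch of one observation/action pair; shapes follow the docstrings:
    # obs [batch_size, observation_dim], action [batch_size, num_actions].
    obs_batch = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
    action_batch = torch.zeros(1, env.action_space.shape[0])
    reward = reward_function_for_cartpole(obs_batch, action_batch)  # shape [1]

reward_function_for_pusher and reward_function_for_reacher additionally read the
goal position from the live environment through common.get_gym_env_attr(), so
they assume the corresponding MBRL environment has already been created as the
global ALF environment before they are called.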