diff --git a/docs/index.md b/docs/index.md
index 0682ae5e6..772375949 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -115,7 +115,7 @@ Obtain a key, which should be named `mjkey.txt`, and place the key into the `mjp
 
 **ROS Setup** (optional)
 
-1. Install [ROS](http://ros.org), including the standard [PR2 packages](http://wiki.ros.org/Robots/PR2)
+1. Install [ROS](http://ros.org), including the standard [PR2 packages](http://wiki.ros.org/Robots/PR2) and the [Universal Robot Stack](wiki.ros.org/Industrial/Install)
 
 2. Set up paths by adding the following to your `~/.bashrc` file:
 
@@ -263,6 +263,64 @@ python python/gps/gps_main.py pr2_badmm_example
 
 To learn how to make your own experiment and/or set your own initial and target positions, see [the next section](#running-a-new-experiment)
 
+#### Simulated UR example
+
+To run the code on a simulated UR be sure to first follow the instructions above for ROS setup.
+
+You will need the following dependencies:
+```sh
+sudo apt-get install ros-indigo-urdf*
+sudo apt-get install xacro
+sudo apt-get install robot-model
+```
+
+If you build ros-industrial from source, it should be fine. But if you install the ros-industrial stack using apt-get, these folders are inside /opt/ros, which is only writable by root, so it is probably better to download these files into a workspace/folder you own:
+
+https://github.com/ros-industrial/universal_robot/tree/indigo/ur_description/urdf 
+
+For ROS >= Indigo, you need to manually convert these xacro files to urdf. One way to do so is to call and then point to:
+
+```sh
+rosrun xacro xacro --inorder -o ur5_robot.urdf ur5_robot.xacro
+```
+
+You must also set an environmental variable UR_PATH that makes gps aware of the location of the installed universal_robot package. For ROS < Indigo, you can do this be adding the following to your .bashrc:
+
+```sh
+export UR_PATH=/path/to/universal_robot
+```
+
+After adding this line run:
+```sh
+source .bashrc
+```
+
+###### 1. Start the controller
+
+Note: You may find it helpful early on to edit the default ur<5 or 10>.launch file in the ur_gazebo package by changing the -z argument in the spawn_gazebo_model node from .1 to a larger number like 5.  This will place the simulated UR, 5 meters above the ground rather than .1 removing the potential for the robot to interact with the ground.
+
+Launch gazebo and spawn a UR<5 or 10> inside of it.
+
+```sh
+roslaunch ur_gazebo ur<5 or 10>.launch
+```
+
+Now you're ready to run the examples via gps_main. This can be done on any machine as long as the variables are set appropriately.
+
+The first example starts with a default initial controller and learns to move the end effector joint to a specified location.
+
+Run the following from the gps directory:
+
+```sh
+python python/gps/gps_main.py ur_example
+```
+
+The second example trains a neural network policy to reach a goal pose from different starting positions, using guided policy search.
+
+```sh
+python python/gps/gps_main.py ur_caffe_example
+```
+
 #### Running a new experiment
 1. Set up a new experiment directory by running:
     ```sh
diff --git a/experiments/ur_caffe_example/hyperparams.py b/experiments/ur_caffe_example/hyperparams.py
new file mode 100644
index 000000000..8b1de251f
--- /dev/null
+++ b/experiments/ur_caffe_example/hyperparams.py
@@ -0,0 +1,314 @@
+# To get started, copy over hyperparams from another experiment.
+# Visit rll.berkeley.edu/gps/hyperparams.html for documentation.
+""" Hyperparameters for UR trajectory optimization experiment. """
+from __future__ import division
+import numpy as np
+import os.path
+import rospy
+from datetime import datetime
+from tf import TransformListener 
+
+from gps import __file__ as gps_filepath
+from gps.agent.ur_ros.agent_ur import AgentURROS
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import RAMP_LINEAR, RAMP_FINAL_ONLY
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.gui.target_setup_gui import load_pose_from_npz
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION, \
+        TRIAL_ARM, JOINT_SPACE
+from gps.utility.general_utils import get_ee_points, get_position
+from gps.gui.config import generate_experiment_info
+
+# Topics for the robot publisher and subscriber.
+JOINT_PUBLISHER  = '/arm_controller/command'
+JOINT_SUBSCRIBER = '/arm_controller/state'
+
+# 'SLOWNESS' is how far in the future (in seconds) position control extrapolates
+# when it publishs actions for robot movement.  1.0-10.0 is fine for simulation.
+SLOWNESS = 10.0
+# 'RESET_SLOWNESS' is how long (in seconds) we tell the robot to take when
+# returning to its start configuration.
+RESET_SLOWNESS = 2.0
+
+# Set constants for joints
+SHOULDER_PAN_JOINT = 'shoulder_pan_joint'
+SHOULDER_LIFT_JOINT = 'shoulder_lift_joint'
+ELBOW_JOINT = 'elbow_joint'
+WRIST_1_JOINT = 'wrist_1_joint'
+WRIST_2_JOINT = 'wrist_2_joint'
+WRIST_3_JOINT = 'wrist_3_joint'
+
+# Set constants for links
+BASE_LINK = 'base_link'
+SHOULDER_LINK = 'shoulder_link'
+UPPER_ARM_LINK = 'upper_arm_link'
+FOREARM_LINK = 'forearm_link'
+WRIST_1_LINK = 'wrist_1_link'
+WRIST_2_LINK = 'wrist_2_link'
+WRIST_3_LINK = 'wrist_3_link'
+
+# Set end effector constants
+INITIAL_JOINTS = [0, -np.pi/2, np.pi/2, 0, 0, 0]
+
+# Set the number of goal points. 1 by default for a single end effector tip.
+NUM_EE_POINTS = 1
+EE_POINTS = np.array([[0, 0, 0]])
+
+# Specify a goal state in cartesian coordinates.
+EE_POS_TGT = np.asmatrix([.70, .70, .50])
+"""UR 10 Examples:
+EE_POS_TGT = np.asmatrix([.29, .52, .62]) # Target where all joints are 0.
+EE_POS_TGT = np.asmatrix([.65, .80, .30]) # Target in positive octant near ground.
+EE_POS_TGT = np.asmatrix([.70, .70, .50]) # Target in positive octant used for debugging convergence.
+The Gazebo sim converges to the above point with non-action costs: 
+(-589.75, -594.71, -599.54, -601.54, -602.75, -603.28, -604.28, -604.79, -605.55, -606.29)
+Distance from Goal: (0.014, 0.005, -0.017)
+"""
+
+# Set to identity unless you want the goal to have a certain orientation.
+EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 
+
+# Only edit these when editing the robot joints and links. 
+# The lengths of these arrays define numerous parameters in GPS.
+JOINT_ORDER = [SHOULDER_PAN_JOINT, SHOULDER_LIFT_JOINT, ELBOW_JOINT,
+               WRIST_1_JOINT, WRIST_2_JOINT, WRIST_3_JOINT]
+LINK_NAMES = [BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK,
+              WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK]
+
+# Hyperparamters to be tuned for optimizing policy learning on the specific robot.
+UR_GAINS = np.array([1, 1, 1, 1, 1, 1])
+
+# Packaging sensor dimensional data for refernece.
+SENSOR_DIMS = {
+    JOINT_ANGLES: len(JOINT_ORDER),
+    JOINT_VELOCITIES: len(JOINT_ORDER),
+    END_EFFECTOR_POINTS: NUM_EE_POINTS * EE_POINTS.shape[1],
+    END_EFFECTOR_POINT_VELOCITIES: NUM_EE_POINTS * EE_POINTS.shape[1],
+    ACTION: len(UR_GAINS),
+}
+
+# States to check in agent._process_observations.
+STATE_TYPES = {'positions': JOINT_ANGLES,
+               'velocities': JOINT_VELOCITIES}
+
+# Path to urdf of robot.
+TREE_PATH = os.environ['UR_PATH'] + '/ur_description/urdf/ur10_robot.urdf'
+
+# Be sure to specify the correct experiment directory to save policy data at.
+BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
+EXP_DIR = BASE_DIR + '/../experiments/ur_caffe_example/'
+
+# Set the number of seconds per step of a sample.
+TIMESTEP     = 0.01 # Typically 0.01.
+# Set the number of timesteps per sample.
+STEP_COUNT   = 100 # Typically 100.
+# Set the number of samples per condition.
+SAMPLE_COUNT = 5 # Typically 5.
+# set the number of conditions per iteration.
+CONDITIONS   = 2 # Typically 2 for Caffe and 1 for LQR.
+# Set the number of trajectory iterations to collect.
+ITERATIONS   = 10 # Typically 10.
+
+x0s = []
+ee_tgts = []
+reset_conditions = []
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': CONDITIONS,
+}
+
+# Set up each condition.
+for i in xrange(common['conditions']):
+
+    # Use hardcoded default vals init and target locations
+    ja_x0 = np.zeros(SENSOR_DIMS[JOINT_ANGLES])
+    ee_pos_x0 = np.zeros((1, 3))
+    ee_rot_x0 = np.zeros((3, 3))
+    
+    ee_pos_tgt = EE_POS_TGT
+    ee_rot_tgt = EE_ROT_TGT
+
+    state_space = sum(SENSOR_DIMS.values()) - SENSOR_DIMS[ACTION]
+
+    joint_dim = SENSOR_DIMS[JOINT_ANGLES] + SENSOR_DIMS[JOINT_VELOCITIES]
+
+    # Initialized to start position and inital velocities are 0
+    x0 = np.zeros(state_space)
+    x0[:SENSOR_DIMS[JOINT_ANGLES]] = ja_x0
+
+    # Need for this node will go away upon migration to KDL
+    rospy.init_node('gps_agent_ur_ros_node')
+    
+    # Set starting end effector position using TF
+    tf = TransformListener()
+
+    # Sleep for .1 secs to give the node a chance to kick off
+    rospy.sleep(1)
+    time = tf.getLatestCommonTime(WRIST_3_LINK, BASE_LINK)
+
+    x0[joint_dim:(joint_dim + NUM_EE_POINTS * EE_POINTS.shape[1])] = get_position(tf, WRIST_3_LINK, BASE_LINK, time)
+
+    # Initialize target end effector position
+    ee_tgt = np.ndarray.flatten(
+        get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T
+    )
+
+    reset_condition = {
+        JOINT_ANGLES: INITIAL_JOINTS,
+        JOINT_VELOCITIES: []
+    }
+
+    x0s.append(x0)
+    ee_tgts.append(ee_tgt)
+    reset_conditions.append(reset_condition)
+
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = {
+    'type': AgentURROS,
+    'dt': TIMESTEP,
+    'dU': SENSOR_DIMS[ACTION],
+    'conditions': common['conditions'],
+    'T': STEP_COUNT,
+    'x0': x0s,
+    'ee_points_tgt': ee_tgts,
+    'reset_conditions': reset_conditions,
+    'sensor_dims': SENSOR_DIMS,
+    'joint_order': JOINT_ORDER,
+    'link_names': LINK_NAMES,
+    'state_types': STATE_TYPES,
+    'tree_path': TREE_PATH,
+    'joint_publisher': JOINT_PUBLISHER,
+    'joint_subscriber': JOINT_SUBSCRIBER,
+    'slowness': SLOWNESS,
+    'reset_slowness': RESET_SLOWNESS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES,
+                      END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
+    'end_effector_points': [EE_POINTS],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES]
+}
+
+algorithm = {
+    'type': AlgorithmBADMM,
+    'conditions': common['conditions'],
+    'iterations': ITERATIONS,
+    'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-1]),
+    'policy_dual_rate': 0.1,
+    'ent_reg_schedule': np.array([1e-3, 1e-3, 1e-2, 1e-1]),
+    'fixed_lg_step': 3,
+    'kl_step': 5.0,
+    'init_pol_wt': 0.01,
+    'min_step_mult': 0.01,
+    'max_step_mult': 1.0,
+    'sample_decrease_var': 0.05,
+    'sample_increase_var': 0.1,
+    'exp_step_increase': 2.0,
+    'exp_step_decrease': 0.5,
+    'exp_step_upper': 0.5,
+    'exp_step_lower': 1.0,
+    'max_policy_samples': 6,
+    'policy_sample_mode': 'add',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains': 1.0 / UR_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 0.5,
+    'stiffness_vel': .25, 
+    'final_weight': 50, 
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+# This cost function takes into account the distance between the end effector's
+# current and target positions, weighted in a linearly increasing fassion
+# as the number of trials grows from 0 to T-1. 
+fk_cost_ramp = {
+    'type': CostFK,
+    # Target end effector is subtracted out of EE_POINTS in pr2 c++ plugin so goal
+    # is 0. The UR agent also subtracts this out for consistency.
+    'target_end_effector': [np.zeros(NUM_EE_POINTS * EE_POINTS.shape[1])],
+    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
+    'l1': 0.1,
+    'l2': 0.0001,
+    'ramp_option': RAMP_LINEAR,
+}
+
+# This cost function takes into account the distance between the end effector's
+# current and target positions at time T-1 only.
+fk_cost_final = {
+    'type': CostFK,
+    'target_end_effector': np.zeros(NUM_EE_POINTS * EE_POINTS.shape[1]),
+    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
+    'l1': 1.0,
+    'l2': 0.0,
+    'wp_final_multiplier': 100.0,  # Weight multiplier on final timestep.
+    'ramp_option': RAMP_FINAL_ONLY,
+}
+
+# Combines the cost functions in 'costs' to produce a single cost function
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [fk_cost_ramp, fk_cost_final],
+    'weights': [1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 20,
+        'min_samples_per_cluster': 40,
+        'max_samples': 20,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptCaffe,
+    'weights_file_prefix': EXP_DIR + 'policy',
+    'iterations': 3000,
+    'dU': SENSOR_DIMS[ACTION]
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 20,
+    'min_samples_per_cluster': 40,
+    'max_samples': 40,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'common': common,
+    'verbose_trials': 0,
+    'verbose_policy_trials': 1,
+    'agent': agent,
+    'gui_on': True,
+    'algorithm': algorithm,
+    'num_samples': SAMPLE_COUNT,
+}
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/ur_example/hyperparams.py b/experiments/ur_example/hyperparams.py
new file mode 100644
index 000000000..d62c8320f
--- /dev/null
+++ b/experiments/ur_example/hyperparams.py
@@ -0,0 +1,283 @@
+# To get started, copy over hyperparams from another experiment.
+# Visit rll.berkeley.edu/gps/hyperparams.html for documentation.
+""" Hyperparameters for UR trajectory optimization experiment. """
+from __future__ import division
+import numpy as np
+import os.path
+import rospy
+from datetime import datetime
+from tf import TransformListener 
+
+from gps import __file__ as gps_filepath
+from gps.agent.ur_ros.agent_ur import AgentURROS
+from gps.algorithm.algorithm_traj_opt import AlgorithmTrajOpt
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import RAMP_LINEAR, RAMP_FINAL_ONLY
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.gui.target_setup_gui import load_pose_from_npz
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION, \
+        TRIAL_ARM, JOINT_SPACE
+from gps.utility.general_utils import get_ee_points, get_position
+from gps.gui.config import generate_experiment_info
+
+# Topics for the robot publisher and subscriber.
+JOINT_PUBLISHER  = '/arm_controller/command'
+JOINT_SUBSCRIBER = '/arm_controller/state'
+
+# 'SLOWNESS' is how far in the future (in seconds) position control extrapolates
+# when it publishs actions for robot movement.  1.0-10.0 is fine for simulation.
+SLOWNESS = 10.0
+# 'RESET_SLOWNESS' is how long (in seconds) we tell the robot to take when
+# returning to its start configuration.
+RESET_SLOWNESS = 2.0
+
+# Set constants for joints
+SHOULDER_PAN_JOINT = 'shoulder_pan_joint'
+SHOULDER_LIFT_JOINT = 'shoulder_lift_joint'
+ELBOW_JOINT = 'elbow_joint'
+WRIST_1_JOINT = 'wrist_1_joint'
+WRIST_2_JOINT = 'wrist_2_joint'
+WRIST_3_JOINT = 'wrist_3_joint'
+
+# Set constants for links
+BASE_LINK = 'base_link'
+SHOULDER_LINK = 'shoulder_link'
+UPPER_ARM_LINK = 'upper_arm_link'
+FOREARM_LINK = 'forearm_link'
+WRIST_1_LINK = 'wrist_1_link'
+WRIST_2_LINK = 'wrist_2_link'
+WRIST_3_LINK = 'wrist_3_link'
+
+# Set end effector constants
+INITIAL_JOINTS = [0, -np.pi/2, np.pi/2, 0, 0, 0]
+
+# Set the number of goal points. 1 by default for a single end effector tip.
+NUM_EE_POINTS = 1
+EE_POINTS = np.array([[0, 0, 0]])
+
+# Specify a goal state in cartesian coordinates.
+EE_POS_TGT = np.asmatrix([.70, .70, .50])
+"""UR 10 Examples:
+EE_POS_TGT = np.asmatrix([.29, .52, .62]) # Target where all joints are 0.
+EE_POS_TGT = np.asmatrix([.65, .80, .30]) # Target in positive octant near ground.
+EE_POS_TGT = np.asmatrix([.70, .70, .50]) # Target in positive octant used for debugging convergence.
+The Gazebo sim converges to the above point with non-action costs: 
+(-589.75, -594.71, -599.54, -601.54, -602.75, -603.28, -604.28, -604.79, -605.55, -606.29)
+Distance from Goal: (0.014, 0.005, -0.017)
+"""
+
+# Set to identity unless you want the goal to have a certain orientation.
+EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 
+
+# Only edit these when editing the robot joints and links. 
+# The lengths of these arrays define numerous parameters in GPS.
+JOINT_ORDER = [SHOULDER_PAN_JOINT, SHOULDER_LIFT_JOINT, ELBOW_JOINT,
+               WRIST_1_JOINT, WRIST_2_JOINT, WRIST_3_JOINT]
+LINK_NAMES = [BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK,
+              WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK]
+
+# Hyperparamters to be tuned for optimizing policy learning on the specific robot.
+UR_GAINS = np.array([1, 1, 1, 1, 1, 1])
+
+# Packaging sensor dimensional data for refernece.
+SENSOR_DIMS = {
+    JOINT_ANGLES: len(JOINT_ORDER),
+    JOINT_VELOCITIES: len(JOINT_ORDER),
+    END_EFFECTOR_POINTS: NUM_EE_POINTS * EE_POINTS.shape[1],
+    END_EFFECTOR_POINT_VELOCITIES: NUM_EE_POINTS * EE_POINTS.shape[1],
+    ACTION: len(UR_GAINS),
+}
+
+# States to check in agent._process_observations.
+STATE_TYPES = {'positions': JOINT_ANGLES,
+               'velocities': JOINT_VELOCITIES}
+
+# Path to urdf of robot.
+TREE_PATH = os.environ['UR_PATH'] + '/ur_description/urdf/ur10_robot.urdf'
+
+# Be sure to specify the correct experiment directory to save policy data at.
+BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
+EXP_DIR = BASE_DIR + '/../experiments/ur_example/'
+
+# Set the number of seconds per step of a sample.
+TIMESTEP     = 0.01 # Typically 0.01.
+# Set the number of timesteps per sample.
+STEP_COUNT   = 100 # Typically 100.
+# Set the number of samples per condition.
+SAMPLE_COUNT = 5 # Typically 5.
+# set the number of conditions per iteration.
+CONDITIONS   = 1 # Typically 2 for Caffe and 1 for LQR.
+# Set the number of trajectory iterations to collect.
+ITERATIONS   = 10 # Typically 10.
+
+x0s = []
+ee_tgts = []
+reset_conditions = []
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': CONDITIONS,
+}
+
+# Set up each condition.
+for i in xrange(common['conditions']):
+
+    # Use hardcoded default vals init and target locations
+    ja_x0 = np.zeros(SENSOR_DIMS[JOINT_ANGLES])
+    ee_pos_x0 = np.zeros((1, 3))
+    ee_rot_x0 = np.zeros((3, 3))
+    
+    ee_pos_tgt = EE_POS_TGT
+    ee_rot_tgt = EE_ROT_TGT
+
+    state_space = sum(SENSOR_DIMS.values()) - SENSOR_DIMS[ACTION]
+
+    joint_dim = SENSOR_DIMS[JOINT_ANGLES] + SENSOR_DIMS[JOINT_VELOCITIES]
+
+    # Initialized to start position and inital velocities are 0
+    x0 = np.zeros(state_space)
+    x0[:SENSOR_DIMS[JOINT_ANGLES]] = ja_x0
+
+    # Need for this node will go away upon migration to KDL
+    rospy.init_node('gps_agent_ur_ros_node')
+    
+    # Set starting end effector position using TF
+    tf = TransformListener()
+
+    # Sleep for .1 secs to give the node a chance to kick off
+    rospy.sleep(1)
+    time = tf.getLatestCommonTime(WRIST_3_LINK, BASE_LINK)
+
+    x0[joint_dim:(joint_dim + NUM_EE_POINTS * EE_POINTS.shape[1])] = get_position(tf, WRIST_3_LINK, BASE_LINK, time)
+
+    # Initialize target end effector position
+    ee_tgt = np.ndarray.flatten(
+        get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T
+    )
+
+    reset_condition = {
+        JOINT_ANGLES: INITIAL_JOINTS,
+        JOINT_VELOCITIES: []
+    }
+
+    x0s.append(x0)
+    ee_tgts.append(ee_tgt)
+    reset_conditions.append(reset_condition)
+
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = {
+    'type': AgentURROS,
+    'dt': TIMESTEP,
+    'dU': SENSOR_DIMS[ACTION],
+    'conditions': common['conditions'],
+    'T': STEP_COUNT,
+    'x0': x0s,
+    'ee_points_tgt': ee_tgts,
+    'reset_conditions': reset_conditions,
+    'sensor_dims': SENSOR_DIMS,
+    'joint_order': JOINT_ORDER,
+    'link_names': LINK_NAMES,
+    'state_types': STATE_TYPES,
+    'tree_path': TREE_PATH,
+    'joint_publisher': JOINT_PUBLISHER,
+    'joint_subscriber': JOINT_SUBSCRIBER,
+    'slowness': SLOWNESS,
+    'reset_slowness': RESET_SLOWNESS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES,
+                      END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
+    'end_effector_points': [EE_POINTS],
+    'obs_include': [],
+}
+
+algorithm = {
+    'type': AlgorithmTrajOpt,
+    'conditions': common['conditions'],
+    'iterations': ITERATIONS,
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains': 1.0 / UR_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 0.5,
+    'stiffness_vel': .25,
+    'final_weight': 50,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+# This cost function takes into account the distance between the end effector's
+# current and target positions, weighted in a linearly increasing fassion
+# as the number of trials grows from 0 to T-1.  
+fk_cost_ramp = {
+    'type': CostFK,
+    # Target end effector is subtracted out of EE_POINTS in pr2 c++ plugin so goal
+    # is 0. The UR agent also subtracts this out for consistency.
+    'target_end_effector': [np.zeros(NUM_EE_POINTS * EE_POINTS.shape[1])],
+    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
+    'l1': 0.1,
+    'l2': 0.0001,
+    'ramp_option': RAMP_LINEAR,
+}
+
+# This cost function takes into account the distance between the end effector's
+# current and target positions at time T-1 only.
+fk_cost_final = {
+    'type': CostFK,
+    'target_end_effector': np.zeros(NUM_EE_POINTS * EE_POINTS.shape[1]),
+    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
+    'l1': 1.0,
+    'l2': 0.0,
+    'wp_final_multiplier': 100.0,  # Weight multiplier on final timestep.
+    'ramp_option': RAMP_FINAL_ONLY,
+}
+
+# Combines the cost functions in 'costs' to produce a single cost function
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [fk_cost_ramp, fk_cost_final],
+    'weights': [1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 20,
+        'min_samples_per_cluster': 40,
+        'max_samples': 20,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'common': common,
+    'verbose_trials': 0,
+    'agent': agent,
+    'gui_on': True,
+    'algorithm': algorithm,
+    'num_samples': SAMPLE_COUNT,
+}
+
+common['info'] = generate_experiment_info(config)
diff --git a/python/gps/agent/agent_utils.py b/python/gps/agent/agent_utils.py
index 5fb242a03..b372349c2 100644
--- a/python/gps/agent/agent_utils.py
+++ b/python/gps/agent/agent_utils.py
@@ -31,7 +31,7 @@ def generate_noise(T, dU, hyperparams):
             noise = noise / np.sqrt(variance)
     return noise
 
-
+# makes a list of shallow copies of values of length n
 def setup(value, n):
     """ Go through various types of hyperparameters. """
     if not isinstance(value, list):
diff --git a/python/gps/agent/config.py b/python/gps/agent/config.py
index 9c2e18a6a..ec7440f18 100755
--- a/python/gps/agent/config.py
+++ b/python/gps/agent/config.py
@@ -20,6 +20,7 @@
     'smooth_noise_renormalize': True,
 }
 
+AGENT_UR_ROS = {'trial_timeout': 4 }
 
 try:
     import rospkg
diff --git a/python/gps/agent/ur_ros/__init__.py b/python/gps/agent/ur_ros/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/gps/agent/ur_ros/agent_ur.py b/python/gps/agent/ur_ros/agent_ur.py
new file mode 100644
index 000000000..a843bbd9f
--- /dev/null
+++ b/python/gps/agent/ur_ros/agent_ur.py
@@ -0,0 +1,491 @@
+import os
+import copy # Only used to copy the agent config data.
+import numpy as np # Used pretty much everywhere.
+import threading # Used for time locks to synchronize position data.
+import rospy # Needed for nodes, rate, sleep, publish, and subscribe.
+
+from gps.agent.agent import Agent # GPS class needed to inherit from.
+from gps.agent.agent_utils import setup, generate_noise # setup used to get hyperparams in init and generate_noise to get noise in sample.
+from gps.agent.config import AGENT_UR_ROS # Parameters needed for config in __init__.
+from gps.sample.sample import Sample # Used to build a Sample object for each sample taken.
+from gps.utility.general_utils import get_position # For getting points and velocities.
+
+from tf import TransformListener # Needed for listening to and transforming robot state information.
+from trajectory_msgs.msg import JointTrajectory, JointTrajectoryPoint # Used for publishing UR joint angles.
+from control_msgs.msg import JointTrajectoryControllerState # Used for subscribing to the UR.
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, ACTION, END_EFFECTOR_POINTS, END_EFFECTOR_POINT_JACOBIANS, END_EFFECTOR_POINT_VELOCITIES
+from tree_urdf import treeFromFile # For KDL Jacobians
+from PyKDL import Jacobian, Chain, ChainJntToJacSolver, JntArray # For KDL Jacobians
+
+class MSG_INVALID_JOINT_NAMES_DIFFER(Exception):
+    """Error object exclusively raised by _process_observations."""
+    pass
+
+class ROBOT_MADE_CONTACT_WITH_GAZEBO_GROUND_SO_RESTART_ROSLAUNCH(Exception):
+    """Error object exclusively raised by reset."""
+    pass
+
+class AgentURROS(Agent):
+    """Connects the UR actions and GPS algorithms."""
+
+    def __init__(self, hyperparams, init_node=True):
+        """Initialized Agent.
+        hyperparams: Dictionary of hyperparameters.
+        init_node:   Whether or not to initialize a new ROS node."""
+
+        # Setup the main node.
+        if init_node:
+            rospy.init_node('gps_agent_ur_ros_node')
+
+        # Pull parameters from hyperparams file.
+        config = copy.deepcopy(AGENT_UR_ROS)
+        config.update(hyperparams)
+        Agent.__init__(self, config)
+        conditions = self._hyperparams['conditions']
+        for field in ('x0', 'ee_points_tgt', 'reset_conditions'):
+            self._hyperparams[field] = setup(self._hyperparams[field],
+                                             conditions)
+        # x0 is not used by the agent, but it needs to be set on the agent
+        # object because the GPS algorithm implementation gets the value it
+        # uses for x0 from the agent object.  (This should probably be changed.)
+        self.x0 = self._hyperparams['x0']
+
+        # Used by subscriber to get the present time and by main thread to
+        # get buffered positions.
+        self.tf = TransformListener()
+
+        # Times when EE positions were most recently recorded.  Set by the
+        # subscriber thread when it collects the EE positions from ROS and
+        # stores them for later use on the main thread.
+        self._ee_time = None
+        self._previous_ee_time = None
+        self._ref_time = None
+
+        # Lock used to make sure the subscriber does not update the EE times
+        # while they are being used by the main thread.
+        # The subscriber thread acquires this lock with blocking=False to
+        # skip an update of the protected data if the lock is held elsewhere
+        # (ie, the main thread).  The main thread acquires this lock with
+        # blocking=True to block until the lock has been acquired.
+        self._time_lock = threading.Lock()
+
+        # Flag denoting if observations read in from ROS messages are fresh
+        # or have already been used in processing.  Used to make sure that the
+        # observations have been updated by the subscriber before the main
+        # thread will try to use them.
+        self._observations_stale = True
+
+        # Message sent via the joint_subscriber topic indicating the
+        # newly observed state of the UR.
+        self._observation_msg = None
+
+        # set dimensions of action
+        self.dU = self._hyperparams['dU']
+
+        self._valid_joint_set = set(hyperparams['joint_order'])
+        self._valid_joint_index = {joint:index for joint, index in
+                                   enumerate(hyperparams['joint_order'])}
+
+        # Initialize the publisher and subscriber.  The subscriber begins
+        # checking the time every 40 msec by default.
+        self._pub = rospy.Publisher(self._hyperparams['joint_publisher'], 
+                                     JointTrajectory)
+        self._sub = rospy.Subscriber(self._hyperparams['joint_subscriber'],
+                                     JointTrajectoryControllerState,
+                                     self._observation_callback)
+
+        # Used for enforcing the period specified in hyperparameters.
+        self.period = self._hyperparams['dt']
+        self.r = rospy.Rate(1. / self.period)
+        self.r.sleep()
+
+        self._currently_resetting = False
+        self._reset_cv = threading.Condition(self._time_lock)
+
+    def _observation_callback(self, message):
+        """This callback is set on the subscriber node in self.__init__().
+        It's called by ROS every 40 ms while the subscriber is listening.
+        Primarily updates the present and latest times.
+        This callback is invoked asynchronously, so is effectively a
+        "subscriber thread", separate from the control flow of the rest of
+        GPS, which runs in the "main thread".
+        message: observation from the robot to store each listen."""
+
+        with self._time_lock:
+            # Gets the most recent time according to the TF node when a TF message
+            # transforming between base and end effector frames is published.
+            new_time = self.tf.getLatestCommonTime(self._hyperparams['link_names'][6],
+                self._hyperparams['link_names'][0])
+
+            # Skip updating when _observation_callback() is called multiple times.
+            # between TF updates.
+            if new_time == self._ee_time:
+                return
+
+            # Set the reference time if it has not yet been set.
+            if self._ref_time is None:
+                self._ref_time = new_time
+                return
+
+            # Set previous_ee_time if not set or same as present time.
+            if self._previous_ee_time is None or self._previous_ee_time == self._ee_time:
+                self._previous_ee_time = self._ee_time
+                self._ee_time = new_time
+                return
+
+            # Compute the amount of time that has passed between the last
+            # "present" time and now.
+            delta_t = new_time - self._ref_time
+            delta_ref_t = float(delta_t.secs) + float(delta_t.nsecs) * 1e-9
+            delta_t = new_time - self._ee_time
+
+            # Fields such as self._observation_msg keep track of all
+            # valid messages received over the wire regardless of whether
+            # "enough time has passed".  This is useful in cases such as
+            # when you would like to examine the immediately prior value
+            # to approximate velocity.  If these values change faster than
+            # those such as self._ref_time, there will be no ill effect
+            # as self._observations_stale will indicate when the latter
+            # are ready to be used.
+            self.delta_t = float(delta_t.secs) + float(delta_t.nsecs) * 10.**-9
+            self._observation_msg = message
+            self._previous_ee_time = self._ee_time
+            self._ee_time = new_time
+
+            if self._currently_resetting:
+                epsilon = 1e-3
+                reset_action = np.asarray(
+                    self._hyperparams['reset_conditions'][0][JOINT_ANGLES])
+                now_action = np.asarray(
+                    self._observation_msg.actual.positions[:len(reset_action)])
+                du = np.linalg.norm(reset_action-now_action, float('inf'))
+                if du < epsilon:
+                    self._currently_resetting = False
+                    self._reset_cv.notify_all()
+
+            # Check if not enough time has passed.
+            if delta_ref_t < self.period:
+                return
+
+            # Set observations stale and release lock.
+            self._ref_time = new_time
+            self._observations_stale = False
+
+    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
+        """This is the main method run when the Agent object is called by GPS.
+        Draws a sample from the environment, using the specified policy and
+        under the specified condition.
+        If "save" is True, then append the sample object of type Sample to
+        self._samples[condition].
+        TensorFlow is not yet implemented (FIXME)."""
+
+        # Reset the arm to initial configuration at start of each new trial.
+        self.reset(condition)
+
+        # Generate noise to be used in the policy object to compute next state.
+        if noisy:
+            noise = generate_noise(self.T, self.dU, self._hyperparams)
+        else:
+            noise = np.zeros((self.T, self.dU))
+
+        # Execute the trial.
+        sample_data = self._run_trial(policy, noise,
+                          time_to_run=self._hyperparams['trial_timeout'])
+
+        # Write trial data into sample object.
+        sample = Sample(self)
+        for sensor_id, data in sample_data.iteritems():
+            sample.set(sensor_id, np.asarray(data))
+
+        # Save the sample to the data structure. This is controlled by gps_main.py.
+        if save:
+            self._samples[condition].append(sample)
+
+        return sample
+
+    def reset(self, condition):
+        """Not necessarily a helper function as it is inherited.
+        Reset the agent for a particular experiment condition.
+        condition: An index into hyperparams['reset_conditions']."""
+
+        # Set the rest position as the initial position from agent hyperparams.
+        action = self._hyperparams['reset_conditions'][condition][JOINT_ANGLES]
+
+        # Prepare the present positions to see how far off we are.
+        now_position = np.asarray(self._observation_msg.actual.positions[:len(action)])
+
+        # Raise error if robot has made contact with the ground in simulation.
+        # This occurs because Gazebo sets joint angles beyond what they can possibly
+        # be when the robot makes contact with the ground and "breaks." 
+        if max(abs(now_position)) >= 2*np.pi:
+            raise ROBOT_MADE_CONTACT_WITH_GAZEBO_GROUND_SO_RESTART_ROSLAUNCH
+
+        # Wait until the arm is within epsilon of reset configuration.
+        with self._time_lock:
+            self._currently_resetting = True
+            self._pub.publish(self._get_ur_trajectory_message(action,
+                              self._hyperparams['reset_slowness']))
+            self._reset_cv.wait()
+
+    def _run_trial(self, policy, noise, time_to_run=5):
+        """'Private' method only called by sample() to collect sample data.
+        Runs an async controller with a policy.
+        The async controller receives observations from ROS subscribers and
+        then uses them to publish actions.
+        policy:      policy object used to get next state
+        noise:       noise necessary in order to carry out the policy
+        time_to_run: is not used in this agent
+
+        Returns:
+            result: a dictionary keyed with each of the constants that
+            appear in the state_include, obs_include, and meta_include
+            sections of the hyperparameters file.  Each of these should
+            be associated with an array indexed by the timestep at which
+            a certain state/observation/meta param occurred and the
+            value representing a particular state/observation/meta
+            param.  Through this indexing scheme the value of each
+            state/observation/meta param at each timestep is stored."""
+
+        # Initialize the data structure to be passed to GPS.
+        result = {param: [] for param in self.x_data_types +
+                         self.obs_data_types + self.meta_data_types +
+                         [END_EFFECTOR_POINT_JACOBIANS, ACTION]}
+
+        # Carry out the number of trials specified in the hyperparams.  The
+        # index is only called by the policy.act method.  We use a while
+        # instead of for because we do not want to iterate if we do not publish.
+        time_step = 0
+        while time_step < self._hyperparams['T']:
+
+            # Skip the action if the subscriber thread has not finished
+            # initializing the times.
+            if self._previous_ee_time is None:
+                continue
+
+            # Only read and process ROS messages if they are fresh.
+            if self._observations_stale is False:
+
+                # Acquire the lock to prevent the subscriber thread from
+                # updating times or observation messages.
+                self._time_lock.acquire(True)
+
+                prev_ros_time = self._previous_ee_time
+                ros_time = self._ee_time
+                self._previous_ee_time = ros_time
+                obs_message = self._observation_msg
+
+                # Make it so that subscriber's thread observation callback
+                # must be called before publishing again.  FIXME: This field
+                # should be protected by the time lock.
+                self._observations_stale = True
+
+                # Release the lock after all dynamic variables have been updated.
+                self._time_lock.release()
+
+                # Collect the end effector points and velocities in
+                # cartesian coordinates for the state.
+                ee_points, ee_velocities = \
+                    self._get_points_and_vels(ros_time, prev_ros_time, 
+                        self._hyperparams['link_names'][6],
+                        self._hyperparams['link_names'][0], debug=False)
+
+                # Collect the present joint angles and velocities from ROS for the state.
+                last_observations = self._process_observations(obs_message, result)
+
+                # Get Jacobians from present joint angles and KDL trees
+                # The Jacobians consist of a 6x6 matrix getting its from from
+                # (# joint angles) x (len[x, y, z] + len[roll, pitch, yaw])
+                ee_jacobians = self._get_jacobians(last_observations[:6])
+
+                # Concatenate the information that defines the robot state
+                # vector, typically denoted as 'x' in GPS articles.
+                state = np.r_[last_observations, ee_points, ee_velocities]
+
+                # Pass the state vector through the policy to get a vector of
+                # joint angles to go to next.
+                action = policy.act(state, state, time_step, noise[time_step])
+
+                # Primary print statements of the action development.
+                print '\nTimestep', time_step
+                print 'Joint States ', np.around(state[:6], 2)
+                print 'Policy Action', np.around(action, 2)
+
+                # Display meters off from goal.
+                print 'Distance from Goal', np.around(ee_points, 3)
+
+                # Stop the robot from moving past last position when sample is done.
+                # If this is not called, the robot will complete its last action even
+                # though it is no longer supposed to be exploring.
+                if time_step == self._hyperparams['T']-1:
+                    action[:6] = last_observations[:6]
+
+                # Publish the action to the robot.
+                self._pub.publish(self._get_ur_trajectory_message(action, 
+                    self._hyperparams['slowness']))
+
+                # Only update the time_step after publishing.
+                time_step += 1
+
+                # Build up the result data structure to return to GPS.
+                result[ACTION].append(action)
+                result[END_EFFECTOR_POINTS].append(ee_points)
+                result[END_EFFECTOR_POINT_JACOBIANS].append(ee_jacobians)
+                result[END_EFFECTOR_POINT_VELOCITIES].append(ee_velocities)
+
+            # The subscriber is listening during this sleep() call, and
+            # updating the time "continuously" (each hyperparams['period'].
+            self.r.sleep()
+
+        # Sanity check the results to make sure nothing is infinite.
+        for value in result.values():
+            if not np.isfinite(value).all():
+                print 'There is an infinite value in the results.'
+            assert np.isfinite(value).all()
+        return result
+
+    def _get_jacobians(self,state):
+        """Produce a Jacobian from the urdf that maps from joint angles to x, y, z.
+        This makes a 6x6 matrix from 6 joint angles to x, y, z and 3 angles.
+        The angles are roll, pitch, and yaw (not Euler angles) and are not needed.
+        Returns a repackaged Jacobian that is 3x6.
+        """
+
+        # Initialize a Jacobian for 6 joint angles by 3 cartesian coords and 3 orientation angles
+        jacobian = Jacobian(6)
+
+        # Initialize a joint array for the present 6 joint angles.
+        angles = JntArray(6)
+
+        # Construct the joint array from the most recent joint angles.
+        for i in range(6):
+            angles[i] = state[i]
+
+        # Initialize a tree structure from the robot urdf. 
+        # Note that the xacro of the urdf is updated by hand.
+        # Then the urdf must be compiled.
+        _, ur_tree = treeFromFile(self._hyperparams['tree_path'])
+
+        # Retrieve a chain structure between the base and the start of the end effector.
+        ur_chain = ur_tree.getChain(self._hyperparams['link_names'][0], 
+            self._hyperparams['link_names'][6])
+
+        # Initialize a KDL Jacobian solver from the chain.
+        jac_solver = ChainJntToJacSolver(ur_chain)
+
+        # Update the jacobian by solving for the given angles.
+        jac_solver.JntToJac(angles, jacobian)
+
+        # Initialize a numpy array to store the Jacobian.
+        J = np.zeros((6,6))
+        for i in range(jacobian.rows()):
+            for j in range(jacobian.columns()):
+                J[i,j] = jacobian[i,j]
+
+        # Only want the cartesian position, not Roll, Pitch, Yaw (RPY) Angles
+        ee_jacobians = J[0:3,:] 
+
+        return ee_jacobians
+
+    def _get_points_and_vels(self, t, t_last, target, source, debug=False):
+        """
+        Helper function to _run_trial that gets the cartesian positions
+        and velocities from ROS.  Uses tf to convert from the Base to
+        End Effector and and get cartesian coordinates.  Draws positions
+        from a 'buffer' of past positions by checking recent times.
+        t: the 'present' time.  This is the time of the most recent observation,
+            so slightly earlier than the actual present.
+        t_last:          the 'present' time from last time this function was called
+        target:          the End Effector link
+        source:          the Base Link
+        debug:           activate extra print statements for debugging"""
+
+        # Assert that time has passed since the last step.
+        assert self.delta_t > 0.
+
+        # Listen to the cartesian coordinate of the target link.
+        pos_last = get_position(self.tf, target, source, t_last)
+        pos_now = get_position(self.tf, target, source, t)
+
+        # Use the past position to get the present velocity.
+        velocity = (pos_now - pos_last)/self.delta_t
+
+        # Shift the present poistion by the End Effector target.
+        # Since we subtract the target point from the current position, the optimal
+        # value for this will be 0.
+        position = np.asarray(pos_now)-np.asarray(self._hyperparams['ee_points_tgt'])[0, 0, :]
+
+        if debug:
+            print 'VELOCITY:', velocity
+            print 'POSITION:', position
+            print 'BEFORE  :', t_last.to_sec(), pos_last
+            print 'PRESENT :', t.to_sec(), pos_now
+
+        return position, velocity
+
+    def _process_observations(self, message, result):
+        """Helper fuinction only called by _run_trial to convert a ROS message
+        to joint angles and velocities.
+        Check for and handle the case where a message is either malformed
+        or contains joint values in an order different from that expected
+        in hyperparams['joint_order']"""
+
+
+        # Check if joint values are in the expected order and size.
+        if message.joint_names != self._hyperparams['joint_order']:
+
+            # Check that the message is of same size as the expected message.
+            if len(message.joint_names) != len(self._hyperparams['joint_order']):
+                raise MSG_INVALID_JOINT_NAMES_DIFFER
+
+            # Check that all the expected joint values are present in a message.
+            if not all(map(lambda x,y: x in y, message.joint_names, 
+                [self._valid_joint_set for _ in range(len(message.joint_names))])):
+
+                raise MSG_INVALID_JOINT_NAMES_DIFFER
+
+            # If necessary, reorder the joint values to conform to the order
+            # expected in hyperparams['joint_order'].
+            new_message = [None for _ in range(len(message))]
+            for joint, index in message.joint_names.enumerate():
+                for state_type in self._hyperparams['state_types']:
+                    new_message[self._valid_joint_index[joint]] = message[state_type][index]
+
+            message = new_message
+
+        # Package the positions, velocities, amd accellerations of the joint angles.
+        for (state_type, state_category), state_value_vector in zip(
+            self._hyperparams['state_types'].iteritems(),
+            [message.actual.positions, message.actual.velocities,
+            message.actual.accelerations]):
+
+            # Assert that the length of the value vector matches the corresponding
+            # number of dimensions from the hyperparameters file
+            assert len(state_value_vector) == self._hyperparams['sensor_dims'][state_category]
+
+            # Write the state value vector into the results dictionary keyed by its
+            # state category
+            result[state_category].append(state_value_vector)
+
+        return np.array(result[JOINT_ANGLES][-1] + result[JOINT_VELOCITIES][-1])
+
+    def _get_ur_trajectory_message(self, action, slowness):
+        """Helper function only called by reset() and run_trial().
+        Wraps an action vector of joint angles into a JointTrajectory message.
+        The velocities, accelerations, and effort do not control the arm motion"""
+
+        # Set up a trajectory message to publish.
+        action_msg = JointTrajectory()
+        action_msg.joint_names = self._hyperparams['joint_order']
+
+        # Create a point to tell the robot to move to.
+        target = JointTrajectoryPoint()
+        target.positions = action
+
+        # These times determine the speed at which the robot moves:
+        # it tries to reach the specified target position in 'slowness' time.
+        target.time_from_start = rospy.Duration(slowness)
+
+        # Package the single point into a trajectory of points with length 1.
+        action_msg.points = [target]
+
+        return action_msg
diff --git a/python/gps/agent/ur_ros/tree_urdf.py b/python/gps/agent/ur_ros/tree_urdf.py
new file mode 100644
index 000000000..81696bf21
--- /dev/null
+++ b/python/gps/agent/ur_ros/tree_urdf.py
@@ -0,0 +1,130 @@
+from __future__ import print_function
+
+import urdf_parser_py.urdf as urdf
+
+import PyKDL as kdl
+
+def treeFromFile(filename):
+    """
+    Construct a PyKDL.Tree from an URDF file.
+    :param filename: URDF file path
+    """
+
+    with open(filename) as urdf_file:
+        return treeFromUrdfModel(urdf.URDF.from_xml_string(urdf_file.read()))
+
+def treeFromParam(param):
+    """
+    Construct a PyKDL.Tree from an URDF in a ROS parameter.
+    :param param: Parameter name, ``str``
+    """
+
+    return treeFromUrdfModel(urdf.URDF.from_parameter_server())
+
+def treeFromString(xml):
+    """
+    Construct a PyKDL.Tree from an URDF xml string.
+    :param xml: URDF xml string, ``str``
+    """
+
+    return treeFromUrdfModel(urdf.URDF.from_xml_string(xml))
+
+def _toKdlPose(pose):
+    """
+    Helper function that packages a pose structure containing orientation values (roll, pitch, yaw)
+    and position values (x, y, z) into a KDL Frame.
+    """
+    if pose and pose.rpy and len(pose.rpy) == 3 and pose.xyz and len(pose.xyz) == 3:
+        return kdl.Frame(
+              kdl.Rotation.RPY(*pose.rpy),
+              kdl.Vector(*pose.xyz))
+    else:
+        return kdl.Frame.Identity()
+
+def _toKdlInertia(i):
+    # kdl specifies the inertia in the reference frame of the link, the urdf
+    # specifies the inertia in the inertia reference frame
+    origin = _toKdlPose(i.origin)
+    inertia = i.inertia
+    return origin.M * kdl.RigidBodyInertia(
+            i.mass, origin.p,
+            kdl.RotationalInertia(inertia.ixx, inertia.iyy, inertia.izz, inertia.ixy, inertia.ixz, inertia.iyz));
+
+def _toKdlJoint(jnt):
+    #  define a mapping for joints and kdl
+    fixed = lambda j,F: kdl.Joint(j.name, kdl.Joint.None)
+    rotational = lambda j,F: kdl.Joint(j.name, F.p, F.M * kdl.Vector(*j.axis), kdl.Joint.RotAxis)
+    translational = lambda j,F: kdl.Joint(j.name, F.p, F.M * kdl.Vector(*j.axis), kdl.Joint.TransAxis)
+
+    type_map = {
+            'fixed': fixed,
+            'revolute': rotational,
+            'continuous': rotational,
+            'prismatic': translational,
+            'floating': fixed,
+            'planar': fixed,
+            'unknown': fixed,
+            }
+
+    return type_map[jnt.type](jnt, _toKdlPose(jnt.origin))
+
+def _add_children_to_tree(robot_model, root, tree):
+    """
+    Helper function that adds children to a KDL tree.
+    """
+
+    # constructs the optional inertia
+    inert = kdl.RigidBodyInertia(0)
+    if root.inertial:
+        inert = _toKdlInertia(root.inertial)
+
+    # constructs the kdl joint
+    parent_joint_name, parent_link_name = robot_model.parent_map[root.name]
+    parent_joint = robot_model.joint_map[parent_joint_name]
+
+    # construct the kdl segment
+    sgm = kdl.Segment(
+        root.name,
+        _toKdlJoint(parent_joint),
+        _toKdlPose(parent_joint.origin),
+        inert)
+
+    # add segment to tree
+    if not tree.addSegment(sgm, parent_link_name):
+        return False
+
+    if root.name not in robot_model.child_map:
+        return True
+
+    children = [robot_model.link_map[l] for (j,l) in robot_model.child_map[root.name]]
+
+    # recurslively add all children
+    for child in children:
+        if not _add_children_to_tree(robot_model, child, tree):
+            return False
+
+    return True;
+
+def treeFromUrdfModel(robot_model, quiet=False):
+    """
+    Construct a PyKDL.Tree from an URDF model from urdf_parser_python.
+
+    :param robot_model: URDF xml string, ``str``
+    :param quiet: If true suppress messages to stdout, ``bool``
+    """
+
+    root = robot_model.link_map[robot_model.get_root()]
+
+    if root.inertial and not quiet:
+        print("The root link %s has an inertia specified in the URDF, but KDL does not support a root link with an inertia.  As a workaround, you can add an extra dummy link to your URDF." % root.name);
+
+    ok = True
+    tree = kdl.Tree(root.name)
+
+    #  add all children
+    for joint,child in robot_model.child_map[root.name]:
+        if not _add_children_to_tree(robot_model, robot_model.link_map[child], tree):
+            ok = False
+            break
+  
+    return (ok, tree)
diff --git a/python/gps/utility/general_utils.py b/python/gps/utility/general_utils.py
index 98cc3e49d..50f8a31d7 100644
--- a/python/gps/utility/general_utils.py
+++ b/python/gps/utility/general_utils.py
@@ -1,7 +1,6 @@
 """ This file defines general utility functions and classes. """
 import numpy as np
 
-
 class BundleType(object):
     """
     This class bundles many fields, similar to a record or a mutable
@@ -61,6 +60,36 @@ def finite_differences(func, inputs, func_output_shape=(), epsilon=1e-5):
     return gradient
 
 
+def get_position(tf, target, source, time):
+    """
+    Utility function that uses tf to return the position of target
+    relative to source at time
+    tf: Object that implements TransformListener
+    target: Valid label corresponding to target link
+    source: Valid label corresponding to source link
+    time: Time given in TF's time structure of secs and nsecs
+    """
+    
+    # Calculate the quaternion data for the relative position
+    # between the target and source.
+    translation, rot = tf.lookupTransform(target, source, time)
+
+    # Get rotation and translation matrix from the quaternion data.
+    # The top left 3x3 section is a rotation matrix.
+    # The far right column is a translation vector with 1 at the bottom.
+    # The bottom row is [0 0 0 1].
+    transform = np.asmatrix(tf.fromTranslationRotation(translation, rot))
+
+    # Get position relative to source by multiplying the rotation by 
+    # the translation. The -1 is for robot matching sign conventions.
+    position = -1 * (transform[:3, 3].T * transform[:3, :3])
+
+    # Convert from np.matrix to np.array
+    position = np.asarray(position)[0][:]
+
+    return position
+
+
 def approx_equal(a, b, threshold=1e-5):
     """
     Return whether two numbers are equal within an absolute threshold.
diff --git a/python/gps/utility/tree_urdf.py b/python/gps/utility/tree_urdf.py
new file mode 100644
index 000000000..81696bf21
--- /dev/null
+++ b/python/gps/utility/tree_urdf.py
@@ -0,0 +1,130 @@
+from __future__ import print_function
+
+import urdf_parser_py.urdf as urdf
+
+import PyKDL as kdl
+
+def treeFromFile(filename):
+    """
+    Construct a PyKDL.Tree from an URDF file.
+    :param filename: URDF file path
+    """
+
+    with open(filename) as urdf_file:
+        return treeFromUrdfModel(urdf.URDF.from_xml_string(urdf_file.read()))
+
+def treeFromParam(param):
+    """
+    Construct a PyKDL.Tree from an URDF in a ROS parameter.
+    :param param: Parameter name, ``str``
+    """
+
+    return treeFromUrdfModel(urdf.URDF.from_parameter_server())
+
+def treeFromString(xml):
+    """
+    Construct a PyKDL.Tree from an URDF xml string.
+    :param xml: URDF xml string, ``str``
+    """
+
+    return treeFromUrdfModel(urdf.URDF.from_xml_string(xml))
+
+def _toKdlPose(pose):
+    """
+    Helper function that packages a pose structure containing orientation values (roll, pitch, yaw)
+    and position values (x, y, z) into a KDL Frame.
+    """
+    if pose and pose.rpy and len(pose.rpy) == 3 and pose.xyz and len(pose.xyz) == 3:
+        return kdl.Frame(
+              kdl.Rotation.RPY(*pose.rpy),
+              kdl.Vector(*pose.xyz))
+    else:
+        return kdl.Frame.Identity()
+
+def _toKdlInertia(i):
+    # kdl specifies the inertia in the reference frame of the link, the urdf
+    # specifies the inertia in the inertia reference frame
+    origin = _toKdlPose(i.origin)
+    inertia = i.inertia
+    return origin.M * kdl.RigidBodyInertia(
+            i.mass, origin.p,
+            kdl.RotationalInertia(inertia.ixx, inertia.iyy, inertia.izz, inertia.ixy, inertia.ixz, inertia.iyz));
+
+def _toKdlJoint(jnt):
+    #  define a mapping for joints and kdl
+    fixed = lambda j,F: kdl.Joint(j.name, kdl.Joint.None)
+    rotational = lambda j,F: kdl.Joint(j.name, F.p, F.M * kdl.Vector(*j.axis), kdl.Joint.RotAxis)
+    translational = lambda j,F: kdl.Joint(j.name, F.p, F.M * kdl.Vector(*j.axis), kdl.Joint.TransAxis)
+
+    type_map = {
+            'fixed': fixed,
+            'revolute': rotational,
+            'continuous': rotational,
+            'prismatic': translational,
+            'floating': fixed,
+            'planar': fixed,
+            'unknown': fixed,
+            }
+
+    return type_map[jnt.type](jnt, _toKdlPose(jnt.origin))
+
+def _add_children_to_tree(robot_model, root, tree):
+    """
+    Helper function that adds children to a KDL tree.
+    """
+
+    # constructs the optional inertia
+    inert = kdl.RigidBodyInertia(0)
+    if root.inertial:
+        inert = _toKdlInertia(root.inertial)
+
+    # constructs the kdl joint
+    parent_joint_name, parent_link_name = robot_model.parent_map[root.name]
+    parent_joint = robot_model.joint_map[parent_joint_name]
+
+    # construct the kdl segment
+    sgm = kdl.Segment(
+        root.name,
+        _toKdlJoint(parent_joint),
+        _toKdlPose(parent_joint.origin),
+        inert)
+
+    # add segment to tree
+    if not tree.addSegment(sgm, parent_link_name):
+        return False
+
+    if root.name not in robot_model.child_map:
+        return True
+
+    children = [robot_model.link_map[l] for (j,l) in robot_model.child_map[root.name]]
+
+    # recurslively add all children
+    for child in children:
+        if not _add_children_to_tree(robot_model, child, tree):
+            return False
+
+    return True;
+
+def treeFromUrdfModel(robot_model, quiet=False):
+    """
+    Construct a PyKDL.Tree from an URDF model from urdf_parser_python.
+
+    :param robot_model: URDF xml string, ``str``
+    :param quiet: If true suppress messages to stdout, ``bool``
+    """
+
+    root = robot_model.link_map[robot_model.get_root()]
+
+    if root.inertial and not quiet:
+        print("The root link %s has an inertia specified in the URDF, but KDL does not support a root link with an inertia.  As a workaround, you can add an extra dummy link to your URDF." % root.name);
+
+    ok = True
+    tree = kdl.Tree(root.name)
+
+    #  add all children
+    for joint,child in robot_model.child_map[root.name]:
+        if not _add_children_to_tree(robot_model, robot_model.link_map[child], tree):
+            ok = False
+            break
+  
+    return (ok, tree)