RLPark 1.0.0
Reinforcement Learning Framework in Java

HelicopterDynamics.java

Go to the documentation of this file.
00001 /* Helicopter Domain for RL - Competition - RLAI's Port of Pieter Abbeel's code submission
00002  * Copyright (C) 2007, Pieter Abbeel
00003  * Ported by Mark Lee and Brian Tanner from C++ to Java for the 2008 RL-Competition and beyond. 
00004  * Adapted to RLPark from http://library.rl-community.org/wiki/Helicopter_%28Java%29
00005  * 
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version.
00010  * 
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  * 
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
00019  */
00020 package rlpark.plugin.rltoys.problems.helicopter;
00021 
00022 import java.util.Random;
00023 
00024 import rlpark.plugin.rltoys.envio.actions.ActionArray;
00025 import rlpark.plugin.rltoys.math.ranges.Range;
00026 import zephyr.plugin.core.api.monitoring.annotations.Monitor;
00027 
00028 
00029 public class HelicopterDynamics {
00030 
00031   /* some constants indexing into the helicopter's state */
00032   static final int ndot_idx = 0; // north velocity
00033   static final int edot_idx = 1; // east velocity
00034   static final int ddot_idx = 2; // down velocity
00035   static final int n_idx = 3; // north
00036   static final int e_idx = 4; // east
00037   static final int d_idx = 5; // down
00038   static final int p_idx = 6; // angular rate around forward axis
00039   static final int q_idx = 7; // angular rate around sideways (to the right)
00040                               // axis
00041   static final int r_idx = 8; // angular rate around vertical (downward) axis
00042   static final int qx_idx = 9; // quaternion entries, x,y,z,w q = [ sin(theta/2)
00043                                // * axis; cos(theta/2)]
00044   static final int qy_idx = 10; // where axis = axis of rotation; theta is
00045                                 // amount of rotation around that axis
00046   static final int qz_idx = 11; // [recall: any rotation can be represented by a
00047                                 // single rotation around some axis]
00048   final static int qw_idx = 12;
00049   final static int state_size = 13;
00050 
00051   static final int NUMOBS = 12;
00052   // note: observation returned is not the state itself, but the "error state"
00053   // expressed in the helicopter's frame (which allows for a simpler mapping
00054   // from observation to inputs)
00055   // observation consists of:
00056   // u, v, w : velocities in helicopter frame
00057   // xerr, yerr, zerr: position error expressed in frame attached to helicopter
00058   // [xyz correspond to ned when helicopter is in "neutral" orientation, i.e.,
00059   // level and facing north]
00060   // p, q, r
00061   // qx, qy, qz
00062   private final double wind[] = new double[2];
00063 
00064   // upper bounds on values state variables can take on (required by rl_glue to
00065   // be put into a string at environment initialization)
00066   public static final double MaxVel = 5.0; // m/s
00067   public static final double MaxPos = 20.0;
00068   public static final double MaxRate = 2 * 3.1415 * 2;
00069   public static final double MAX_QUAT = 1.0;
00070   public static final double MIN_QW_BEFORE_HITTING_TERMINAL_STATE = Math.cos(30.0 / 2.0 * Math.PI / 180.0);
00071   public static final double MaxAction = 1.0;
00072   public static final double WIND_MAX = 5.0; //
00073   public static final Range[] ObservationRanges = new Range[] { new Range(-MaxVel, MaxVel), new Range(-MaxVel, MaxVel),
00074       new Range(-MaxVel, MaxVel), new Range(-MaxPos, MaxPos), new Range(-MaxPos, MaxPos), new Range(-MaxPos, MaxPos),
00075       new Range(-MaxRate, MaxRate), new Range(-MaxRate, MaxRate), new Range(-MaxRate, MaxRate),
00076       new Range(-MAX_QUAT, MAX_QUAT), new Range(-MAX_QUAT, MAX_QUAT), new Range(-MAX_QUAT, MAX_QUAT) };
00077   public static final Range[] ActionRanges = new Range[] { new Range(-MaxAction, MaxAction),
00078       new Range(-MaxAction, MaxAction), new Range(-MaxAction, MaxAction), new Range(-MaxAction, MaxAction) };
00079   // very crude helicopter model, okay around hover:
00080   public static final double heli_model_u_drag = 0.18;
00081   public static final double heli_model_v_drag = 0.43;
00082   public static final double heli_model_w_drag = 0.49;
00083   public static final double heli_model_p_drag = 12.78;
00084   public static final double heli_model_q_drag = 10.12;
00085   public static final double heli_model_r_drag = 8.16;
00086   public static final double heli_model_u0_p = 33.04;
00087   public static final double heli_model_u1_q = -33.32;
00088   public static final double heli_model_u2_r = 70.54;
00089   public static final double heli_model_u3_w = -42.15;
00090   public static final double heli_model_tail_rotor_side_thrust = -0.54;
00091   public static final double DeltaT = .1; // simulation time scale [time scale
00092                                           // for
00093   // control ---
00094   // internally we integrate at 100Hz for simulating the
00095   // dynamics]
00096   @Monitor
00097   final public HeliVector velocity = new HeliVector(0.0d, 0.0d, 0.0d);
00098   @Monitor
00099   final public HeliVector position = new HeliVector(0.0d, 0.0d, 0.0d);
00100   @Monitor
00101   final public HeliVector angularRate = new HeliVector(0.0d, 0.0d, 0.0d);
00102   public Quaternion q = new Quaternion(0.0d, 0.0d, 0.0d, 1.0d);
00103   final private double noise[] = new double[6];
00104   final private Random random;
00105 
00106   public HelicopterDynamics(Random random) {
00107     this.random = random;
00108   }
00109 
00110   public void reset() {
00111     velocity.reset();
00112     position.reset();
00113     angularRate.reset();
00114     q.reset();
00115   }
00116 
00117   private double boxMull() {
00118     double x1 = random.nextDouble();
00119     double x2 = random.nextDouble();
00120     return Math.sqrt(-2.0f * Math.log(x1)) * Math.cos(2.0f * Math.PI * x2);
00121   }
00122 
00123   public double[] getObservation() {
00124     double[] observation = new double[NUMOBS];
00125     // observation is the error state in the helicopter's coordinate system
00126     // (that way errors/observations can be mapped more directly to actions)
00127     HeliVector ned_error_in_heli_frame = position.express_in_quat_frame(q);
00128     HeliVector uvw = velocity.express_in_quat_frame(q);
00129 
00130     observation[0] = uvw.x;
00131     observation[1] = uvw.y;
00132     observation[2] = uvw.z;
00133 
00134     observation[n_idx] = ned_error_in_heli_frame.x;
00135     observation[e_idx] = ned_error_in_heli_frame.y;
00136     observation[d_idx] = ned_error_in_heli_frame.z;
00137     observation[p_idx] = angularRate.x;
00138     observation[q_idx] = angularRate.y;
00139     observation[r_idx] = angularRate.z;
00140 
00141     // the error quaternion gets negated, b/c we consider the rotation required
00142     // to bring the helicopter back to target in the helicopter's frame
00143     observation[qx_idx] = q.x;
00144     observation[qy_idx] = q.y;
00145     observation[qz_idx] = q.z;
00146 
00147     for (int i = 0; i < NUMOBS; i++)
00148       observation[i] = ObservationRanges[i].bound(observation[i]);
00149     return observation;
00150   }
00151 
00152   public void step(ActionArray agentAction) {
00153     double[] a = new double[4];
00154     // saturate all the actions, b/c the actuators are limited:
00155     // [real helicopter's saturation is of course somewhat different, depends on
00156     // swash plate mixing etc ... ]
00157     for (int a_i = 0; a_i < a.length; a_i++)
00158       a[a_i] = ActionRanges[a_i].bound(agentAction.actions[a_i]);
00159 
00160 
00161     final double noise_mult = 2.0;
00162     final double noise_std[] = { 0.1941, 0.2975, 0.6058, 0.1508, 0.2492, 0.0734 }; // u,
00163                                                                                    // v,
00164                                                                                    // w,
00165                                                                                    // p,
00166                                                                                    // q,
00167                                                                                    // r
00168     double noise_memory = .8;
00169     // generate Gaussian random numbers
00170 
00171     for (int i = 0; i < 6; ++i)
00172       noise[i] = noise_memory * noise[i] + (1.0d - noise_memory) * boxMull() * noise_std[i] * noise_mult;
00173 
00174     for (int t = 0; t < 10; ++t) {
00175 
00176       // Euler integration:
00177 
00178       // *** position ***
00179       position.x += DeltaT * velocity.x;
00180       position.y += DeltaT * velocity.y;
00181       position.z += DeltaT * velocity.z;
00182 
00183       // *** velocity ***
00184       HeliVector uvw = velocity.express_in_quat_frame(q);
00185       HeliVector wind_ned = new HeliVector(wind[0], wind[1], 0.0);
00186       HeliVector wind_uvw = wind_ned.express_in_quat_frame(q);
00187       HeliVector uvw_force_from_heli_over_m = new HeliVector(-heli_model_u_drag * (uvw.x + wind_uvw.x) + noise[0],
00188                                                              -heli_model_v_drag * (uvw.y + wind_uvw.y)
00189                                                                  + heli_model_tail_rotor_side_thrust + noise[1],
00190                                                              -heli_model_w_drag * uvw.z + heli_model_u3_w * a[3]
00191                                                                  + noise[2]);
00192 
00193       HeliVector ned_force_from_heli_over_m = uvw_force_from_heli_over_m.rotate(q);
00194       velocity.x += DeltaT * ned_force_from_heli_over_m.x;
00195       velocity.y += DeltaT * ned_force_from_heli_over_m.y;
00196       velocity.z += DeltaT * (ned_force_from_heli_over_m.z + 9.81d);
00197 
00198       // *** orientation ***
00199       HeliVector axis_rotation = new HeliVector(angularRate.x * DeltaT, angularRate.y * DeltaT, angularRate.z * DeltaT);
00200       Quaternion rot_quat = axis_rotation.to_quaternion();
00201       q = q.mult(rot_quat);
00202 
00203       // *** angular rate ***
00204       double p_dot = -heli_model_p_drag * angularRate.x + heli_model_u0_p * a[0] + noise[3];
00205       double q_dot = -heli_model_q_drag * angularRate.y + heli_model_u1_q * a[1] + noise[4];
00206       double r_dot = -heli_model_r_drag * angularRate.z + heli_model_u2_r * a[2] + noise[5];
00207 
00208       angularRate.x += DeltaT * p_dot;
00209       angularRate.y += DeltaT * q_dot;
00210       angularRate.z += DeltaT * r_dot;
00211     }
00212   }
00213 
00214   public boolean isCrashed() {
00215     return Math.abs(position.x) > MaxPos || Math.abs(position.y) > MaxPos || Math.abs(position.z) > MaxPos;
00216   }
00217 }
 All Classes Namespaces Files Functions Variables Enumerations
Zephyr
RLPark