RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 /* Helicopter Domain for RL - Competition - RLAI's Port of Pieter Abbeel's code submission 00002 * Copyright (C) 2007, Pieter Abbeel 00003 * Ported by Mark Lee and Brian Tanner from C++ to Java for the 2008 RL-Competition and beyond. 00004 * Adapted to RLPark from http://library.rl-community.org/wiki/Helicopter_%28Java%29 00005 * 00006 * This program is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU General Public License 00008 * as published by the Free Software Foundation; either version 2 00009 * of the License, or (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00019 */ 00020 package rlpark.plugin.rltoys.problems.helicopter; 00021 00022 import java.util.Random; 00023 00024 import rlpark.plugin.rltoys.envio.actions.ActionArray; 00025 import rlpark.plugin.rltoys.math.ranges.Range; 00026 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00027 00028 00029 public class HelicopterDynamics { 00030 00031 /* some constants indexing into the helicopter's state */ 00032 static final int ndot_idx = 0; // north velocity 00033 static final int edot_idx = 1; // east velocity 00034 static final int ddot_idx = 2; // down velocity 00035 static final int n_idx = 3; // north 00036 static final int e_idx = 4; // east 00037 static final int d_idx = 5; // down 00038 static final int p_idx = 6; // angular rate around forward axis 00039 static final int q_idx = 7; // angular rate around sideways (to the right) 00040 // axis 00041 static final int r_idx = 8; // angular rate around vertical (downward) axis 00042 static final int qx_idx = 9; // quaternion entries, x,y,z,w q = [ sin(theta/2) 00043 // * axis; cos(theta/2)] 00044 static final int qy_idx = 10; // where axis = axis of rotation; theta is 00045 // amount of rotation around that axis 00046 static final int qz_idx = 11; // [recall: any rotation can be represented by a 00047 // single rotation around some axis] 00048 final static int qw_idx = 12; 00049 final static int state_size = 13; 00050 00051 static final int NUMOBS = 12; 00052 // note: observation returned is not the state itself, but the "error state" 00053 // expressed in the helicopter's frame (which allows for a simpler mapping 00054 // from observation to inputs) 00055 // observation consists of: 00056 // u, v, w : velocities in helicopter frame 00057 // xerr, yerr, zerr: position error expressed in frame attached to helicopter 00058 // [xyz correspond to ned when helicopter is in "neutral" orientation, i.e., 00059 // level and facing north] 00060 // p, q, r 00061 // qx, qy, qz 00062 private final double wind[] = new double[2]; 00063 00064 // upper bounds on values state variables can take on (required by rl_glue to 00065 // be put into a string at environment initialization) 00066 public static final double MaxVel = 5.0; // m/s 00067 public static final double MaxPos = 20.0; 00068 public static final double MaxRate = 2 * 3.1415 * 2; 00069 public static final double MAX_QUAT = 1.0; 00070 public static final double MIN_QW_BEFORE_HITTING_TERMINAL_STATE = Math.cos(30.0 / 2.0 * Math.PI / 180.0); 00071 public static final double MaxAction = 1.0; 00072 public static final double WIND_MAX = 5.0; // 00073 public static final Range[] ObservationRanges = new Range[] { new Range(-MaxVel, MaxVel), new Range(-MaxVel, MaxVel), 00074 new Range(-MaxVel, MaxVel), new Range(-MaxPos, MaxPos), new Range(-MaxPos, MaxPos), new Range(-MaxPos, MaxPos), 00075 new Range(-MaxRate, MaxRate), new Range(-MaxRate, MaxRate), new Range(-MaxRate, MaxRate), 00076 new Range(-MAX_QUAT, MAX_QUAT), new Range(-MAX_QUAT, MAX_QUAT), new Range(-MAX_QUAT, MAX_QUAT) }; 00077 public static final Range[] ActionRanges = new Range[] { new Range(-MaxAction, MaxAction), 00078 new Range(-MaxAction, MaxAction), new Range(-MaxAction, MaxAction), new Range(-MaxAction, MaxAction) }; 00079 // very crude helicopter model, okay around hover: 00080 public static final double heli_model_u_drag = 0.18; 00081 public static final double heli_model_v_drag = 0.43; 00082 public static final double heli_model_w_drag = 0.49; 00083 public static final double heli_model_p_drag = 12.78; 00084 public static final double heli_model_q_drag = 10.12; 00085 public static final double heli_model_r_drag = 8.16; 00086 public static final double heli_model_u0_p = 33.04; 00087 public static final double heli_model_u1_q = -33.32; 00088 public static final double heli_model_u2_r = 70.54; 00089 public static final double heli_model_u3_w = -42.15; 00090 public static final double heli_model_tail_rotor_side_thrust = -0.54; 00091 public static final double DeltaT = .1; // simulation time scale [time scale 00092 // for 00093 // control --- 00094 // internally we integrate at 100Hz for simulating the 00095 // dynamics] 00096 @Monitor 00097 final public HeliVector velocity = new HeliVector(0.0d, 0.0d, 0.0d); 00098 @Monitor 00099 final public HeliVector position = new HeliVector(0.0d, 0.0d, 0.0d); 00100 @Monitor 00101 final public HeliVector angularRate = new HeliVector(0.0d, 0.0d, 0.0d); 00102 public Quaternion q = new Quaternion(0.0d, 0.0d, 0.0d, 1.0d); 00103 final private double noise[] = new double[6]; 00104 final private Random random; 00105 00106 public HelicopterDynamics(Random random) { 00107 this.random = random; 00108 } 00109 00110 public void reset() { 00111 velocity.reset(); 00112 position.reset(); 00113 angularRate.reset(); 00114 q.reset(); 00115 } 00116 00117 private double boxMull() { 00118 double x1 = random.nextDouble(); 00119 double x2 = random.nextDouble(); 00120 return Math.sqrt(-2.0f * Math.log(x1)) * Math.cos(2.0f * Math.PI * x2); 00121 } 00122 00123 public double[] getObservation() { 00124 double[] observation = new double[NUMOBS]; 00125 // observation is the error state in the helicopter's coordinate system 00126 // (that way errors/observations can be mapped more directly to actions) 00127 HeliVector ned_error_in_heli_frame = position.express_in_quat_frame(q); 00128 HeliVector uvw = velocity.express_in_quat_frame(q); 00129 00130 observation[0] = uvw.x; 00131 observation[1] = uvw.y; 00132 observation[2] = uvw.z; 00133 00134 observation[n_idx] = ned_error_in_heli_frame.x; 00135 observation[e_idx] = ned_error_in_heli_frame.y; 00136 observation[d_idx] = ned_error_in_heli_frame.z; 00137 observation[p_idx] = angularRate.x; 00138 observation[q_idx] = angularRate.y; 00139 observation[r_idx] = angularRate.z; 00140 00141 // the error quaternion gets negated, b/c we consider the rotation required 00142 // to bring the helicopter back to target in the helicopter's frame 00143 observation[qx_idx] = q.x; 00144 observation[qy_idx] = q.y; 00145 observation[qz_idx] = q.z; 00146 00147 for (int i = 0; i < NUMOBS; i++) 00148 observation[i] = ObservationRanges[i].bound(observation[i]); 00149 return observation; 00150 } 00151 00152 public void step(ActionArray agentAction) { 00153 double[] a = new double[4]; 00154 // saturate all the actions, b/c the actuators are limited: 00155 // [real helicopter's saturation is of course somewhat different, depends on 00156 // swash plate mixing etc ... ] 00157 for (int a_i = 0; a_i < a.length; a_i++) 00158 a[a_i] = ActionRanges[a_i].bound(agentAction.actions[a_i]); 00159 00160 00161 final double noise_mult = 2.0; 00162 final double noise_std[] = { 0.1941, 0.2975, 0.6058, 0.1508, 0.2492, 0.0734 }; // u, 00163 // v, 00164 // w, 00165 // p, 00166 // q, 00167 // r 00168 double noise_memory = .8; 00169 // generate Gaussian random numbers 00170 00171 for (int i = 0; i < 6; ++i) 00172 noise[i] = noise_memory * noise[i] + (1.0d - noise_memory) * boxMull() * noise_std[i] * noise_mult; 00173 00174 for (int t = 0; t < 10; ++t) { 00175 00176 // Euler integration: 00177 00178 // *** position *** 00179 position.x += DeltaT * velocity.x; 00180 position.y += DeltaT * velocity.y; 00181 position.z += DeltaT * velocity.z; 00182 00183 // *** velocity *** 00184 HeliVector uvw = velocity.express_in_quat_frame(q); 00185 HeliVector wind_ned = new HeliVector(wind[0], wind[1], 0.0); 00186 HeliVector wind_uvw = wind_ned.express_in_quat_frame(q); 00187 HeliVector uvw_force_from_heli_over_m = new HeliVector(-heli_model_u_drag * (uvw.x + wind_uvw.x) + noise[0], 00188 -heli_model_v_drag * (uvw.y + wind_uvw.y) 00189 + heli_model_tail_rotor_side_thrust + noise[1], 00190 -heli_model_w_drag * uvw.z + heli_model_u3_w * a[3] 00191 + noise[2]); 00192 00193 HeliVector ned_force_from_heli_over_m = uvw_force_from_heli_over_m.rotate(q); 00194 velocity.x += DeltaT * ned_force_from_heli_over_m.x; 00195 velocity.y += DeltaT * ned_force_from_heli_over_m.y; 00196 velocity.z += DeltaT * (ned_force_from_heli_over_m.z + 9.81d); 00197 00198 // *** orientation *** 00199 HeliVector axis_rotation = new HeliVector(angularRate.x * DeltaT, angularRate.y * DeltaT, angularRate.z * DeltaT); 00200 Quaternion rot_quat = axis_rotation.to_quaternion(); 00201 q = q.mult(rot_quat); 00202 00203 // *** angular rate *** 00204 double p_dot = -heli_model_p_drag * angularRate.x + heli_model_u0_p * a[0] + noise[3]; 00205 double q_dot = -heli_model_q_drag * angularRate.y + heli_model_u1_q * a[1] + noise[4]; 00206 double r_dot = -heli_model_r_drag * angularRate.z + heli_model_u2_r * a[2] + noise[5]; 00207 00208 angularRate.x += DeltaT * p_dot; 00209 angularRate.y += DeltaT * q_dot; 00210 angularRate.z += DeltaT * r_dot; 00211 } 00212 } 00213 00214 public boolean isCrashed() { 00215 return Math.abs(position.x) > MaxPos || Math.abs(position.y) > MaxPos || Math.abs(position.z) > MaxPos; 00216 } 00217 }