RLPark 1.0.0
Reinforcement Learning Framework in Java
Helicopter.java
package rlpark.plugin.rltoys.problems.helicopter;

import java.util.Random;

import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.actions.ActionArray;
import rlpark.plugin.rltoys.envio.observations.Legend;
import rlpark.plugin.rltoys.envio.rl.TRStep;
import rlpark.plugin.rltoys.math.ranges.Range;
import rlpark.plugin.rltoys.problems.ProblemBounded;
import rlpark.plugin.rltoys.problems.ProblemContinuousAction;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;

/**
 * Helicopter hovering problem: the agent controls a simulated helicopter and is
 * penalized quadratically for drifting away from a stable hover at the origin.
 */
public class Helicopter implements ProblemBounded, ProblemContinuousAction {
  static final private Legend legend = new Legend("VelocityX", "VelocityY", "VelocityZ", "PositionX", "PositionY",
                                                  "PositionZ", "AngularVelocityX", "AngularVelocityY",
                                                  "AngularVelocityZ", "ErrorQuaternionX", "ErrorQuaternionY",
                                                  "ErrorQuaternionZ");
  @Monitor
  private final HelicopterDynamics heliDynamics;
  private TRStep step;
  private final int episodeLength;
  static private final int DefaultEpisodeLength = 6000;

  public Helicopter(Random random) {
    this(random, DefaultEpisodeLength);
  }

  public Helicopter(Random random, int episodeLength) {
    this.episodeLength = episodeLength;
    heliDynamics = new HelicopterDynamics(random);
  }

  @Override
  public TRStep initialize() {
    heliDynamics.reset();
    step = new TRStep(heliDynamics.getObservation(), computeReward());
    return step;
  }

  // Negative quadratic cost on velocity, position, angular rate and attitude error.
  private double computeReward() {
    if (heliDynamics.isCrashed())
      return computeTerminalReward();
    double reward = 0;
    reward -= heliDynamics.velocity.x * heliDynamics.velocity.x;
    reward -= heliDynamics.velocity.y * heliDynamics.velocity.y;
    reward -= heliDynamics.velocity.z * heliDynamics.velocity.z;
    reward -= heliDynamics.position.x * heliDynamics.position.x;
    reward -= heliDynamics.position.y * heliDynamics.position.y;
    reward -= heliDynamics.position.z * heliDynamics.position.z;
    reward -= heliDynamics.angularRate.x * heliDynamics.angularRate.x;
    reward -= heliDynamics.angularRate.y * heliDynamics.angularRate.y;
    reward -= heliDynamics.angularRate.z * heliDynamics.angularRate.z;
    reward -= heliDynamics.q.x * heliDynamics.q.x;
    reward -= heliDynamics.q.y * heliDynamics.q.y;
    reward -= heliDynamics.q.z * heliDynamics.q.z;
    return reward;
  }

  // Crash penalty: a worst-case quadratic cost, scaled by the number of remaining time steps.
  private double computeTerminalReward() {
    double reward = -3.0f * HelicopterDynamics.MaxPos * HelicopterDynamics.MaxPos
        + -3.0f * HelicopterDynamics.MaxRate * HelicopterDynamics.MaxRate
        + -3.0f * HelicopterDynamics.MaxVel * HelicopterDynamics.MaxVel
        - (1.0f - HelicopterDynamics.MIN_QW_BEFORE_HITTING_TERMINAL_STATE
            * HelicopterDynamics.MIN_QW_BEFORE_HITTING_TERMINAL_STATE);
    reward *= episodeLength - step.time;
    return reward;
  }

  @Override
  public TRStep step(Action action) {
    heliDynamics.step((ActionArray) action);
    step = new TRStep(step, action, heliDynamics.getObservation(), computeReward());
    // End the episode on a crash or when the maximum episode length is reached.
    if (heliDynamics.isCrashed() || step.time == episodeLength)
      forceEndEpisode();
    return step;
  }

  @Override
  public TRStep forceEndEpisode() {
    step = step.createEndingStep();
    return step;
  }

  @Override
  public TRStep lastStep() {
    return step;
  }

  @Override
  public Legend legend() {
    return legend;
  }

  @Override
  public Range[] actionRanges() {
    return HelicopterDynamics.ActionRanges;
  }

  @Override
  public Range[] getObservationRanges() {
    return HelicopterDynamics.ObservationRanges;
  }
}
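A minimal driver sketch (not part of the listed file) showing how the Helicopter problem can be initialized and stepped. The ActionArray varargs constructor, the four-dimensional action, and the TRStep members isEpisodeEnding() and r_tp1 are assumptions about the surrounding RLPark API rather than facts shown in this listing; a real experiment would supply actions from an agent instead of a constant.

import java.util.Random;

import rlpark.plugin.rltoys.envio.actions.ActionArray;
import rlpark.plugin.rltoys.envio.rl.TRStep;
import rlpark.plugin.rltoys.problems.helicopter.Helicopter;

public class HelicopterExample {
  public static void main(String[] args) {
    Helicopter helicopter = new Helicopter(new Random(0));
    TRStep step = helicopter.initialize();
    // Assumed four control dimensions, one value per entry of actionRanges().
    ActionArray action = new ActionArray(0.0, 0.0, 0.0, 0.0);
    while (!step.isEpisodeEnding()) { // isEpisodeEnding() assumed on TRStep
      step = helicopter.step(action);
      System.out.println("t=" + step.time + " reward=" + step.r_tp1); // r_tp1 assumed reward field
    }
  }
}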