RLPark 1.0.0
Reinforcement Learning Framework in Java
package rlpark.plugin.rltoys.envio.rl;

import java.io.Serializable;
import java.util.Arrays;

import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.observations.ObsAsDoubles;
import rlpark.plugin.rltoys.utils.Utils;

/** An immutable transition (o_t, a_t, o_tp1, r_tp1) in a reinforcement learning trajectory. */
public class TRStep implements ObsAsDoubles, Serializable {
  private static final long serialVersionUID = 5694217784539677187L;
  // Time of o_tp1
  final public long time;
  final public double[] o_t;
  final public Action a_t;
  final public double[] o_tp1;
  final public double r_tp1;
  public final boolean endEpisode;

  /** First step of an episode: there is no previous observation and no action yet. */
  public TRStep(double[] o_tp1, double reward) {
    this(0, null, null, o_tp1, reward, false);
  }

  /** Step following step_t: the previous o_tp1 becomes the new o_t and time is incremented. */
  public TRStep(TRStep step_t, Action a_t, double[] o_tp1, double r_tp1) {
    this(step_t.time + 1, step_t.o_tp1, a_t, o_tp1, r_tp1, false);
  }

  public TRStep(long time, double[] o_t, Action a_t, double[] o_tp1, double r_tp1, boolean endEpisode) {
    this.time = time;
    this.endEpisode = endEpisode;
    // o_t and a_t must be both null (episode start) or both set (regular step)
    assert (o_t == null && a_t == null) || (o_t != null && a_t != null);
    this.o_t = o_t == null ? null : o_t.clone();
    this.a_t = a_t;
    assert o_tp1 != null;
    this.o_tp1 = o_tp1.clone();
    assert Utils.checkValue(r_tp1);
    this.r_tp1 = r_tp1;
  }

  /** Copy of this step, marked as the last step of the episode. */
  public TRStep createEndingStep() {
    return new TRStep(time, o_t, a_t, o_tp1, r_tp1, true);
  }

  @Override
  public String toString() {
    return String.format("T=%d: %s,%s->%s,r=%f", time, Arrays.toString(o_t), a_t, Arrays.toString(o_tp1), r_tp1);
  }

  public boolean isEpisodeStarting() {
    return o_t == null;
  }

  public boolean isEpisodeEnding() {
    return endEpisode;
  }

  @Override
  public double[] doubleValues() {
    return o_tp1;
  }
}
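The constructors above encode the life cycle of a transition: the two-argument constructor starts an episode (o_t and a_t are null), the chaining constructor builds the next step from the previous one, and createEndingStep() flags a terminal transition. Below is a minimal sketch of that life cycle, not part of the library itself: the observation and reward values are made up for illustration, and the anonymous Action assumes Action is a marker interface; in practice a concrete action type from rlpark.plugin.rltoys.envio.actions would be used instead.

import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.rl.TRStep;

public class TRStepLifecycle {
  public static void main(String[] args) {
    // Hypothetical action chosen by the agent; stands in for a real Action implementation.
    Action myAction = new Action() {
    };

    // Episode start: only the first observation and reward are known; o_t and a_t are null.
    TRStep step = new TRStep(new double[] { 0.0, 0.0 }, 0.0);
    assert step.isEpisodeStarting();

    // Regular step: the previous o_tp1 becomes o_t, and time is incremented to 1.
    step = new TRStep(step, myAction, new double[] { 0.1, -0.2 }, -1.0);

    // Terminal step: the same transition, now flagged as ending the episode.
    step = step.createEndingStep();
    assert step.isEpisodeEnding();
  }
}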