RLPark 1.0.0
Reinforcement Learning Framework in Java
package rlpark.plugin.rltoys.envio.rl;

import java.io.Serializable;
import java.util.Arrays;

import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.observations.ObsAsDoubles;
import rlpark.plugin.rltoys.utils.Utils;

/** An immutable transition (o_t, a_t, o_tp1, r_tp1) in a reinforcement learning trajectory. */
public class TRStep implements ObsAsDoubles, Serializable {
  private static final long serialVersionUID = 5694217784539677187L;
  // Time of o_tp1
  final public long time;
  final public double[] o_t;
  final public Action a_t;
  final public double[] o_tp1;
  final public double r_tp1;
  public final boolean endEpisode;

  /** First step of an episode: there is no previous observation and no action yet. */
  public TRStep(double[] o_tp1, double reward) {
    this(0, null, null, o_tp1, reward, false);
  }

  /** Step following step_t: the previous o_tp1 becomes the new o_t and time is incremented. */
  public TRStep(TRStep step_t, Action a_t, double[] o_tp1, double r_tp1) {
    this(step_t.time + 1, step_t.o_tp1, a_t, o_tp1, r_tp1, false);
  }

  public TRStep(long time, double[] o_t, Action a_t, double[] o_tp1, double r_tp1, boolean endEpisode) {
    this.time = time;
    this.endEpisode = endEpisode;
    // o_t and a_t must be both null (episode start) or both set (regular step)
    assert (o_t == null && a_t == null) || (o_t != null && a_t != null);
    this.o_t = o_t == null ? null : o_t.clone();
    this.a_t = a_t;
    assert o_tp1 != null;
    this.o_tp1 = o_tp1.clone();
    assert Utils.checkValue(r_tp1);
    this.r_tp1 = r_tp1;
  }

  /** Copy of this step, marked as the last step of the episode. */
  public TRStep createEndingStep() {
    return new TRStep(time, o_t, a_t, o_tp1, r_tp1, true);
  }

  @Override
  public String toString() {
    return String.format("T=%d: %s,%s->%s,r=%f", time, Arrays.toString(o_t), a_t, Arrays.toString(o_tp1), r_tp1);
  }

  public boolean isEpisodeStarting() {
    return o_t == null;
  }

  public boolean isEpisodeEnding() {
    return endEpisode;
  }

  @Override
  public double[] doubleValues() {
    return o_tp1;
  }
}
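The constructors above encode the life cycle of a transition: the two-argument constructor starts an episode (o_t and a_t are null), the chaining constructor builds the next step from the previous one, and createEndingStep() flags a terminal transition. Below is a minimal sketch of that life cycle, not part of the library itself: the observation and reward values are made up for illustration, and the anonymous Action assumes Action is a marker interface; in practice a concrete action type from rlpark.plugin.rltoys.envio.actions would be used instead.

import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.rl.TRStep;

public class TRStepLifecycle {
  public static void main(String[] args) {
    // Hypothetical action chosen by the agent; stands in for a real Action implementation.
    Action myAction = new Action() {
    };

    // Episode start: only the first observation and reward are known; o_t and a_t are null.
    TRStep step = new TRStep(new double[] { 0.0, 0.0 }, 0.0);
    assert step.isEpisodeStarting();

    // Regular step: the previous o_tp1 becomes o_t, and time is incremented to 1.
    step = new TRStep(step, myAction, new double[] { 0.1, -0.2 }, -1.0);

    // Terminal step: the same transition, now flagged as ending the episode.
    step = step.createEndingStep();
    assert step.isEpisodeEnding();
  }
}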