RLPark 1.0.0
Reinforcement Learning Framework in Java
|
package rlpark.plugin.rltoys.problems.stategraph;

import java.util.Random;

import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.policy.ConstantPolicy;
import rlpark.plugin.rltoys.envio.policy.Policy;

/**
 * A finite state graph made of seven states laid out as a chain:
 * {@code TL - A - B - C - D - E - TR}.
 *
 * <p>Each of the inner states {@code A..E} is wired to its two neighbours
 * through the {@link #Left} and {@link #Right} actions. No outgoing
 * connection is declared here for {@code TL} or {@code TR} (presumably they
 * are the terminal states of the walk -- confirm against
 * {@code FiniteStateGraph}). Episodes start in {@code C}.
 *
 * <p>NOTE(review): the states are static and therefore shared by every
 * instance of this class.
 */
@SuppressWarnings("serial")
public class RandomWalk extends FiniteStateGraph {
  /** Value reported by {@link #gamma()}. */
  private static final double DISCOUNT_FACTOR = .9;

  // The second constructor argument is 0.0 for every state except TR (1.0).
  // Presumably it is the reward attached to the state -- verify against
  // GraphState before relying on that reading.
  public static final GraphState TL = new GraphState("TL", 0.0);
  public static final GraphState A = new GraphState("A", 0.0);
  public static final GraphState B = new GraphState("B", 0.0);
  public static final GraphState C = new GraphState("C", 0.0);
  public static final GraphState D = new GraphState("D", 0.0);
  public static final GraphState E = new GraphState("E", 0.0);
  public static final GraphState TR = new GraphState("TR", 1.0);

  /** Action connecting a state to its left-hand neighbour; prints as "left". */
  public static final Action Left = new Action() {
    @Override
    public String toString() {
      return "left";
    }
  };

  /** Action connecting a state to its right-hand neighbour; prints as "right". */
  public static final Action Right = new Action() {
    @Override
    public String toString() {
      return "right";
    }
  };

  // Wire the chain. Only the inner states A..E get outgoing connections.
  static {
    A.connect(Left, TL);
    A.connect(Right, B);

    B.connect(Left, A);
    B.connect(Right, C);

    C.connect(Left, B);
    C.connect(Right, D);

    D.connect(Left, C);
    D.connect(Right, E);

    E.connect(Left, D);
    E.connect(Right, TR);
  }

  /**
   * Builds a walk driven by a policy that picks {@link #Left} with
   * probability 0.5 (see {@link #newPolicy(Random, double)}).
   *
   * @param random source of randomness handed to the policy
   */
  public RandomWalk(Random random) {
    this(newPolicy(random, 0.5));
  }

  /**
   * Builds a walk driven by the given policy and sets {@code C} as the
   * initial state.
   *
   * @param policy policy forwarded to the {@code FiniteStateGraph} constructor
   */
  public RandomWalk(Policy policy) {
    super(policy, chainStates());
    setInitialState(C);
  }

  /**
   * Creates a {@link ConstantPolicy} over {@code { Left, Right }}.
   *
   * @param random source of randomness handed to the policy
   * @param leftProbability weight given to {@link #Left}; {@link #Right}
   *        receives {@code 1 - leftProbability}. No range check is done here.
   * @return the newly created policy
   */
  public static ConstantPolicy newPolicy(Random random, double leftProbability) {
    final Action[] choices = { Left, Right };
    final double[] weights = { leftProbability, 1 - leftProbability };
    return new ConstantPolicy(random, choices, weights);
  }

  /**
   * Returns five hard-coded reference values.
   *
   * <p>NOTE(review): presumably these are the discounted values of states
   * {@code A..E} under the default 0.5/0.5 policy with gamma = 0.9 -- confirm
   * against the callers of this method.
   *
   * @return a fresh array holding the five reference values
   */
  @Override
  public double[] expectedDiscountedSolution() {
    final double[] reference = { 0.056, 0.140, 0.258, 0.431, 0.644 };
    return reference;
  }

  /**
   * @return a fresh array containing {@link #Left} followed by {@link #Right}
   */
  @Override
  public Action[] actions() {
    final Action[] available = { Left, Right };
    return available;
  }

  /**
   * @return the discount factor of this problem, 0.9
   */
  @Override
  public double gamma() {
    return DISCOUNT_FACTOR;
  }

  /** Lists every state of the chain, from {@code TL} through {@code TR}. */
  private static GraphState[] chainStates() {
    return new GraphState[] { TL, A, B, C, D, E, TR };
  }
}