RLPark 1.0.0
Reinforcement Learning Framework in Java
package rlpark.plugin.rltoys.problems.stategraph02;

import java.util.Random;

import rlpark.plugin.rltoys.algorithms.functions.states.Projector;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.policy.Policy;
import rlpark.plugin.rltoys.envio.policy.SingleActionPolicy;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.BVector;

public class TrackingProblem {
  // Discount factor of the problem.
  static public final double Gamma = .9;
  // Probability of staying in the current state on each transition.
  static public final double SameStateProbability = .99;
  // Three states with fixed rewards.
  static public final State A = new State("A", -1.0);
  static public final State B = new State("B", 1.0);
  static public final State C = new State("C", -3.0);
  // The single action available in every state.
  static public Action Move = new Action() {
    private static final long serialVersionUID = -4236679466464277389L;
  };
  // Behaviour policy that always selects Move.
  static public final Policy acting = new SingleActionPolicy(Move);

  // Projects the observed state index into a binary feature vector.
  // States whose index exceeds the representable range share the last state
  // feature (state aliasing); the final component is an always-active bias.
  static class TrackingProjector implements Projector {
    private static final long serialVersionUID = 6604066132865938651L;
    private final BVector stateVector;

    public TrackingProjector(StateGraph stateGraph, int nbApproximatedStates) {
      stateVector = new BVector(stateGraph.nbStates() - nbApproximatedStates + 1);
    }

    @Override
    public RealVector project(double[] obs) {
      stateVector.clear();
      // Feature for the state index; aliased states map to the same feature.
      stateVector.setOn(Math.min((int) obs[0], stateVector.getDimension() - 2));
      // Bias feature, always on.
      stateVector.setOn(stateVector.getDimension() - 1);
      assert stateVector.nonZeroElements() == 2;
      return stateVector;
    }

    @Override
    public int vectorSize() {
      return stateVector.getDimension();
    }

    @Override
    public double vectorNorm() {
      return 2;
    }
  }

  // Builds the three-state graph: from A, the process mostly stays in A and
  // rarely moves to B or C; from B or C it mostly stays and rarely returns to A.
  static public GraphProblem create(Random random) {
    StateGraph stateGraph = new StateGraph(A, new State[] { A, B, C }, new Action[] { Move });
    stateGraph.addTransition(A, Move, A, SameStateProbability);
    stateGraph.addTransition(A, Move, B, (1 - SameStateProbability) / 2);
    stateGraph.addTransition(A, Move, C, (1 - SameStateProbability) / 2);
    stateGraph.addTransition(B, Move, B, SameStateProbability);
    stateGraph.addTransition(C, Move, C, SameStateProbability);
    stateGraph.addTransition(B, Move, A, 1 - SameStateProbability);
    stateGraph.addTransition(C, Move, A, 1 - SameStateProbability);
    return new GraphProblem(random, A, stateGraph, new TrackingProjector(stateGraph, 1));
  }
}
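
For context, the problem returned by create() can be driven with the single Move action. The sketch below is illustrative only: the class TrackingProblemExample is hypothetical, and it assumes that GraphProblem follows RLPark's usual RLProblem convention of initialize() and step(Action) returning a TRStep carrying the reward r_tp1, which is not shown in this listing.

package rlpark.plugin.rltoys.problems.stategraph02;

import java.util.Random;

import rlpark.plugin.rltoys.envio.rl.TRStep;

// Hypothetical driver, not part of RLPark: steps the tracking problem with its
// single action and accumulates the discounted return from the start state A.
public class TrackingProblemExample {
  public static void main(String[] args) {
    GraphProblem problem = TrackingProblem.create(new Random(0));
    // Assumption: GraphProblem exposes initialize()/step(Action) returning TRStep.
    TRStep step = problem.initialize();
    double discountedReturn = 0.0;
    double discount = 1.0;
    for (int t = 0; t < 1000; t++) {
      step = problem.step(TrackingProblem.Move);
      // Assumption: r_tp1 is the reward observed after the transition.
      discountedReturn += discount * step.r_tp1;
      discount *= TrackingProblem.Gamma;
    }
    System.out.println("Discounted return from A: " + discountedReturn);
  }
}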