RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.algorithms.control.gq; 00002 00003 import rlpark.plugin.rltoys.algorithms.control.ControlLearner; 00004 import rlpark.plugin.rltoys.algorithms.functions.stateactions.StateToStateAction; 00005 import rlpark.plugin.rltoys.envio.actions.Action; 00006 import rlpark.plugin.rltoys.envio.policy.Policies; 00007 import rlpark.plugin.rltoys.envio.policy.Policy; 00008 import rlpark.plugin.rltoys.math.vector.RealVector; 00009 import rlpark.plugin.rltoys.math.vector.pool.VectorPool; 00010 import rlpark.plugin.rltoys.math.vector.pool.VectorPools; 00011 00012 public class GQOnPolicyControl implements ControlLearner { 00013 private static final long serialVersionUID = -1583554276099167880L; 00014 private final GQ gq; 00015 private final StateToStateAction toStateAction; 00016 private final Policy acting; 00017 00018 public GQOnPolicyControl(Policy acting, StateToStateAction toStateAction, GQ gq) { 00019 this.gq = gq; 00020 this.toStateAction = toStateAction; 00021 this.acting = acting; 00022 } 00023 00024 @Override 00025 public Action step(RealVector x_t, Action a_t, RealVector x_tp1, double r_tp1) { 00026 VectorPool pool = VectorPools.pool(x_tp1, toStateAction.vectorSize()); 00027 RealVector xa_t = x_t != null ? pool.newVector(toStateAction.stateAction(x_t, a_t)) : null; 00028 Action a_tp1 = Policies.decide(acting, x_tp1); 00029 gq.update(xa_t, 1.0, r_tp1, toStateAction.stateAction(x_tp1, a_tp1), 0.0); 00030 pool.releaseAll(); 00031 return a_tp1; 00032 } 00033 00034 public Policy acting() { 00035 return acting; 00036 } 00037 00038 @Override 00039 public Action proposeAction(RealVector x) { 00040 return Policies.decide(acting, x); 00041 } 00042 }