RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.algorithms.control.qlearning; 00002 00003 import rlpark.plugin.rltoys.algorithms.control.ControlLearner; 00004 import rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner; 00005 import rlpark.plugin.rltoys.envio.actions.Action; 00006 import rlpark.plugin.rltoys.envio.policy.Policies; 00007 import rlpark.plugin.rltoys.envio.policy.Policy; 00008 import rlpark.plugin.rltoys.math.vector.RealVector; 00009 import rlpark.plugin.rltoys.utils.NotImplemented; 00010 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00011 00012 @Monitor 00013 public class QLearningControl implements ControlLearner, OffPolicyLearner { 00014 private static final long serialVersionUID = 5784749108581105369L; 00015 private final QLearning qlearning; 00016 private final Policy behaviour; 00017 00018 public QLearningControl(Policy acting, QLearning qlearning) { 00019 this.qlearning = qlearning; 00020 this.behaviour = acting; 00021 } 00022 00023 @Override 00024 public Action step(RealVector x_t, Action a_t, RealVector x_tp1, double r_tp1) { 00025 Action a_tp1 = Policies.decide(behaviour, x_tp1); 00026 qlearning.update(x_t, a_t, x_tp1, a_tp1, r_tp1); 00027 return a_tp1; 00028 } 00029 00030 @Override 00031 public void learn(RealVector x_t, Action a_t, RealVector x_tp1, Action a_tp1, double r_tp1) { 00032 qlearning.update(x_t, a_t, x_tp1, a_tp1, r_tp1); 00033 } 00034 00035 @Override 00036 public Action proposeAction(RealVector x) { 00037 return Policies.decide(behaviour, x); 00038 } 00039 00040 public Policy behaviourPolicy() { 00041 return behaviour; 00042 } 00043 00044 @Override 00045 public Policy targetPolicy() { 00046 throw new NotImplemented(); 00047 } 00048 00049 @Override 00050 public QLearning predictor() { 00051 return qlearning; 00052 } 00053 }