RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.horde.demons; 00002 00003 import rlpark.plugin.rltoys.algorithms.LinearLearner; 00004 import rlpark.plugin.rltoys.algorithms.control.gq.GreedyGQ; 00005 import rlpark.plugin.rltoys.algorithms.functions.Predictor; 00006 import rlpark.plugin.rltoys.envio.actions.Action; 00007 import rlpark.plugin.rltoys.envio.policy.Policy; 00008 import rlpark.plugin.rltoys.horde.functions.ConstantGamma; 00009 import rlpark.plugin.rltoys.horde.functions.ConstantOutcomeFunction; 00010 import rlpark.plugin.rltoys.horde.functions.GammaFunction; 00011 import rlpark.plugin.rltoys.horde.functions.OutcomeFunction; 00012 import rlpark.plugin.rltoys.horde.functions.RewardFunction; 00013 import rlpark.plugin.rltoys.math.vector.RealVector; 00014 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00015 00016 public class ControlOffPolicyDemon implements Demon { 00017 private static final long serialVersionUID = -7997723890930214800L; 00018 private final RewardFunction rewardFunction; 00019 private final OutcomeFunction outcomeFunction; 00020 @Monitor 00021 private final GreedyGQ gq; 00022 private final GammaFunction gammaFunction; 00023 00024 public ControlOffPolicyDemon(RewardFunction rewardFunction, final GreedyGQ gq) { 00025 this(gq, rewardFunction, new ConstantGamma(gq.gamma()), new ConstantOutcomeFunction(0)); 00026 } 00027 00028 public ControlOffPolicyDemon(GreedyGQ gq, RewardFunction rewardFunction, GammaFunction gammaFunction, 00029 OutcomeFunction outcomeFunction) { 00030 this.rewardFunction = rewardFunction; 00031 this.gq = gq; 00032 this.outcomeFunction = outcomeFunction; 00033 this.gammaFunction = gammaFunction; 00034 } 00035 00036 @Override 00037 public void update(RealVector x_t, Action a_t, RealVector x_tp1) { 00038 gq.update(x_t, a_t, rewardFunction.reward(), gammaFunction.gamma(), outcomeFunction.outcome(), x_tp1, a_t); 00039 } 00040 00041 public RewardFunction rewardFunction() { 00042 return rewardFunction; 00043 } 00044 00045 public OutcomeFunction outcomeFunction() { 00046 return outcomeFunction; 00047 } 00048 00049 public Predictor predictor() { 00050 return gq.predictor(); 00051 } 00052 00053 public Policy targetPolicy() { 00054 return gq.targetPolicy(); 00055 } 00056 00057 @Override 00058 public LinearLearner learner() { 00059 return gq.gq(); 00060 } 00061 }