/*
 * RLPark 1.0.0
 * Reinforcement Learning Framework in Java
 */
00001 package rlpark.plugin.rltoys.agents.offpolicy; 00002 00003 import rlpark.plugin.rltoys.agents.rl.ControlAgentFA; 00004 import rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner; 00005 import rlpark.plugin.rltoys.algorithms.functions.states.Projector; 00006 import rlpark.plugin.rltoys.envio.actions.Action; 00007 import rlpark.plugin.rltoys.envio.policy.Policies; 00008 import rlpark.plugin.rltoys.envio.policy.Policy; 00009 import rlpark.plugin.rltoys.envio.rl.RLAgent; 00010 import rlpark.plugin.rltoys.envio.rl.TRStep; 00011 import rlpark.plugin.rltoys.math.vector.RealVector; 00012 import rlpark.plugin.rltoys.math.vector.implementations.Vectors; 00013 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00014 00015 @Monitor 00016 public class OffPolicyAgentFA implements OffPolicyAgentEvaluable { 00017 private static final long serialVersionUID = 3773760092579439924L; 00018 private final Projector projector; 00019 private RealVector x_t; 00020 private final OffPolicyLearner learner; 00021 private final Policy behaviour; 00022 00023 public OffPolicyAgentFA(Projector projector, Policy behaviour, OffPolicyLearner learner) { 00024 this.projector = projector; 00025 this.learner = learner; 00026 this.behaviour = behaviour; 00027 } 00028 00029 @Override 00030 public Action getAtp1(TRStep step) { 00031 if (step.isEpisodeStarting()) 00032 x_t = null; 00033 RealVector x_tp1 = projector.project(step.o_tp1); 00034 Action a_tp1 = Policies.decide(behaviour, x_tp1); 00035 learner.learn(x_t, step.a_t, x_tp1, a_tp1, step.r_tp1); 00036 x_t = Vectors.bufferedCopy(x_tp1, x_t); 00037 return a_tp1; 00038 } 00039 00040 public OffPolicyLearner learner() { 00041 return learner; 00042 } 00043 00044 public Policy behaviour() { 00045 return behaviour; 00046 } 00047 00048 @Override 00049 public RLAgent createEvaluatedAgent() { 00050 return new ControlAgentFA(learner, projector); 00051 } 00052 00053 public Projector projector() { 00054 return projector; 00055 } 00056 }