RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.agents.offpolicy; 00002 00003 import rlpark.plugin.rltoys.agents.rl.ControlAgent; 00004 import rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner; 00005 import rlpark.plugin.rltoys.envio.actions.Action; 00006 import rlpark.plugin.rltoys.envio.policy.Policies; 00007 import rlpark.plugin.rltoys.envio.policy.Policy; 00008 import rlpark.plugin.rltoys.envio.rl.RLAgent; 00009 import rlpark.plugin.rltoys.envio.rl.TRStep; 00010 import rlpark.plugin.rltoys.math.vector.RealVector; 00011 import rlpark.plugin.rltoys.math.vector.implementations.PVector; 00012 import rlpark.plugin.rltoys.math.vector.implementations.VectorNull; 00013 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00014 00015 @Monitor 00016 public class OffPolicyAgentDirect implements OffPolicyAgentEvaluable { 00017 private static final long serialVersionUID = -8255597969677460009L; 00018 private RealVector x_t; 00019 private final OffPolicyLearner learner; 00020 private final Policy behaviour; 00021 00022 public OffPolicyAgentDirect(Policy behaviour, OffPolicyLearner learner) { 00023 this.learner = learner; 00024 this.behaviour = behaviour; 00025 } 00026 00027 @Override 00028 public Action getAtp1(TRStep step) { 00029 if (step.isEpisodeStarting()) 00030 x_t = null; 00031 RealVector x_tp1 = step.o_tp1 != null ? new PVector(step.o_tp1) : new VectorNull(x_t.getDimension()); 00032 Action a_tp1 = step.o_tp1 != null ? Policies.decide(behaviour, x_tp1) : null; 00033 learner.learn(x_t, step.a_t, x_tp1, a_tp1, step.r_tp1); 00034 x_t = x_tp1; 00035 return a_tp1; 00036 } 00037 00038 public OffPolicyLearner learner() { 00039 return learner; 00040 } 00041 00042 public Policy behaviour() { 00043 return behaviour; 00044 } 00045 00046 @Override 00047 public RLAgent createEvaluatedAgent() { 00048 return new ControlAgent(learner); 00049 } 00050 }