/*
 * RLPark 1.0.0
 * Reinforcement Learning Framework in Java
 */
00001 package rlpark.plugin.rltoys.horde; 00002 00003 import rlpark.plugin.rltoys.algorithms.control.ControlLearner; 00004 import rlpark.plugin.rltoys.algorithms.control.acting.PolicyBasedControl; 00005 import rlpark.plugin.rltoys.algorithms.functions.states.Projector; 00006 import rlpark.plugin.rltoys.envio.actions.Action; 00007 import rlpark.plugin.rltoys.envio.policy.Policy; 00008 import rlpark.plugin.rltoys.envio.rl.RLAgent; 00009 import rlpark.plugin.rltoys.envio.rl.TRStep; 00010 import rlpark.plugin.rltoys.math.vector.RealVector; 00011 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00012 00013 public class HordeAgent implements RLAgent { 00014 private static final long serialVersionUID = -8430893512617299110L; 00015 00016 @Monitor 00017 protected final PolicyBasedControl control; 00018 @Monitor 00019 protected final Projector projector; 00020 protected RealVector x_t; 00021 @Monitor 00022 private final Horde horde; 00023 00024 public HordeAgent(PolicyBasedControl control, Projector projector, Horde horde) { 00025 this.control = control; 00026 this.projector = projector; 00027 this.horde = horde; 00028 } 00029 00030 @Override 00031 public Action getAtp1(TRStep step) { 00032 if (step.isEpisodeStarting()) 00033 x_t = null; 00034 RealVector x_tp1 = projector.project(step.o_tp1); 00035 Action a_tp1 = control.step(x_t, step.a_t, x_tp1, step.r_tp1); 00036 horde.update(step, x_t, step.a_t, x_tp1); 00037 x_t = x_tp1; 00038 return a_tp1; 00039 } 00040 00041 public ControlLearner control() { 00042 return control; 00043 } 00044 00045 public Projector projector() { 00046 return projector; 00047 } 00048 00049 public Horde horde() { 00050 return horde; 00051 } 00052 00053 public Policy behaviourPolicy() { 00054 return control.policy(); 00055 } 00056 }