RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.evaluation; 00002 00003 import rlpark.plugin.rltoys.agents.offpolicy.OffPolicyAgentEvaluable; 00004 import rlpark.plugin.rltoys.agents.representations.RepresentationFactory; 00005 import rlpark.plugin.rltoys.envio.rl.RLAgent; 00006 import rlpark.plugin.rltoys.experiments.helpers.Runner; 00007 import rlpark.plugin.rltoys.experiments.helpers.Runner.RunnerEvent; 00008 import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.PerformanceEvaluator; 00009 import rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal.OffPolicyEpisodeRewardMonitor; 00010 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00011 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyProblemFactory; 00012 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.RLParameters; 00013 import rlpark.plugin.rltoys.problems.RLProblem; 00014 import zephyr.plugin.core.api.signals.Listener; 00015 00016 public class EpisodeBasedOffPolicyEvaluation extends AbstractOffPolicyEvaluation { 00017 private static final long serialVersionUID = -654783411988105997L; 00018 private final int maxTimeStepsPerEpisode; 00019 private final int nbEpisodePerEvaluation; 00020 00021 public EpisodeBasedOffPolicyEvaluation(int nbRewardCheckpoint, int maxTimeStepsPerEpisode, int nbEpisodePerEvaluation) { 00022 super(nbRewardCheckpoint); 00023 this.maxTimeStepsPerEpisode = maxTimeStepsPerEpisode; 00024 this.nbEpisodePerEvaluation = nbEpisodePerEvaluation; 00025 } 00026 00027 @Override 00028 public PerformanceEvaluator connectEvaluator(final int counter, Runner behaviourRunner, 00029 final OffPolicyProblemFactory problemFactory, final RepresentationFactory projectorFactory, 00030 final OffPolicyAgentEvaluable learningAgent, final Parameters parameters) { 00031 RLProblem problem = createEvaluationProblem(counter, problemFactory); 00032 RLAgent evaluatedAgent = learningAgent.createEvaluatedAgent(); 00033 Runner runner = new Runner(problem, evaluatedAgent, Integer.MAX_VALUE, maxTimeStepsPerEpisode); 00034 final int nbEpisode = RLParameters.nbEpisode(parameters); 00035 final OffPolicyEpisodeRewardMonitor rewardMonitor = new OffPolicyEpisodeRewardMonitor(runner, nbRewardCheckpoint, 00036 nbEpisode, 00037 nbEpisodePerEvaluation); 00038 rewardMonitor.runEvaluationIFN(0); 00039 behaviourRunner.onEpisodeEnd.connect(new Listener<Runner.RunnerEvent>() { 00040 @Override 00041 public void listen(RunnerEvent eventInfo) { 00042 rewardMonitor.runEvaluationIFN(eventInfo.nbEpisodeDone); 00043 } 00044 }); 00045 return rewardMonitor; 00046 } 00047 }