RLPark 1.0.0
Reinforcement Learning Framework in Java
package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal;

import rlpark.plugin.rltoys.experiments.helpers.Runner;
import rlpark.plugin.rltoys.experiments.helpers.Runner.RunnerEvent;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.internal.AbstractEpisodeRewardMonitor;

public class OffPolicyEpisodeRewardMonitor extends AbstractEpisodeRewardMonitor {
  private final Runner runner;
  private int nextEvaluationIndex = 0;
  private final int nbEpisodePerEvaluation;

  public OffPolicyEpisodeRewardMonitor(Runner runner, int nbLearnerEvaluation, int nbTotalBehaviourEpisodes,
      int nbEpisodePerEvaluation) {
    super("Target", createStartingPoints(nbLearnerEvaluation, nbTotalBehaviourEpisodes));
    this.runner = runner;
    this.nbEpisodePerEvaluation = nbEpisodePerEvaluation;
  }

  // Spreads nbLearnerEvaluation evaluation checkpoints evenly over the behaviour episodes,
  // pinning the last checkpoint to the final behaviour episode.
  static protected int[] createStartingPoints(int nbLearnerEvaluation, int nbTotalBehaviourEpisodes) {
    int[] starts = new int[nbLearnerEvaluation];
    double binSize = (double) nbTotalBehaviourEpisodes / (nbLearnerEvaluation - 1);
    for (int i = 0; i < starts.length; i++)
      starts[i] = (int) (i * binSize);
    starts[starts.length - 1] = nbTotalBehaviourEpisodes - 1;
    return starts;
  }

  // Runs an evaluation "if needed" (IFN): once the behaviour episode index reaches the next
  // scheduled checkpoint, runs nbEpisodePerEvaluation episodes with the target-policy runner
  // and registers each episode's reward and length.
  public void runEvaluationIFN(int episodeIndex) {
    if (nextEvaluationIndex >= starts.length || starts[nextEvaluationIndex] > episodeIndex)
      return;
    for (int i = 0; i < nbEpisodePerEvaluation; i++) {
      runner.runEpisode();
      RunnerEvent runnerEvent = runner.runnerEvent();
      registerMeasurement(episodeIndex, runnerEvent.episodeReward, runnerEvent.step.time);
    }
    nextEvaluationIndex++;
  }
}
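The monitor interleaves target-policy evaluations with the behaviour run: createStartingPoints picks nbLearnerEvaluation checkpoints spread evenly over nbTotalBehaviourEpisodes behaviour episodes, and runEvaluationIFN triggers nbEpisodePerEvaluation target-policy episodes once the behaviour episode index reaches the next checkpoint, recording each episode's reward via registerMeasurement from the parent class. The standalone sketch below reproduces just that scheduling logic outside the library to show where the checkpoints fall; the EvaluationScheduleDemo class and its printed "evaluation" lines are illustrative and not part of RLPark.

// Illustrative sketch (not part of RLPark): reproduces the checkpoint scheduling used by
// OffPolicyEpisodeRewardMonitor to show when evaluations would be triggered.
import java.util.Arrays;

public class EvaluationScheduleDemo {
  // Same spacing rule as createStartingPoints(): evenly spaced checkpoints, last one pinned
  // to the final behaviour episode.
  static int[] createStartingPoints(int nbLearnerEvaluation, int nbTotalBehaviourEpisodes) {
    int[] starts = new int[nbLearnerEvaluation];
    double binSize = (double) nbTotalBehaviourEpisodes / (nbLearnerEvaluation - 1);
    for (int i = 0; i < starts.length; i++)
      starts[i] = (int) (i * binSize);
    starts[starts.length - 1] = nbTotalBehaviourEpisodes - 1;
    return starts;
  }

  public static void main(String[] args) {
    int nbLearnerEvaluation = 5, nbTotalBehaviourEpisodes = 100, nbEpisodePerEvaluation = 2;
    int[] starts = createStartingPoints(nbLearnerEvaluation, nbTotalBehaviourEpisodes);
    System.out.println("Evaluation checkpoints: " + Arrays.toString(starts)); // [0, 25, 50, 75, 99]

    // Same gating as runEvaluationIFN(): evaluate only when the behaviour episode index
    // has reached the next scheduled checkpoint, then advance to the following checkpoint.
    int nextEvaluationIndex = 0;
    for (int episodeIndex = 0; episodeIndex < nbTotalBehaviourEpisodes; episodeIndex++) {
      if (nextEvaluationIndex < starts.length && starts[nextEvaluationIndex] <= episodeIndex) {
        for (int i = 0; i < nbEpisodePerEvaluation; i++)
          System.out.println("episode " + episodeIndex + ": target-policy evaluation run " + i);
        nextEvaluationIndex++;
      }
    }
  }
}

In the actual monitor, the loop body is replaced by runner.runEpisode() on the Runner built for the target policy, and the resulting episode reward and length are stored against the behaviour episode index through registerMeasurement.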