RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy; 00002 00003 import rlpark.plugin.rltoys.agents.offpolicy.OffPolicyAgentEvaluable; 00004 import rlpark.plugin.rltoys.agents.representations.RepresentationFactory; 00005 import rlpark.plugin.rltoys.experiments.helpers.ExperimentCounter; 00006 import rlpark.plugin.rltoys.experiments.helpers.Runner; 00007 import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.PerformanceEvaluator; 00008 import rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.evaluation.OffPolicyEvaluation; 00009 import rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal.OffPolicyEvaluationContext; 00010 import rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal.SweepJob; 00011 import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.OnPolicyRewardMonitor; 00012 import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.RewardMonitorAverage; 00013 import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.RewardMonitorEpisode; 00014 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00015 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyAgentFactory; 00016 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyProblemFactory; 00017 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.RLParameters; 00018 00019 public class ContextEvaluation extends AbstractContextOffPolicy implements OffPolicyEvaluationContext { 00020 private static final long serialVersionUID = -593900122821568271L; 00021 00022 public ContextEvaluation(OffPolicyProblemFactory environmentFactory, RepresentationFactory projectorFactory, 00023 OffPolicyAgentFactory agentFactory, OffPolicyEvaluation evaluation) { 00024 super(environmentFactory, projectorFactory, agentFactory, evaluation); 00025 } 00026 00027 @Override 00028 public Runnable createJob(Parameters parameters, ExperimentCounter counter) { 00029 return new SweepJob(this, parameters, counter); 00030 } 00031 00032 private OnPolicyRewardMonitor createRewardMonitor(String prefix, int nbBins, Parameters parameters) { 00033 int nbEpisode = RLParameters.nbEpisode(parameters); 00034 int maxEpisodeTimeSteps = RLParameters.maxEpisodeTimeSteps(parameters); 00035 if (nbEpisode == 1) 00036 return new RewardMonitorAverage(prefix, nbBins, maxEpisodeTimeSteps); 00037 return new RewardMonitorEpisode(prefix, nbBins, nbEpisode); 00038 } 00039 00040 @Override 00041 public PerformanceEvaluator connectBehaviourRewardMonitor(Runner runner, Parameters parameters) { 00042 OnPolicyRewardMonitor monitor = createRewardMonitor("Behaviour", evaluation.nbRewardCheckpoint(), parameters); 00043 monitor.connect(runner); 00044 return monitor; 00045 } 00046 00047 @Override 00048 public PerformanceEvaluator connectTargetRewardMonitor(int counter, Runner runner, Parameters parameters) { 00049 OffPolicyAgentEvaluable agent = (OffPolicyAgentEvaluable) runner.agent(); 00050 return evaluation.connectEvaluator(counter, runner, environmentFactory, projectorFactory, agent, parameters); 00051 } 00052 }