RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy; 00002 00003 import rlpark.plugin.rltoys.agents.offpolicy.OffPolicyAgent; 00004 import rlpark.plugin.rltoys.agents.representations.RepresentationFactory; 00005 import rlpark.plugin.rltoys.experiments.helpers.ExperimentCounter; 00006 import rlpark.plugin.rltoys.experiments.helpers.Runner; 00007 import rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.evaluation.OffPolicyEvaluation; 00008 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00009 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.RunInfo; 00010 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyAgentFactory; 00011 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyProblemFactory; 00012 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.ProblemFactory; 00013 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.RLParameters; 00014 import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.ReinforcementLearningContext; 00015 import rlpark.plugin.rltoys.problems.RLProblem; 00016 00017 public abstract class AbstractContextOffPolicy implements ReinforcementLearningContext { 00018 private static final long serialVersionUID = -6212106048889219995L; 00019 private final OffPolicyAgentFactory agentFactory; 00020 protected final OffPolicyProblemFactory environmentFactory; 00021 protected final OffPolicyEvaluation evaluation; 00022 protected final RepresentationFactory projectorFactory; 00023 00024 public AbstractContextOffPolicy(OffPolicyProblemFactory environmentFactory, RepresentationFactory projectorFactory, 00025 OffPolicyAgentFactory agentFactory, OffPolicyEvaluation evaluation) { 00026 this.evaluation = evaluation; 00027 this.projectorFactory = projectorFactory; 00028 this.environmentFactory = environmentFactory; 00029 this.agentFactory = agentFactory; 00030 } 00031 00032 @Override 00033 public Runner createRunner(int seed, Parameters parameters) { 00034 RLProblem problem = environmentFactory.createEnvironment(ExperimentCounter.newRandom(seed)); 00035 OffPolicyAgent agent = agentFactory.createAgent(seed, problem, parameters, projectorFactory); 00036 int nbEpisode = RLParameters.nbEpisode(parameters); 00037 int maxEpisodeTimeSteps = RLParameters.maxEpisodeTimeSteps(parameters); 00038 return new Runner(problem, agent, nbEpisode, maxEpisodeTimeSteps); 00039 } 00040 00041 @Override 00042 public String fileName() { 00043 return ExperimentCounter.DefaultFileName; 00044 } 00045 00046 @Override 00047 public String folderPath() { 00048 return environmentFactory.label() + "/" + agentFactory.label(); 00049 } 00050 00051 public OffPolicyAgentFactory agentFactory() { 00052 return agentFactory; 00053 } 00054 00055 public ProblemFactory problemFactory() { 00056 return environmentFactory; 00057 } 00058 00059 public Parameters contextParameters() { 00060 RunInfo infos = new RunInfo(); 00061 infos.enableFlag(agentFactory.label()); 00062 infos.enableFlag(environmentFactory.label()); 00063 Parameters parameters = new Parameters(infos); 00064 environmentFactory.setExperimentParameters(parameters); 00065 return parameters; 00066 } 00067 }