RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal; 00002 00003 import rlpark.plugin.rltoys.experiments.helpers.ExperimentCounter; 00004 import rlpark.plugin.rltoys.experiments.helpers.Runner; 00005 import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.JobWithParameters; 00006 import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.PerformanceEvaluator; 00007 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00008 import rlpark.plugin.rltoys.experiments.scheduling.interfaces.TimedJob; 00009 import zephyr.plugin.core.api.synchronization.Chrono; 00010 00011 public class SweepJob implements JobWithParameters, TimedJob { 00012 private static final long serialVersionUID = -563211383079107807L; 00013 private final Parameters parameters; 00014 private final OffPolicyEvaluationContext context; 00015 private final int counter; 00016 00017 public SweepJob(OffPolicyEvaluationContext context, Parameters parameters, ExperimentCounter counter) { 00018 this.context = context; 00019 this.parameters = parameters; 00020 this.counter = counter.currentIndex(); 00021 } 00022 00023 @Override 00024 public void run() { 00025 Runner runner = context.createRunner(counter, parameters); 00026 PerformanceEvaluator behaviourRewardMonitor = context.connectBehaviourRewardMonitor(runner, parameters); 00027 PerformanceEvaluator targetRewardMonitor = context.connectTargetRewardMonitor(counter, runner, parameters); 00028 Chrono chrono = new Chrono(); 00029 try { 00030 runner.run(); 00031 } catch (Throwable e) { 00032 e.printStackTrace(System.err); 00033 behaviourRewardMonitor.worstResultUntilEnd(); 00034 targetRewardMonitor.worstResultUntilEnd(); 00035 } 00036 behaviourRewardMonitor.putResult(parameters); 00037 targetRewardMonitor.putResult(parameters); 00038 parameters.putResult("totalTimeStep", runner.runnerEvent().nbTotalTimeSteps); 00039 parameters.setComputationTimeMillis(chrono.getCurrentMillis()); 00040 } 00041 00042 @Override 00043 public Parameters parameters() { 00044 return parameters; 00045 } 00046 00047 @Override 00048 public long getComputationTimeMillis() { 00049 return parameters.getComputationTimeMillis(); 00050 } 00051 }