RLPark 1.0.0
Reinforcement Learning Framework in Java
00001 package rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal; 00002 00003 import rlpark.plugin.rltoys.experiments.helpers.ExperimentCounter; 00004 import rlpark.plugin.rltoys.experiments.helpers.Runner; 00005 import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.JobWithParameters; 00006 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00007 import rlpark.plugin.rltoys.experiments.scheduling.interfaces.TimedJob; 00008 import zephyr.plugin.core.api.synchronization.Chrono; 00009 00010 public class SweepJob implements JobWithParameters, TimedJob { 00011 private static final long serialVersionUID = -1636763888764939471L; 00012 private final Parameters parameters; 00013 private final OnPolicyEvaluationContext context; 00014 private final int counter; 00015 00016 public SweepJob(OnPolicyEvaluationContext context, Parameters parameters, ExperimentCounter counter) { 00017 this.context = context; 00018 this.parameters = parameters; 00019 this.counter = counter.currentIndex(); 00020 } 00021 00022 @Override 00023 public void run() { 00024 Runner runner = context.createRunner(counter, parameters); 00025 OnPolicyRewardMonitor rewardMonitor = context.createRewardMonitor(parameters); 00026 rewardMonitor.connect(runner); 00027 Chrono chrono = new Chrono(); 00028 try { 00029 runner.run(); 00030 } catch (Throwable e) { 00031 e.printStackTrace(System.err); 00032 rewardMonitor.worstResultUntilEnd(); 00033 } 00034 rewardMonitor.putResult(parameters); 00035 parameters.putResult("totalTimeStep", runner.runnerEvent().nbTotalTimeSteps); 00036 parameters.setComputationTimeMillis(chrono.getCurrentMillis()); 00037 } 00038 00039 @Override 00040 public Parameters parameters() { 00041 return parameters; 00042 } 00043 00044 @Override 00045 public long getComputationTimeMillis() { 00046 return parameters.getComputationTimeMillis(); 00047 } 00048 }