RLPark 1.0.0
Reinforcement Learning Framework in Java
package rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.internal;

import rlpark.plugin.rltoys.experiments.parametersweep.internal.AbstractPerformanceMonitor;
import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters;

/** Performance monitor that records per-episode rewards together with the
 *  number of time steps accumulated in each measurement slice. */
public abstract class AbstractEpisodeRewardMonitor extends AbstractPerformanceMonitor {
  private final int[] nbTimeSteps;

  public AbstractEpisodeRewardMonitor(String prefix, int[] starts) {
    super(prefix, "Reward", starts);
    nbTimeSteps = new int[starts.length];
  }

  /** Records the reward of one completed episode and adds its length to the
   *  time-step count of the current slice. */
  public void registerMeasurement(int episode, double episodeReward, long nbEpisodeTimeSteps) {
    assert nbEpisodeTimeSteps > 0;
    super.registerMeasurement(episode, episodeReward);
    nbTimeSteps[currentSlice] += nbEpisodeTimeSteps;
  }

  /** Divides the accumulated time steps by the slice size (integer division);
   *  the Integer.MAX_VALUE sentinel set by worstResultUntilEnd() is passed
   *  through unchanged. */
  private double divideBySize(int value, int size) {
    return value != Integer.MAX_VALUE ? value / size : Integer.MAX_VALUE;
  }

  @Override
  public void putResult(Parameters parameters) {
    super.putResult(parameters);
    for (int i = 0; i < starts.length; i++) {
      String sliceLabel = criterionLabel("NbTimeStepSliceMeasured", i);
      parameters.putResult(sliceLabel, divideBySize(nbTimeSteps[i], sizes[i]));
    }
  }

  /** Fills the remaining slices with sentinel values when a run terminates
   *  before all slices have been measured. */
  @Override
  public void worstResultUntilEnd() {
    super.worstResultUntilEnd();
    for (int i = currentSlice; i < starts.length; i++)
      nbTimeSteps[i] = Integer.MAX_VALUE;
  }

  @Override
  protected double worstValue() {
    return -Float.MAX_VALUE;
  }
}
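
The class above is driven once per completed episode: the episode return and length are reported through registerMeasurement, and putResult later writes the per-slice summaries into a Parameters instance. The following is a minimal, hypothetical driver sketch, not part of RLPark; it assumes a concrete subclass instance and an already constructed Parameters object, and relies only on the methods visible in this listing.

// Hypothetical driver loop (not part of RLPark): feeds per-episode results to a
// concrete AbstractEpisodeRewardMonitor and stores the summary in a Parameters object.
static void recordRun(AbstractEpisodeRewardMonitor monitor, Parameters parameters,
                      double[] episodeRewards, long[] episodeTimeSteps) {
  for (int episode = 0; episode < episodeRewards.length; episode++)
    monitor.registerMeasurement(episode, episodeRewards[episode], episodeTimeSteps[episode]);
  // Writes the reward and NbTimeStepSliceMeasured summaries for each slice.
  monitor.putResult(parameters);
}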