RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.experiments.parametersweep.internal; 00002 00003 import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.PerformanceEvaluator; 00004 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00005 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.RunInfo; 00006 00007 public abstract class AbstractPerformanceMonitor implements PerformanceEvaluator { 00008 protected int currentSlice; 00009 protected final int[] starts; 00010 protected final int[] sizes; 00011 private final double[] slices; 00012 private final String prefix; 00013 private final String performanceLabel; 00014 00015 public AbstractPerformanceMonitor(String prefix, String performanceLabel, int[] starts) { 00016 this.prefix = prefix; 00017 this.performanceLabel = performanceLabel; 00018 this.starts = starts; 00019 slices = new double[starts.length]; 00020 sizes = new int[starts.length]; 00021 } 00022 00023 static protected int[] createStartingPoints(int nbBins, int nbMeasurements) { 00024 int[] starts = new int[nbBins]; 00025 double binSize = (double) nbMeasurements / nbBins; 00026 for (int i = 0; i < starts.length; i++) 00027 starts[i] = (int) (i * binSize); 00028 return starts; 00029 } 00030 00031 private double divideBySize(double value, int size) { 00032 return value != worstValue() ? value / size : worstValue(); 00033 } 00034 00035 protected String criterionLabel(String label, int sliceIndex) { 00036 return String.format("%s%s%02d", prefix, label, sliceIndex); 00037 } 00038 00039 @Override 00040 public void putResult(Parameters parameters) { 00041 RunInfo infos = parameters.infos(); 00042 infos.put(prefix + performanceLabel + Parameters.PerformanceNbCheckPoint, starts.length); 00043 for (int i = 0; i < starts.length; i++) { 00044 String startLabel = criterionLabel(performanceLabel + Parameters.PerformanceStart, i); 00045 infos.put(startLabel, starts[i]); 00046 String sliceLabel = criterionLabel(performanceLabel + Parameters.PerformanceSliceMeasured, i); 00047 parameters.putResult(sliceLabel, divideBySize(slices[i], sizes[i])); 00048 } 00049 double cumulatedReward = 0.0; 00050 int cumulatedSize = 0; 00051 for (int i = starts.length - 1; i >= 0; i--) { 00052 cumulatedSize += sizes[i]; 00053 if (slices[i] != worstValue()) 00054 cumulatedReward += slices[i]; 00055 else 00056 cumulatedReward = worstValue(); 00057 String rewardLabel = criterionLabel(performanceLabel + Parameters.PerformanceCumulatedMeasured, i); 00058 parameters.putResult(rewardLabel, divideBySize(cumulatedReward, cumulatedSize)); 00059 } 00060 } 00061 00062 protected void registerMeasurement(long measurementIndex, double reward) { 00063 updateCurrentSlice(measurementIndex); 00064 slices[currentSlice] += reward; 00065 sizes[currentSlice]++; 00066 } 00067 00068 private void updateCurrentSlice(long measurementIndex) { 00069 if (currentSlice < starts.length - 1 && measurementIndex >= starts[currentSlice + 1]) 00070 currentSlice++; 00071 } 00072 00073 @Override 00074 public void worstResultUntilEnd() { 00075 for (int i = currentSlice; i < starts.length; i++) { 00076 slices[i] = worstValue(); 00077 sizes[i] = 1; 00078 } 00079 } 00080 00081 abstract protected double worstValue(); 00082 }