// RLPark 1.0.0
// Reinforcement Learning Framework in Java
00001 package rlpark.plugin.rltoys.experiments.parametersweep.prediction; 00002 00003 import java.util.List; 00004 00005 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.AbstractParameters; 00006 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters; 00007 import rlpark.plugin.rltoys.experiments.parametersweep.parameters.RunInfos; 00008 import rlpark.plugin.rltoys.utils.Utils; 00009 00010 public class PredictionParameters { 00011 public static final String NbLearningSteps = "NbLearningSteps"; 00012 public static final String NbEvaluationSteps = "NbEvaluationSteps"; 00013 public static final String MSE = "MSE"; 00014 public static final String StepSize = "StepSize"; 00015 public static final String MetaStepSize = "MetaStepSize"; 00016 public static final String Gamma = "gamma"; 00017 00018 public static final String Lambda = "Lambda"; 00019 public static final String Tau = "Tau"; 00020 00021 static public int nbPerformanceCheckpoint(AbstractParameters parameters) { 00022 return (int) ((double) parameters.infos().get(Parameters.PerformanceNbCheckPoint)); 00023 } 00024 00025 static public int nbEvaluationSteps(AbstractParameters parameters) { 00026 return (int) ((double) parameters.infos().get(PredictionParameters.NbEvaluationSteps)); 00027 } 00028 00029 static public int nbLearningSteps(AbstractParameters parameters) { 00030 return (int) ((double) parameters.infos().get(PredictionParameters.NbLearningSteps)); 00031 } 00032 00033 final static public double[] getTauValues() { 00034 return new double[] { 1, 2, 4, 8, 16, 32 }; 00035 } 00036 00037 static public double[] getStepSizeValues(boolean withZero) { 00038 double[] values = new double[] { .0001, .0005, .001, .005, .01, .05, .1, .5, 1. 
}; 00039 if (withZero) 00040 values = addZero(values); 00041 return values; 00042 } 00043 00044 static public double[] getWideStepSizeValues(boolean withZero) { 00045 double[] values = new double[] { 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0 }; 00046 if (withZero) 00047 values = addZero(values); 00048 return values; 00049 } 00050 00051 public static double[] getFewStepSizeValues(boolean withZero) { 00052 double[] values = new double[] { 1e-8, 1e-4, 1 }; 00053 if (withZero) 00054 values = addZero(values); 00055 return values; 00056 } 00057 00058 private static double[] addZero(double[] withoutZero) { 00059 double[] result = new double[withoutZero.length + 1]; 00060 System.arraycopy(withoutZero, 0, result, 1, withoutZero.length); 00061 result[0] = 0.0; 00062 return result; 00063 } 00064 00065 static public List<Parameters> provideLambdaParameters(List<Parameters> parameters) { 00066 return provideLambdaParameters(parameters, getTauValues()); 00067 } 00068 00069 public static List<Parameters> provideLambdaParameters(List<Parameters> parameters, double... tauValues) { 00070 List<Parameters> result = Parameters.combine(parameters, Tau, tauValues); 00071 for (Parameters p : result) 00072 p.putSweepParam(Lambda, Utils.timeStepsToDiscount((int) p.get(Tau))); 00073 return result; 00074 } 00075 00076 public static List<Parameters> adjustForLocalTesting(List<Parameters> parameters) { 00077 Parameters selected = parameters.get(0); 00078 RunInfos.set(selected, PredictionParameters.NbLearningSteps, Math.min(10, nbLearningSteps(selected))); 00079 RunInfos.set(selected, PredictionParameters.NbEvaluationSteps, 00080 Math.min(nbPerformanceCheckpoint(selected) * 2, nbEvaluationSteps(selected))); 00081 return Utils.asList(selected); 00082 } 00083 }