RLPark 1.0.0
Reinforcement Learning Framework in Java
|
package rlpark.plugin.rltoys.algorithms.representations.tilescoding;

import java.util.ArrayList;
import java.util.List;

import rlpark.plugin.rltoys.algorithms.functions.states.Projector;
import rlpark.plugin.rltoys.algorithms.representations.discretizer.Discretizer;
import rlpark.plugin.rltoys.algorithms.representations.discretizer.DiscretizerFactory;
import rlpark.plugin.rltoys.algorithms.representations.discretizer.partitions.BoundedBigPartitionFactory;
import rlpark.plugin.rltoys.algorithms.representations.tilescoding.hashing.Tiling;
import rlpark.plugin.rltoys.math.ranges.Range;
import rlpark.plugin.rltoys.math.vector.BinaryVector;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.BVector;
import rlpark.plugin.rltoys.utils.Utils;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;

/**
 * Base class for projectors that turn an array of real-valued inputs into a binary feature vector using a list of
 * {@link TileCoder}s.
 * <p>
 * Subclasses decide how the registered tile coders map to vector indexes by implementing
 * {@link #activateIndexes(double[], BinaryVector)} and {@link #computeVectorSize()}.
 * <p>
 * A single {@link BinaryVector} instance is reused between calls to {@link #project(double[])}: it is cleared and
 * refilled on every call, so callers that need to keep a projection must copy it. The instance is replaced by a
 * freshly sized one each time {@link #includeActiveFeature()} or one of the public {@code addTileCoder} overloads is
 * called.
 */
@Monitor
public abstract class TileCoders implements Projector {
  private static final long serialVersionUID = -2663191120601745893L;
  /** Tile coders registered so far, in registration order. */
  protected final List<TileCoder> tileCoders = new ArrayList<TileCoder>();
  /** Shared output vector; {@code null} until it is first sized. */
  private BinaryVector vector;
  /** When true, the vector gets one extra, last component that {@link #project(double[])} always sets on. */
  private boolean includeActiveFeature = false;
  /** Running counter handed to each new {@link Tiling}; incremented once per tiling created by this instance. */
  private int tilingHashingIndex = 0;
  private final DiscretizerFactory discretizerFactory;
  private final int nbInputs;

  /**
   * @param discretizerFactory factory used by default to build the discretizers of new tilings
   * @param nbInputs number of input variables, as reported by {@link #nbInputs()}
   */
  public TileCoders(DiscretizerFactory discretizerFactory, int nbInputs) {
    this.discretizerFactory = discretizerFactory;
    this.nbInputs = nbInputs;
  }

  /**
   * Reserves one additional component at the end of the vector that is set on by every projection of non-null
   * inputs, and re-creates the shared vector with the new size.
   */
  public void includeActiveFeature() {
    includeActiveFeature = true;
    vector = newVectorInstance();
  }

  /** Creates an empty vector whose dimension is the current {@link #vectorSize()}. */
  private BinaryVector newVectorInstance() {
    return new BVector(vectorSize());
  }

  /**
   * Adds one tile coder per input variable, each looking at that single input only.
   *
   * @param gridResolution resolution passed to every tile coder
   * @param nbTilings number of tilings in every tile coder
   */
  public void addIndependentTilings(int gridResolution, int nbTilings) {
    for (int i = 0; i < nbInputs; i++)
      addTileCoder(new int[] { i }, gridResolution, nbTilings);
  }

  /**
   * Adds a single tile coder over the input indexes returned by {@code Utils.range(0, nbInputs)} (presumably every
   * input, to be confirmed against {@code Utils}).
   *
   * @param gridResolution resolution passed to the tile coder
   * @param nbTilings number of tilings in the tile coder
   */
  public void addFullTilings(int gridResolution, int nbTilings) {
    addTileCoder(Utils.range(0, nbInputs), gridResolution, nbTilings);
  }

  /**
   * Adds a tile coder over the given inputs, using the discretizer factory supplied at construction.
   *
   * @param inputIndexes indexes of the inputs the tile coder looks at; must not be empty
   * @param resolution resolution of each tiling; must be positive
   * @param nbTilings number of tilings; must be positive
   */
  public void addTileCoder(int[] inputIndexes, int resolution, int nbTilings) {
    addTileCoder(discretizerFactory, inputIndexes, resolution, nbTilings);
  }

  /**
   * Builds {@code nbTilings} tilings over the given inputs, wraps them in a new {@link TileCoder}, registers it, and
   * re-creates the shared vector so that its dimension reflects the new tile coder.
   *
   * @param discretizerFactory factory asked for one discretizer per (input, tiling) pair
   * @param inputIndexes indexes of the inputs the tile coder looks at; must not be empty
   * @param resolution resolution of each tiling; must be positive
   * @param nbTilings number of tilings; must be positive
   */
  public void addTileCoder(DiscretizerFactory discretizerFactory, int[] inputIndexes, int resolution, int nbTilings) {
    assert resolution > 0 : "resolution must be positive: " + resolution;
    assert nbTilings > 0 : "nbTilings must be positive: " + nbTilings;
    assert inputIndexes.length > 0 : "inputIndexes must not be empty";
    Tiling[] tilings = new Tiling[nbTilings];
    for (int tilingIndex = 0; tilingIndex < nbTilings; tilingIndex++) {
      Discretizer[] discretizers = new Discretizer[inputIndexes.length];
      for (int inputIndex = 0; inputIndex < discretizers.length; inputIndex++)
        discretizers[inputIndex] = discretizerFactory.createDiscretizer(inputIndexes[inputIndex], resolution,
                                                                        tilingIndex, nbTilings);
      tilings[tilingIndex] = new Tiling(tilingHashingIndex, discretizers, inputIndexes);
      // Every tiling created by this instance gets its own index, across all tile coders.
      tilingHashingIndex++;
    }
    addTileCoder(new TileCoder(tilings, resolution));
    // The vector size depends on the registered tile coders, so the shared vector must be re-created.
    vector = newVectorInstance();
  }

  /** @return the number of input variables given at construction */
  public int nbInputs() {
    return nbInputs;
  }

  /**
   * @return the sum of {@code nbTilings()} over all registered tile coders, plus one when the active feature is
   *         included
   */
  @Override
  public double vectorNorm() {
    int nbActiveTiles = 0;
    for (TileCoder tileCoder : tileCoders)
      nbActiveTiles += tileCoder.nbTilings();
    return includeActiveFeature ? nbActiveTiles + 1 : nbActiveTiles;
  }

  /** @return {@link #computeVectorSize()}, plus one when the active feature is included */
  @Override
  public int vectorSize() {
    int vectorSize = computeVectorSize();
    return includeActiveFeature ? vectorSize + 1 : vectorSize;
  }

  /**
   * Clears the shared vector and fills it with the features activated by {@code inputs}.
   *
   * @param inputs input values, or {@code null} to obtain an all-off vector (the active feature is not set either)
   * @return the shared vector instance, overwritten by the next call
   */
  @Override
  public BinaryVector project(double[] inputs) {
    // The vector is only allocated by includeActiveFeature() and the public addTileCoder overloads; allocate it
    // here if neither has run yet, instead of failing with a NullPointerException.
    if (vector == null)
      vector = newVectorInstance();
    vector.clear();
    if (inputs == null)
      return vector;
    activateIndexes(inputs, vector);
    if (includeActiveFeature)
      vector.setOn(vector.getDimension() - 1);
    return vector;
  }

  /**
   * Registers a tile coder. This overload only appends to {@link #tileCoders}; it does not re-create the shared
   * vector (the public {@code addTileCoder} overloads do so after calling it).
   *
   * @param tileCoder tile coder to register
   */
  protected void addTileCoder(TileCoder tileCoder) {
    tileCoders.add(tileCoder);
  }

  /**
   * Sets on, in {@code vector}, the features activated by {@code inputs}.
   *
   * @param inputs non-null input values
   * @param vector cleared vector to fill
   */
  abstract protected void activateIndexes(double[] inputs, BinaryVector vector);

  /** @return the vector dimension required by the registered tile coders, excluding the optional active feature */
  abstract protected int computeVectorSize();

  /**
   * @return the shared vector as last filled by {@link #project(double[])}; {@code null} if it has not been
   *         allocated yet
   */
  public RealVector vector() {
    return vector;
  }

  /**
   * Sets on every listed index of {@code vector}.
   *
   * @param vector vector to modify
   * @param indexes indexes to set on
   */
  protected void setFeatureOn(BinaryVector vector, int[] indexes) {
    for (int i : indexes)
      vector.setOn(i);
  }

  /** @return the discretizer factory given at construction */
  public DiscretizerFactory discretizerFactory() {
    return discretizerFactory;
  }

  /** @return the live, modifiable list of registered tile coders (not a copy) */
  public List<TileCoder> tileCoders() {
    return tileCoders;
  }

  /** @return one "TileCoder i:" header followed by the tile coder's own description, for each registered coder */
  @Override
  public String toString() {
    StringBuilder result = new StringBuilder();
    for (int i = 0; i < tileCoders.size(); i++)
      result.append("TileCoder ").append(i).append(":\n").append(tileCoders.get(i).toString()).append("\n");
    return result.toString();
  }

  /**
   * Builds {@code inputSize} ranges that all span {@code [min, max]}.
   *
   * @param inputSize number of ranges to create
   * @param min lower bound given to every range
   * @param max upper bound given to every range
   * @return a new array of {@code inputSize} distinct {@link Range} instances
   */
  public static Range[] buildRanges(int inputSize, double min, double max) {
    Range[] ranges = new Range[inputSize];
    for (int i = 0; i < ranges.length; i++)
      ranges[i] = new Range(min, max);
    return ranges;
  }

  /**
   * @param ranges one range per input variable
   * @return a {@link BoundedBigPartitionFactory} built from {@code ranges}
   */
  public static DiscretizerFactory createDefaultDiscretizer(Range... ranges) {
    return new BoundedBigPartitionFactory(ranges);
  }

}