RLPark 1.0.0
Reinforcement Learning Framework in Java
|
package rlpark.plugin.rltoys.algorithms.functions.policydistributions.structures;

import java.util.Random;

import rlpark.plugin.rltoys.algorithms.functions.policydistributions.BoundedPdf;
import rlpark.plugin.rltoys.algorithms.functions.policydistributions.PolicyDistribution;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.actions.ActionArray;
import rlpark.plugin.rltoys.envio.actions.Actions;
import rlpark.plugin.rltoys.envio.policy.BoundedPolicy;
import rlpark.plugin.rltoys.math.ranges.Range;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.PVector;

/**
 * Policy distribution over one-dimensional actions whose density is the same
 * constant, {@code 1.0 / range.length()}, for every action inside a fixed
 * {@link Range} and zero outside of it.
 *
 * <p>The distribution has no learnable parameters: it exposes zero parameter
 * vectors, its log-gradient is an empty array and {@link #update(RealVector)}
 * does nothing.
 */
public class UniformDistribution implements PolicyDistribution, BoundedPolicy, BoundedPdf {
  private static final long serialVersionUID = 7284864369595009279L;

  /** Source of randomness handed to {@code range.choose(...)} when sampling. */
  private final Random random;
  /** Interval of actions that receive a non-zero density. */
  private final Range range;
  /** Density assigned to every action inside {@link #range}; fixed at construction. */
  private final double pdfValue;

  /**
   * Creates the distribution and pre-computes its constant density.
   *
   * @param random random generator used by {@link #sampleAction()}
   * @param range interval of admissible actions; the density is
   *          {@code 1.0 / range.length()}
   */
  public UniformDistribution(Random random, Range range) {
    this.range = range;
    this.random = random;
    this.pdfValue = 1.0 / range.length();
  }

  /**
   * Returns the parameter vectors of this distribution, of which there are none.
   *
   * @param nbFeatures ignored by this implementation
   * @return a new, empty array
   */
  @Override
  public PVector[] createParameters(int nbFeatures) {
    return new PVector[0];
  }

  /**
   * Returns the gradient of the log-density with respect to the parameters.
   * Since there are no parameters, the result is always empty.
   *
   * @param a_t action for which the gradient is requested; asserted to be
   *          one-dimensional when assertions are enabled
   * @return a new, empty array
   */
  @Override
  public RealVector[] computeGradLog(Action a_t) {
    assert Actions.isOneDimension(a_t);
    return new PVector[0];
  }

  /**
   * Draws an action by delegating to {@code range.choose(random)}.
   *
   * @return a new {@link ActionArray} wrapping the chosen value
   */
  @Override
  public Action sampleAction() {
    // NOTE(review): presumably Range.choose draws uniformly over the range - confirm in Range.
    double drawn = range.choose(random);
    return new ActionArray(drawn);
  }

  /**
   * Evaluates the density at the given action.
   *
   * @param action action to evaluate; asserted to be an {@link ActionArray}
   *          holding exactly one value when assertions are enabled
   * @return the constant density if {@code range.in(...)} accepts the action
   *         value, {@code 0} otherwise
   */
  @Override
  public double pi(Action action) {
    assert ((ActionArray) action).actions.length == 1;
    double actionValue = ActionArray.toDouble(action);
    if (!range.in(actionValue)) {
      return 0;
    }
    return pdfValue;
  }

  /**
   * @return {@code 0}, as this distribution has no parameter vectors
   */
  @Override
  public int nbParameterVectors() {
    return 0;
  }

  /**
   * @return the range supplied at construction
   */
  @Override
  public Range range() {
    return range;
  }

  /**
   * @return the constant density {@code 1.0 / range.length()}, which is also
   *         the largest value {@link #pi(Action)} can return
   */
  @Override
  public double piMax() {
    return pdfValue;
  }

  /**
   * Does nothing: the distribution does not depend on the input vector.
   *
   * @param x ignored by this implementation
   */
  @Override
  public void update(RealVector x) {
    // Intentionally empty: there is no state to refresh.
  }
}