RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.algorithms.functions.policydistributions.helpers; 00002 00003 import java.util.Arrays; 00004 import java.util.Random; 00005 00006 import rlpark.plugin.rltoys.algorithms.functions.policydistributions.PolicyDistribution; 00007 import rlpark.plugin.rltoys.envio.actions.Action; 00008 import rlpark.plugin.rltoys.math.vector.RealVector; 00009 import rlpark.plugin.rltoys.math.vector.implementations.PVector; 00010 import rlpark.plugin.rltoys.utils.Utils; 00011 00012 public class RandomPolicy implements PolicyDistribution { 00013 private static final long serialVersionUID = 7993101579423392389L; 00014 private final Random random; 00015 private final Action[] actions; 00016 00017 public RandomPolicy(Random random, Action[] actions) { 00018 this.random = random; 00019 this.actions = actions.clone(); 00020 } 00021 00022 @Override 00023 public double pi(Action a) { 00024 assert Arrays.asList(a).contains(a); 00025 return 1.0 / actions.length; 00026 } 00027 00028 @Override 00029 public Action sampleAction() { 00030 return Utils.choose(random, actions); 00031 } 00032 00033 @Override 00034 public PVector[] createParameters(int nbFeatures) { 00035 return new PVector[] {}; 00036 } 00037 00038 @Override 00039 public RealVector[] computeGradLog(Action a_t) { 00040 return new PVector[] {}; 00041 } 00042 00043 @Override 00044 public int nbParameterVectors() { 00045 return 0; 00046 } 00047 00048 @Override 00049 public void update(RealVector x) { 00050 } 00051 }