RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.algorithms.control.acting; 00002 00003 import java.util.Random; 00004 00005 import rlpark.plugin.rltoys.algorithms.functions.Predictor; 00006 import rlpark.plugin.rltoys.algorithms.functions.stateactions.StateToStateAction; 00007 import rlpark.plugin.rltoys.envio.actions.Action; 00008 import rlpark.plugin.rltoys.utils.Utils; 00009 00010 public class EpsilonGreedy extends Greedy { 00011 private static final long serialVersionUID = -2618584767896890494L; 00012 private final double epsilon; 00013 private final Random random; 00014 00015 public EpsilonGreedy(Random random, Action[] actions, StateToStateAction toStateAction, Predictor predictor, 00016 double epsilon) { 00017 super(predictor, actions, toStateAction); 00018 this.epsilon = epsilon; 00019 this.random = random; 00020 } 00021 00022 @Override 00023 public Action sampleAction() { 00024 if (random.nextFloat() < epsilon) 00025 return Utils.choose(random, actions); 00026 return super.bestAction(); 00027 } 00028 00029 @Override 00030 public double pi(Action a) { 00031 double probability = a == bestAction ? 1.0 - epsilon : 0.0; 00032 return probability + epsilon / actions.length; 00033 } 00034 00035 @Override 00036 public EpsilonGreedy duplicate() { 00037 return new EpsilonGreedy(random, actions, Utils.clone(toStateAction), predictor, epsilon); 00038 } 00039 }