RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.rltoys.algorithms.control.actorcritic.offpolicy; 00002 00003 import rlpark.plugin.rltoys.algorithms.functions.policydistributions.PolicyDistribution; 00004 import rlpark.plugin.rltoys.envio.actions.Action; 00005 import rlpark.plugin.rltoys.envio.policy.Policies; 00006 import rlpark.plugin.rltoys.math.vector.RealVector; 00007 import rlpark.plugin.rltoys.math.vector.implementations.PVector; 00008 import zephyr.plugin.core.api.monitoring.annotations.Monitor; 00009 00010 public abstract class AbstractActorOffPolicy implements ActorOffPolicy { 00011 @Monitor 00012 final protected PolicyDistribution targetPolicy; 00013 @Monitor(level = 4) 00014 final protected PVector[] u; 00015 00016 protected AbstractActorOffPolicy(PVector[] policyParameters, PolicyDistribution policyDistribution) { 00017 u = policyParameters; 00018 this.targetPolicy = policyDistribution; 00019 } 00020 00021 @Override 00022 public Action proposeAction(RealVector x) { 00023 return Policies.decide(targetPolicy, x); 00024 } 00025 00026 @Override 00027 public void update(double pi_t, double b_t, RealVector x_t, Action a_t, double delta) { 00028 if (x_t == null) { 00029 initEpisode(); 00030 return; 00031 } 00032 updateParameters(pi_t, b_t, x_t, a_t, delta); 00033 } 00034 00035 @Override 00036 public PolicyDistribution policy() { 00037 return targetPolicy; 00038 } 00039 00040 @Override 00041 public PVector[] actorParameters() { 00042 return u; 00043 } 00044 00045 abstract protected void initEpisode(); 00046 00047 abstract protected void updateParameters(double pi_t, double b_t, RealVector x_t, Action a_t, double delta); 00048 }