RLPark 1.0.0
Reinforcement Learning Framework in Java
|
00001 package rlpark.plugin.critterbot.agents; 00002 00003 import java.util.Random; 00004 00005 import rlpark.plugin.critterbot.actions.CritterbotAction; 00006 import rlpark.plugin.critterbot.actions.XYThetaAction; 00007 import rlpark.plugin.critterbot.data.CritterbotObservation; 00008 import rlpark.plugin.critterbot.environment.CritterbotEnvironment; 00009 import rlpark.plugin.critterbot.environment.CritterbotRobot; 00010 import rlpark.plugin.rltoys.agents.Agent; 00011 import zephyr.plugin.core.api.synchronization.Clock; 00012 00013 public class DockingAgent implements Agent { 00014 private static final int RIGHT_TIMEOUT = 1 * 100; 00015 private static final int ABANDON_TIMEOUT = 10 * 100; 00016 static private final double lambda = .01; 00017 private int[] IRLights; 00018 private double[] wmaIRLights; 00019 private final Random random = new Random(0); 00020 private CritterbotAction lastCommand = XYThetaAction.NoMove; 00021 private final CritterbotEnvironment environment; 00022 private int abandonTimeout = ABANDON_TIMEOUT; 00023 private boolean abandonMode = false; 00024 private int timesteps = 0; 00025 00026 public DockingAgent(CritterbotEnvironment environment) { 00027 this.environment = environment; 00028 } 00029 00030 @Override 00031 public CritterbotAction getAtp1(double[] envObs) { 00032 CritterbotObservation obs = environment.getCritterbotObservation(envObs); 00033 // System.out.println(String.format("abandonMode: %b abandonTimeout: %d", 00034 // abandonMode, abandonTimeout)); 00035 if (obs.busVoltage > 170) 00036 return XYThetaAction.NoMove; 00037 else if (abandonMode) { 00038 abandonTimeout -= 1; 00039 if (abandonTimeout <= 0) { 00040 abandonMode = false; 00041 System.out.println("End Timeout"); 00042 abandonTimeout = ABANDON_TIMEOUT; 00043 } 00044 return lastCommand; 00045 } else { 00046 abandonTimeout -= 1; 00047 if (abandonTimeout <= 0) { 00048 abandonMode = true; 00049 abandonTimeout = RIGHT_TIMEOUT; 00050 if (random.nextDouble() > .5) { 00051 System.out.println("Timeout: Moving right"); 00052 lastCommand = new XYThetaAction(0, 20, 0); 00053 } else { 00054 System.out.println("Timeout: Stopping"); 00055 lastCommand = new XYThetaAction(0, 0, 0); 00056 } 00057 return lastCommand; 00058 } 00059 } 00060 if (timesteps > 0) { 00061 timesteps--; 00062 return lastCommand; 00063 } 00064 00065 IRLights = obs.irLight; 00066 if (wmaIRLights == null) { 00067 wmaIRLights = new double[IRLights.length]; 00068 for (int i = 0; i < IRLights.length; i++) 00069 wmaIRLights[i] = IRLights[i]; 00070 } else 00071 for (int i = 0; i < IRLights.length; i++) 00072 wmaIRLights[i] = lambda * IRLights[i] + (1 - lambda) * wmaIRLights[i]; 00073 00074 int max_ind = -1; // where is the strongest indication of the dock, in a 00075 // local neighborhood of the sensors 00076 double max_val = -1; 00077 for (int i = 0; i < 8; i++) { 00078 double val = Math.abs(IRLights[(i - 1 + 8) % 8] - wmaIRLights[(i - 1 + 8) % 8]) + 00079 Math.abs(IRLights[i] - wmaIRLights[i]) + 00080 Math.abs(IRLights[(i + 1) % 8] - wmaIRLights[(i + 1) % 8]); 00081 if (val > max_val) { 00082 max_ind = i; 00083 max_val = val; 00084 } 00085 } 00086 00087 if (random.nextDouble() > .1 && max_ind >= 0 && max_val > 10) {// 20 00088 System.out.println("IR" + max_ind + " " + max_val); 00089 // for (double d : IRLights) { 00090 // System.out.print(" " + d); 00091 // } 00092 // System.out.println(); 00093 00094 if (max_ind == 2) 00095 lastCommand = new XYThetaAction(0, -20, 0); 00096 else if (4 < (max_ind - 2 + 4) % 8) { // in one direction, turn or 00097 // move back 00098 00099 lastCommand = new XYThetaAction(0, 0, 5); 00100 if (random.nextDouble() > .3) 00101 lastCommand = new XYThetaAction(-10, 0, 0); 00102 else if (random.nextDouble() > .2) 00103 lastCommand = new XYThetaAction(0, -10, 0); 00104 } else { // if the other direction, turn or move forward. 00105 lastCommand = new XYThetaAction(0, 0, -5); 00106 if (random.nextDouble() > .3) 00107 lastCommand = new XYThetaAction(10, 0, 0); 00108 else if (random.nextDouble() > .2) 00109 lastCommand = new XYThetaAction(0, 10, 0); 00110 00111 } 00112 00113 // double radians = (max_ind * 45 + 0) * Math.PI / 180.; 00114 // lastCommand = new XYThetaAction(20 * Math.cos(radians), 20 * 00115 // Math.sin(radians), 0); 00116 timesteps = 50; 00117 System.out.println(" Command" + lastCommand); 00118 return lastCommand; 00119 } 00120 00121 System.out.println("IR null"); 00122 timesteps = 20; 00123 lastCommand = new XYThetaAction(0, 0, 0); 00124 return lastCommand; 00125 } 00126 00127 public static void main(String[] args) { 00128 final CritterbotEnvironment environment = new CritterbotRobot(); 00129 Agent agent = new DockingAgent(environment); 00130 Clock clock = new Clock("Docking"); 00131 while (clock.tick() && !environment.isClosed()) 00132 environment.sendAction((CritterbotAction) agent.getAtp1(environment.waitNewObs())); 00133 } 00134 }