/*
 * RLPark 1.0.0
 * Reinforcement Learning Framework in Java
 */
00001 package rlpark.plugin.rltoys.problems.mazes; 00002 00003 import java.awt.Point; 00004 import java.util.Arrays; 00005 00006 public class Mazes { 00007 static public final byte[][] BookMaze = { { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, { 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1 }, 00008 { 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1 }, { 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1 }, { 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, 00009 { 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1 }, { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } }; 00010 00011 static public Maze createBookMaze() { 00012 byte[][] layout = BookMaze; 00013 double[][] rewardFunction = createDefaultRewardFunction(layout, -1); 00014 boolean[][] endEpisodeFunction = createDefaultEndEpisode(layout); 00015 endEpisodeFunction[1][9] = true; 00016 Point start = new Point(3, 1); 00017 return new Maze(layout, rewardFunction, endEpisodeFunction, start); 00018 } 00019 00020 static public Maze createBookMazePositiveReward() { 00021 byte[][] layout = BookMaze; 00022 double[][] rewardFunction = createDefaultRewardFunction(layout, 0); 00023 rewardFunction[1][9] = 1; 00024 boolean[][] endEpisodeFunction = createDefaultEndEpisode(layout); 00025 endEpisodeFunction[1][9] = true; 00026 Point start = new Point(3, 1); 00027 return new Maze(layout, rewardFunction, endEpisodeFunction, start); 00028 } 00029 00030 private static double[][] createDefaultRewardFunction(byte[][] layout, double reward) { 00031 double[][] rewardFunction = new double[layout.length][]; 00032 for (int i = 0; i < rewardFunction.length; i++) { 00033 rewardFunction[i] = new double[layout[i].length]; 00034 Arrays.fill(rewardFunction[i], reward); 00035 } 00036 return rewardFunction; 00037 } 00038 00039 private static boolean[][] createDefaultEndEpisode(byte[][] layout) { 00040 boolean[][] endEpisodeFunction = new boolean[layout.length][]; 00041 for (int i = 0; i < endEpisodeFunction.length; i++) 00042 endEpisodeFunction[i] = new boolean[layout[i].length]; 00043 return endEpisodeFunction; 
00044 } 00045 }