import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
/**
* HumanNArmedBanditAgent.java
*
*
* Created: Thu Jan 16 16:47:08 2003
*
* @author Todd Neller
* @version 1.0
*/
public class HumanNArmedBanditAgent extends Agent{
/**
* variable console
- encapsulates and uses a BufferedReader
*/
private static ConsoleReader console = new ConsoleReader(System.in);
/**
* variable arms
- number of arms the bandit has
*/
private int arms;
/**
* variable lastArm
- last arm chosen
*/
private int lastArm;
/**
* variable rewards
- all rewards received from each
* arm. Note that we're unwisely not updating estimates
* incrementally - this is for your exercise later. */
private Vector[] rewards;
/**
* variable showAvgRewards
- show average rewards
* received so far before requesting an action */
public boolean showAvgRewards = false;
/**
* variable totalReward
- total reward received for
* this trial */
private double totalReward;
public HumanNArmedBanditAgent(int arms){
this.arms = arms;
}
public void init( Object[] a )
{
}
/**
* Initializes/resets agent to a naive state (no collected
* statistics) and returns the first choice of arm. Since this is
* a nonassociative task, the State is always the same and hence
* does not matter.
*
* @param state a State
, the first
* State
of the new trial. Should not be altered
* within method.
* @return the first Action
of the agent in the new trial */
public Action startTrial( State state )
{
rewards = new Vector[arms];
for (int i=0; igetAction - return a valid NArmedBanditAction
*
* @return an Action
value
*/
private Action getAction() {
if (showAvgRewards) {
System.out.println("Arm #\tTries\tAverage Reward:");
for (int i=0; i= 0 && arm < arms) {
lastArm = arm;
action = new NArmedBanditAction(arm);
}
else
System.out.println("Illegal arm.");
}
catch (Exception e) {
System.out.println("Illegal arm.");
}
}
return action;
}
/**
* The user-defined method where all learning takes place for an
* agent; the method is called once by the Simulation
* instance on each step of the simulation.
*
* The method checks for the case nextState == null, which
* indicates that the trial terminates with this step, and adjusts
* the agent's learning and other processes accordingly. In that
* case, the value returned from this method call is ignored.
*
* @param nextState, the resulting State
from
* taking the previous action; nextState == null if the trial
* terminates with this step.
* @param reward a double
value, the reward
* received for the previous action.
* @return the next Action
to be taken in response to
* State
nextState
*/
public Action step(State nextState, double reward)
{
totalReward += reward;
System.out.println("Reward: " + reward);
rewards[lastArm].add(new Double(reward));
return getAction();
}
public String toString()
{
return "Total Reward: " + totalReward;
}
}// HumanNArmedBanditAgent