import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
/**
* HumanNArmedBanditAgent.java
*
*
* Created: Thu Jan 16 16:47:08 2003
*
* @author Todd Neller
* @version 1.0
*/
public class HumanNArmedBanditAgent extends Agent{
/**
* variable console
- encapsulates and uses a BufferedReader
*/
private static ConsoleReader console = new ConsoleReader(System.in);
/**
* variable arms
- number of arms the bandit has
*/
private int arms;
/**
* variable lastArm
- last arm chosen
*/
private int lastArm;
/**
* variable rewards
- all rewards received from each
* arm. Note that we're unwisely not updating estimates
* incrementally - this is for your exercise later. */
private Vector[] rewards;
/**
* variable showAvgRewards
- show average rewards
* received so far before requesting an action */
public boolean showAvgRewards = false;
/**
* variable totalReward
- total reward received for
* this trial */
private double totalReward;
public HumanNArmedBanditAgent(int arms){
this.arms = arms;
}
public void init( Object[] a )
{
}
/**
* Initializes/resets agent to a naive state (no collected
* statistics) and returns the first choice of arm. Since this is
* a nonassociative task, the State is always the same and hence
* does not matter.
*
* @param state a State
, the first
* State
of the new trial. Should not be altered
* within method.
* @return the first Action
of the agent in the new trial */
public Action startTrial( State state )
{
rewards = new Vector[arms];
for (int i=0; igetAction - return a valid NArmedBanditAction
*
* @return an Action
value
*/
private Action getAction() {
if (showAvgRewards) {
System.out.println("Arm #\tTries\tAverage Reward:");
for (int i=0; i= 0 && arm < arms) {
lastArm = arm;
action = new NArmedBanditAction(arm);
}
else
System.out.println("Illegal arm.");
}
catch (Exception e) {
System.out.println("Illegal arm.");
}
}
return action;
}
/**
* The user-defined method where all learning takes place for an
* agent; the method is called once by the Simulation
* instance on each step of the simulation.
*
* The method checks for the case nextState == null, which
* indicates that the trial terminates with this step, and adjusts
* the agent's learning and other processes accordingly. In that
* case, the value returned from this method call is ignored.
*
* @param nextState, the resulting State
from
* taking the previous action; nextState == null if the trial
* terminates with this step.
* @param reward a double
value, the reward
* received for the previous action.
* @return the next Action
to be taken in response to
* State
nextState
*/
public Action step(State nextState, double reward)
{
totalReward += reward;
System.out.println("Reward: " + reward);
rewards[lastArm].add(new Double(reward));
return getAction();
}
public String toString()
{
return "Total Reward: " + totalReward;
}
}// HumanNArmedBanditAgent