import java.util.Scanner;

/**
 * Command-line driver that plays a player strategy against a series of
 * seeded n-armed bandit games and prints the average cumulative regret
 * reported by the bandit across those games.
 */
public class NArmedBanditTester {

	/**
	 * Prompts for an initial seed on standard input, plays a fixed number of
	 * games (one seed per game, counting up from the initial seed), and prints
	 * the mean of the cumulative regret reported by each game.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		// Parameters for grading: numArms = 10; numPulls = 1000; numGames = 10000; using normally-distributed reward means.
		final int numArms = 10;
		final int numPulls = 1000;
		final int numGames = 10000;

		// The Scanner (and with it System.in) stays open for the whole run and is
		// released automatically once all games have been played.
		try (Scanner in = new Scanner(System.in)) {
			System.out.print("Initial seed? "); 
			int initialSeed = in.nextInt();

			NArmedBanditPlayer player = new RandomPlayer(); // <=== replace RandomPlayer() with player to be tested
			double cumulativeRegret = 0;

			// Count games rather than comparing against initialSeed + numGames:
			// that sum overflows for seeds near Integer.MAX_VALUE, which made the
			// original loop run zero times. Here the seed simply wraps around and
			// exactly numGames games are always played.
			for (int game = 0; game < numGames; game++) {
				int seed = initialSeed + game;
				NArmedBandit.setSeed(seed);
				NArmedBandit nab = new NArmedBandit(numArms); // normally-distributed reward means
				player.reset(numArms);
				for (int pull = 0; pull < numPulls; pull++) {
					int arm = player.getArm();
					double reward = nab.pull(arm);
					player.reportReward(arm, reward);
				}
				cumulativeRegret += nab.getCumulativeRegret();
			}

			// Average the per-game regret over all games played.
			cumulativeRegret /= numGames;
			System.out.println("Average Cumulative Regret: " + cumulativeRegret);
			// TWN Note: My best algorithm for init. seed 0 yields "Average Cumulative Regret: 62.34809482851944".
			// Minimum expected performance should be consistently lower than 135.
		}
	}

}
