/**
 * PigletSolitairePlayerEvaluator - a policy evaluator for a Piglet Solitaire player.  Prints expected probability of winning the game, as well as a summarization of the policy.
 * 
 * Piglet Solitaire is a simple jeopardy coin game.  The goal is to reach a given goal score in a given number of turns.
 * Initially, a player's score is 0, the turn total is zero, and the turn number is 0.  (The last possible turn number is the number of turns - 1.)
 * A player's choice is always simply whether to "flip" a coin or to "hold" and end the turn.
 * If the player chooses to "flip" there are two equiprobable outcomes:
 *   HEAD - the turn total increases by 1, and the turn continues, or
 *   TAIL - the turn total resets to 0, and the turn ends with the score unchanged. Note that the turn number increments at the end of a turn.
 * If the player chooses to "hold", the score is increased by the turn total, the turn total resets to 0, and the turn ends.
 * The player wins if, within a given number of turns, the player's score reaches a given goal score.
 * An optimal player chooses to "flip" or "hold" so as to maximize the probability of winning.
 * 
 * @author Todd W. Neller
 */
public class PigletSolitairePlayerEvaluator {

	// Game parameters: number of turns available and the score that must be reached to win.
	int numTurns = 10, goalScore = 6;
	// Convergence threshold: evaluation repeats until a full sweep changes no value by this much or more.
	double epsilon = 1e-14;
	// Symbolic action codes; not referenced inside this class.
	static final int HOLD = 0, FLIP = 1;
	double[][][] V; // win probability under the imported policy, indexed by score (i), turnTotal (k), turn (j)
	boolean[][][] flip; // imported policy (true = flip, false = hold), indexed by score (i), turnTotal (k), turn (j)
	PigletSolitairePlayer player; // player to be evaluated


	/**
	 * Initializes the given player with the goal score and number of turns, records its
	 * flip/hold decision for every state with score + turnTotal below the goal score,
	 * computes the win probability of that policy, and prints the summary to standard output.
	 *
	 * @param player the player whose policy is evaluated
	 * @throws NullPointerException if {@code player} is null
	 */
	public PigletSolitairePlayerEvaluator(PigletSolitairePlayer player) {
		if (player == null) {
			throw new NullPointerException("player must not be null");
		}
		this.player = player;
		player.initialize(goalScore, numTurns);
		V = new double[goalScore][goalScore][numTurns];
		flip = new boolean[goalScore][goalScore][numTurns];
		// import player policy
		for (int i = 0; i < goalScore; i++) { // for all i
			for (int j = 0; j < numTurns; j++) { // for all j
				for (int k = 0; k < goalScore - i; k++) { // for all k
					flip[i][k][j] = player.willFlip(i, k, j);
				}
			}
		}
		evaluate();
		// Call the private helper rather than the public summarize(): a constructor must not
		// dispatch to a method that a subclass could override.
		printSummary();
	}

	/**
	 * Returns the current win-probability estimate for a state, handling the states that
	 * lie outside the value table.
	 *
	 * @param score     the banked score
	 * @param turnTotal the points accumulated so far in the current turn
	 * @param turn      the turn number
	 * @return 1 if score + turnTotal has reached the goal score, otherwise 0 if no turns
	 *         remain, otherwise the stored value for the state
	 */
	private double pWin(int score, int turnTotal, int turn) {
		if (score + turnTotal >= goalScore) {
			return 1;
		}
		if (turn >= numTurns) {
			return 0;
		}
		return V[score][turnTotal][turn];
	}

	/**
	 * Fills {@code V} with the win probability of the imported policy, sweeping all states
	 * repeatedly until no value changes by {@code epsilon} or more.
	 */
	private void evaluate() {
		double maxChange;
		do {
			maxChange = 0.0;
			// A state's value depends only on a larger turn total within the same turn or on a
			// state in the next turn.  Visiting turns and turn totals from high to low means
			// every dependency has already been updated in this sweep, so the values settle in
			// one sweep and the following sweep merely confirms that nothing changed.
			for (int j = numTurns - 1; j >= 0; j--) { // for all j, last turn first
				for (int i = 0; i < goalScore; i++) { // for all i
					for (int k = goalScore - i - 1; k >= 0; k--) { // for all k, largest first
						double oldProb = V[i][k][j];
						double newProb = flip[i][k][j]
								? (pWin(i, k + 1, j) + pWin(i, 0, j + 1)) / 2 // HEAD or TAIL, equally likely
								: pWin(i + k, 0, j + 1); // hold: bank the turn total, next turn
						V[i][k][j] = newProb;
						maxChange = Math.max(maxChange, Math.abs(newProb - oldProb));
					}
				}
			}
		} while (maxChange >= epsilon);
	}

	/**
	 * Prints the win probability from the initial state followed by a table that lists, for
	 * each score and turn, the action at turn total 0 and every turn total at which the
	 * action changes.
	 */
	public void summarize() {
		printSummary();
	}

	/** Writes the summary described in {@link #summarize()} to standard output. */
	private void printSummary() {
		System.out.println("p[0][0][0] = " + pWin(0, 0, 0));
		System.out.println();
		System.out.println("score\tturn\tPolicy changes at k =");
		for (int i = 0; i < goalScore; i++) { // for all i
			for (int j = 0; j < numTurns; j++) { // for all j
				StringBuilder line = new StringBuilder();
				line.append(i).append("\t").append(j).append("\t").append(flip[i][0][j] ? "flip " : "hold ");
				for (int k = 1; i + k < goalScore; k++) { // for all valid k
					if (flip[i][k][j] != flip[i][k - 1][j]) {
						line.append(k).append(" ").append(flip[i][k][j] ? "flip " : "hold ");
					}
				}
				System.out.println(line);
			}
		}
	}

	/**
	 * Evaluates a sample player and prints the result.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		new PigletSolitairePlayerEvaluator(new PigletSolitaireHoldAt2Player()); // <- Replace the player with your own player.
	}


}
