import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Scanner;

/**
 * Simple (one-variable) linear regression fitted by least squares.
 * The model is y = beta0 + beta1 * x; both coefficients are computed once in the constructor.
 */
public class LinearRegression {
	
	// Visibility and mutability are left as they were (package-private, non-final) so that
	// any same-package code reading these fields directly keeps compiling.
	double beta0, beta1; // y-intercept, slope
	
	/**
	 * @return beta0 the y-intercept
	 */
	public double getBeta0() {
		return beta0;
	}

	/**
	 * @return beta1 the slope
	 */
	public double getBeta1() {
		return beta1;
	}

	/**
	 * Given n-by-2 double data, where indices 0 and 1 are x and y values, respectively, compute the y-intercept (beta0) and slope (beta1) of simple linear regression.
	 * Linear regression will be computed according to the least squares method of equation 3.4 in the ISLR text (http://www-bcf.usc.edu/~gareth/ISL/).
	 * Only indices 0 and 1 of each row are read; any further entries in a row are ignored.
	 * @param data n-by-2 double data, where indices 0 and 1 are x and y values, respectively
	 * @throws NullPointerException if data or any of its rows is null
	 * @throws IllegalArgumentException if data is empty, if a row has fewer than two values,
	 *         or if every row has the same x value (the slope is undefined in that case)
	 */
	public LinearRegression(double[][] data) {
		if (data == null) {
			throw new NullPointerException("data must not be null");
		}
		if (data.length == 0) {
			throw new IllegalArgumentException("data must contain at least one (x, y) row");
		}

		// Validate rows and accumulate the column sums for the means
		double sumX = 0;
		double sumY = 0;
		boolean xVaries = false;
		for (int i = 0; i < data.length; i++) {
			double[] row = data[i];
			if (row == null) {
				throw new NullPointerException("data[" + i + "] must not be null");
			}
			if (row.length < 2) {
				throw new IllegalArgumentException(
						"data[" + i + "] must hold an x and a y value but has length " + row.length);
			}
			sumX += row[0];
			sumY += row[1];
			// Compare the raw x values instead of testing the denominator against zero later:
			// rounding in the mean can leave a tiny nonzero denominator for identical x values.
			if (row[0] != data[0][0]) {
				xVaries = true;
			}
		}
		if (!xVaries) {
			throw new IllegalArgumentException(
					"data must contain at least two distinct x values; all " + data.length
					+ " row(s) have x = " + data[0][0]);
		}
		double meanX = sumX / data.length;
		double meanY = sumY / data.length;

		// Compute beta1 according to equation 3.4
		double numerator = 0;
		double denominator = 0;
		for (int i = 0; i < data.length; i++) {
			double xDiff = data[i][0] - meanX;
			double yDiff = data[i][1] - meanY;
			numerator += xDiff * yDiff;
			denominator += xDiff * xDiff;
		}
		beta1 = numerator / denominator;
		// Compute beta0 according to equation 3.4
		beta0 = meanY - beta1 * meanX;
	}

	/**
	 * Given double x, predict y according to simple linear regression.
	 * @param x input value
	 * @return predicted y output value, beta0 + beta1 * x
	 */
	public double predict(double x) {
		return beta0 + beta1 * x;
	}
	
	/**
	 * Perform linear regression on x,y-data from a file "data.csv" and then perform prediction for x values read from the standard input.
	 * The file "data.csv" is opened relative to the working directory and parsed by CSVUtil.readCSV; each line is expected to hold two doubles (x and y) separated by a comma.
	 * After fitting, every double read from standard input is printed together with its predicted y as "x,y", one pair per line.
	 * Reading stops at end of input or at the first token that is not a double.
	 * Linear regression will be computed according to the least squares method of equation 3.4 in the ISLR text (http://www-bcf.usc.edu/~gareth/ISL/).
	 * @param args command-line arguments (not used)
	 * @throws FileNotFoundException if "data.csv" cannot be opened
	 * @throws UncheckedIOException if closing "data.csv" fails
	 * @throws IllegalArgumentException if the data read from the file cannot be fitted (see the constructor)
	 */
	public static void main(String[] args) throws FileNotFoundException {
		
		// try-with-resources so the file handle is released even if parsing fails
		ArrayList<double[]> data;
		try (FileInputStream csv = new FileInputStream("data.csv")) {
			data = CSVUtil.readCSV(csv);
		} catch (FileNotFoundException e) {
			throw e; // keep the declared exception for a missing file
		} catch (IOException e) {
			// Only close() can end up here; wrap it so the method signature stays unchanged
			throw new UncheckedIOException(e);
		}

		double[][] dataArr = data.toArray(new double[0][]);
		
		// Compute simple linear regression
		LinearRegression lr = new LinearRegression(dataArr);
		
		// Read inputs from standard input and predict outputs according to equation 3.1
		try (Scanner in = new Scanner(System.in)) {
			while (in.hasNextDouble()) {
				double x = in.nextDouble();
				System.out.printf("%f,%f\n", x, lr.predict(x));
			}
		}
	}
}

/*
Sample data.csv:
3,-4
1,-2.1
8,-8.9

Sample System.in input:
-1
0
1
2
3
4
5

Sample output:
-1.000000,-0.134615
0.000000,-1.107692
1.000000,-2.080769
2.000000,-3.053846
3.000000,-4.026923
4.000000,-5.000000
5.000000,-5.973077
*/
