AutoRegression.java [src/java/cfa] Revision: ffc412d47d72be5013c8a0a59c535082523410cb  Date: Tue Dec 16 12:11:46 MST 2014
package cfa;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;
import Jama.Matrix;
import java.io.IOException;


class sort0_largeToSmall implements Comparator<String[]>{
	//Compares the first entry and sorts largest to smallest
	public int compare(final String[] entry1, final String[] entry2) {
		double value1 = 0;
		double value2 = 0;
		try{
			value1 = Double.parseDouble(entry1[0]);
			value2 = Double.parseDouble(entry2[0]);
		}catch(NumberFormatException e){
			e.printStackTrace();
		}
		if(Double.compare(value1, value2) < 0){
			return -1;
        }else if(Double.compare(value1, value2) > 0){
        	return 1;
        }else{
        	return 0;
        }
	}
}
/**
* Last Updated: 16-December-2014
* @author Tyler Wible
* @since 23-July-2012
*/
public class AutoRegression {
	private int projectionCount = 100000;
	/**
	 * Performs a conversion to stoicastic data (z) from standard data of the second column of the provided data array if convertTrue is true. Otherwise it 
	 * performs a conversion to original data (x) from stoicastic data z of the second column of the provided data array.
	 * @param originalData  The data to be converted, first column should be dates associated with the data, second 
	 * column should be the values to be converted
	 * @param average  the average of the original data
	 * @param stDev the standard deviation of the original data
	 * @param convertTrue  if true then it converts from x to z [z = (x-average)/stDev], otherwise it converts from z to x [x = z*stDev + average]
	 * @return  if convertTrue is true the returned array is the converted data, the first column is the 
	 * same while the second column each element z = (data - average)/ stDev.
	 * Otherwise converts back from stoicastic data (z) back to orignial data, the second column each element newData = data*standard Deviation + average 
	 */
	public double[][] StoicasticConversion(double[][] data, double average, double stDev, boolean convertTrue){
		
		double[][] newData = new double[data.length][2];
		
		if(convertTrue){
			//Loop through and convert each element of the array from original data (x) to only the stoicastic component (z)
			for(int i=0; i<data.length; i++){
				newData[i][0] = data[i][0];
				newData[i][1] = (data[i][1] - average)/stDev;//convert data to "z": z = (data - ave)/stDev
			}
		}else{
			//Loop through and convert each element of the array from stoicastic data (z) back to original data (x)
			for(int i=0; i<data.length; i++){
				newData[i][0] = data[i][0];
				newData[i][1] = data[i][1]*stDev + average;//convert data back: x = data*stDev + ave
			}
		}

		return newData;
	}
	/**
	 * Perform a Box-Cox transformation of the provided data.  This is to ensure that the new data will be normally distributed.  This auto calculates an optimal lambda value in order to cause the skewness of the new dataset to be zero.
	 * @param sortedData  a double[][] containing the data to be transformed, column1 = years, column2 = values
	 * @param transformData  a boolean, if true a Box-Cox transformation will be performed on the data, if false it will 
	 * un-Box-Cox-transform the data back to orignal values
	 * @return  the optimized lambda value for the Box-Cox Transformation (aka minimum skewness)
	 */
	public double BoxCox(double[][] annualData){
		DoubleMath doubleMath = new DoubleMath();
		DoubleArray doubleArray = new DoubleArray();
		
		//Perform Box-Cox transform of the data
		double lambda = 0.0;
		double[][] transformedData = BoxCox(lambda, annualData, true);
		double skewness = doubleMath.SkewnessSample(doubleArray.getColumn(transformedData,1));
		
		
		//Check if the transformation is proper (aka skewness = 0 or marginally close) and try again if skewness is not zero
		long start = System.currentTimeMillis();
		int ctr=0;
		while(Double.compare(skewness, 0.0001) >=0 || Double.compare(skewness, -0.0001) <=0){
			//Estimate the lambda value of the data set
			if(ctr > 1000){
				//If lambda has ranged from 0 to 5 end searching for optimal lambda and report lambda = 1
				lambda = 1;
				transformedData = BoxCox(lambda, annualData, true);
				skewness = doubleMath.SkewnessSample(doubleArray.getColumn(transformedData,1));
				break;
			}else{
				lambda = lambda + 0.001;
			}
			
			
			//Transform the data using a Box-Cox transformation with the estimated lambda value
			transformedData = BoxCox(lambda, annualData, true);
			skewness = doubleMath.SkewnessSample(doubleArray.getColumn(transformedData,1));
			ctr++;
		}
		
		
		//Return the optimized lambda value
		System.out.println("Lambda Optimization Took: " + (System.currentTimeMillis()-start) + " milliseconds. Lambda = " + lambda);
		return lambda;
	}
	/**
	 * Perform a Box-Cox transformation of the provided data.  This is to ensure that the new data will be normally distributed.
	 * @param lambda  the lambda coefficient of a Box-Cox transformation
	 * @param sortedData  a String[][] containing the data to be transformed, column1 = dates (yyyy-mm-dd), column2 = values
	 * @param transformData  a boolean, if true a Box-Cox transformation will be performed on the data, if false it will 
	 * un-Box-Cox-transform the data back to orignal values
	 * @return  returns the original String[][] sortedData with the second column having been replaced by the results Box-Cox 
	 * transformation equation or an un-Box-Cox-Transformation
	 */
	public double[][] BoxCox(double lambda, double[][] sortedData, boolean transformData){
		
		double[][] transformedData = new double[sortedData.length][2];
		
		if(transformData){
			//Transform the provided stream flow data to normal using a BoxCox transformation (because stream flow values are 
			//not usually normally distributed)
			for(int i=0; i<sortedData.length; i++){
				//Perform Box-Cox transformation
				if(Double.compare(lambda, 0) == 0){
					double currentValue = sortedData[i][1];
					transformedData[i][0] = sortedData[i][0];
					transformedData[i][1] = Math.log(currentValue);
				}else{
					double currentValue = sortedData[i][1];
					transformedData[i][0] = sortedData[i][0];
					transformedData[i][1] = ( (Math.pow(currentValue, lambda)) - 1 )/lambda;
				}
			}
		}else{
			//Un-Box-Cox transform the data back to orignal values
			for(int i=0; i<sortedData.length; i++){
				//Un-Perform Box-Cox transformation
				if(Double.compare(lambda, 0) == 0){
					double currentValue = sortedData[i][1];
					transformedData[i][0] = sortedData[i][0];
					transformedData[i][1] = Math.exp(currentValue);
				}else{
					double currentValue = sortedData[i][1];
					transformedData[i][0] = sortedData[i][0];
					transformedData[i][1] = Math.pow((currentValue*lambda + 1), 1/lambda);
				}
			}
		}
		
		return transformedData;
	}
	/**
	 * Temporary transformation of the provided data to match Dr. Salas' example paper.  This is to ensure that the new data will 
	 * be normally distributed.
	 * @param annualData  a String[][] containing the data to be transformed, column1 = dates (yyyy-mm-dd), column2 = values
	 * @param convertData  a boolean, if true a transformation will be performed on the data, if false it will 
	 * un-transform the data back to orignal values
	 * @return  returns the original String[][] sortedData with the second column having been replaced by the results of the
	 * transformation or the un-Transformation
	 */
	public double[][] SalasExampleTransformation(double[][] annualData, boolean convertData){
		double[][] transformedData = new double[annualData.length][2];
		
		if(convertData){
			//Run though and perform the transformation of the data to normalize them
			for(int i=0; i<annualData.length; i++){
				//Peform transformation used in Salas' paper on the poudre river
				double currentValue = annualData[i][1];
				double transformedValue = Math.pow(currentValue - 42800,0.34);
				transformedData[i][0] = annualData[i][0];
				transformedData[i][1] = transformedValue;
			}
		}else{
			//Run through and un-transform the data to return real streamflow values
			for(int i=0; i<annualData.length; i++){
				//Undo transformation used in Salas' paper on the poudre river
				double currentValue = annualData[i][1];
				double transformedValue = 42800 + Math.pow(currentValue,1/0.34);
				transformedData[i][0] = annualData[i][0];
				transformedData[i][1] = transformedValue;
			}
		}
		
		return transformedData;
	}
	/**
	 * Performs an Auto-Regressive (AR(p)) or Auto-Regressive Moving Average (ARMA(p,q)) model 
	 * on the provided data.  The order p is determined by the number of phi values in phiString.  
	 * <code>p = phiString.split("\t").length</code>.  If thetaString is blank then an AR(p) model 
	 * is performed.  If thetaString is not blank then an ARMA(p,q) model is performed where the 
	 * order q is determined by the number of theta values in thetaString.  
	 * <code>q = thetaString.split("\t").length</code>. 
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param action  if "optimize", this only optimizes the lambda and phi values for the model and 
	 * then reports these back to the user, if "all" then it generates data based on the calculated 
	 * phi values and finishes performing the drought analysis, if "useParameters" then it takes the 
	 * previously generated parameters and uses these to perform the regression	 
	 * @param phiValues   either an integer for the number of phi values desired in the model (p) or 
	 * a tab-delimited string of phi values previously calculated
	 * @param thetaValues   either an integer for the number of theta values desired in the model (q) 
	 * or a tab-delimited string of theta values previously calculated or blank
	 * @param transformedData  a double[][] containing the transformed stochastic portion of average 
	 * stream flow values in the following format: column1 = dates (yyyy-mm-dd format), 
	 * column2 = the normally-transformed stochastic portion of annual stream flow values (acre-ft)
	 * @return  an Object[] containing two the tab-delimited phi values and theta values if action is "optimize", 
	 * otherwise it contains a string of the method type (ex. ARMA(2,1) <code>String modelType = (String) returnArray[0]</code>), 
	 * the original data (<code>double[][] transformeData = (double[][]) returnArray[1]</code>), 
	 * fitted data (<code>double[][] fittedData = (double[][]) returnArray[2]</code>), and 
	 * future predicted data (<code>double[][] predictedData = (double[][]) returnArray[3]</code>)
	 * @throws IOException
	 */
	public Object[] AutoregressiveModel(String mainFolder, String action, String phiValues, String thetaValues, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		DoubleArray doubleArray = new DoubleArray();
		
		Object[] returnArray = null;
		
		if(action.equalsIgnoreCase("optimizeModel")){
			long start = System.currentTimeMillis();
			//Run optimization on the model using AIC/BIC and return a graph of the optimal front
			if(thetaValues.equalsIgnoreCase("")){
				//Perform AR(p) optimization
				ArrayList<Double> aic = new ArrayList<Double>();
				ArrayList<Double> bic = new ArrayList<Double>();
				ArrayList<String> phi = new ArrayList<String>();
				ArrayList<String> theta = new ArrayList<String>();
				
				boolean increasing = false;
				int ctr = 1;
				while(!increasing){
					//Find the phi parameters of the AR(p) regression
					String[] phiString = AR(mainFolder, ctr, transformedData);
					
					//Perform the AR(p) regression and calculated the AIC and BIC
					double[] aicBIC = ARoptimize(mainFolder, phiString[0], transformedData);
					aic.add(aicBIC[0]);//the AIC value of the fitted AR(ctr) model
					bic.add(aicBIC[1]);//the BIC value of the fitted AR(ctr) model
					phi.add(phiString[0]);//The tab-delimited string of phi values for the fitted AR(ctr) model
					theta.add("");
					
					//Check if BIC value has increased for the past 5 values, if so terminate the loop
					if(ctr >= 5){
						if(bic.get(ctr-5) < bic.get(ctr-4) && bic.get(ctr-4) < bic.get(ctr-3) && bic.get(ctr-3) < bic.get(ctr-2) && bic.get(ctr-2) < bic.get(ctr-1)){
							System.out.println("Found BIC Optimum");
							increasing = true;
						}
					}
					
					ctr++;
				}
				
				//Determine the phi values corresponding to the best BIC value
				double min = doubleMath.min(bic);
				for(int i=0; i<bic.size(); i++){
					if(Double.compare(bic.get(i), min) == 0){
						phiValues = phi.get(i);
						thetaValues = theta.get(i);
					}
				}
				
				Object[] aicData = {aic, bic, phiValues, thetaValues};
				returnArray = aicData;
				
			}else{
				//Perform ARMA(p,q) optimization
				int p=5, q=3;
				double[][] results = new double[p*q][4];
				String[][] phiTheta = new String[p*q][2];
				int ctr=0;
				
				for(int pCtr=1; pCtr<=p; pCtr++){//Loop from 1 to 5 of ARMA p values
					for(int qCtr=1; qCtr<=q; qCtr++){//Loop from 1 to 3 of ARMA q values
						//Find the phi and theta parameters of the ARMA(p, q) regression
						String[] coefficients = ARMA(mainFolder, pCtr, qCtr, transformedData);
						
						//Perform the ARMA(p, q) regression and calculated the AIC and BIC
						double[] aicBIC = ARMAoptimize(mainFolder, coefficients[0], coefficients[1], transformedData);
						results[ctr][0] = pCtr;
						results[ctr][1] = qCtr;
						results[ctr][2] = aicBIC[0];//the AIC value of the fitted ARMA(pCtr, qCtr) model
						results[ctr][3] = aicBIC[1];//the BIC value of the fitted ARMA(pCtr, qCtr) model
						
						phiTheta[ctr][0] = coefficients[0];//The tab-delimited string of phi values for the fitted AR(ctr) model
						phiTheta[ctr][1] = coefficients[1];//The tab-delimited string of theta values for the fitted AR(ctr) model
						ctr++;
					}
				}

				
				//Determine the optimal BIC value (minimum of the matrix)
				double min = doubleMath.min(doubleArray.getColumn(results, 3));
				
				//Loop through and find the phi and theta values corresponding to the optimal BIC value
				for(int i=0; i<results.length; i++){
					if(Double.compare(results[i][3], min) == 0){
						phiValues = phiTheta[i][0];
						thetaValues = phiTheta[i][1];
					}
				}
				
				Object[] aicData = {results, phiValues, thetaValues};
				returnArray = aicData;
			}
			System.out.println("Model Optimization took " + (System.currentTimeMillis()-start) + " milliseconds.");

			
		}else if(action.equalsIgnoreCase("optimizeParameters")){
			//Find the parameters of the regression only and report these back
			if(thetaValues.equalsIgnoreCase("")){
				//Perform AR(p) regression
				int p = Integer.parseInt(phiValues);
				returnArray = AR(mainFolder, p, transformedData);
				
			}else{
				//If there are provided theta values then it is an ARMA model
				//Perform ARMA(p, q) regression
				int p = Integer.parseInt(phiValues);
				int q = Integer.parseInt(thetaValues);
				returnArray = ARMA(mainFolder, p, q, transformedData);
			}
			
		}else if(action.equalsIgnoreCase("useParameters") || action.equalsIgnoreCase("updateParameters")){
			//If the parameters are provided, use them and finish the drought analysis
			if(thetaValues.equalsIgnoreCase("")){
				//Perform the AR(p) regression and generate a new dataset
				returnArray = AR(mainFolder, phiValues, transformedData);
				
			}else{
				//If there are provided theta values then it is an ARMA model
				//Perform the ARMA(p,q) regression and generate a new dataset
				returnArray = ARMA(mainFolder, phiValues, thetaValues, transformedData);
				
			}
			
		}else{
			//Otherwise find the parameters of the regression and then project the data and finish the drought analysis
			if(thetaValues.equalsIgnoreCase("")){
				//Find the phi parameters of the AR(p) regression
				int p = Integer.parseInt(phiValues);
				String[] phiString = AR(mainFolder, p, transformedData);
				
				//Perform the AR(p) regression and generate a new dataset
				returnArray = AR(mainFolder, phiString[0], transformedData);
				
			}else{
				//If there are provided theta values then it is an ARMA model
				//Find the phi and theta parameters of the ARMA(p, q) regression
				int p = Integer.parseInt(phiValues);
				int q = Integer.parseInt(thetaValues);
				String[] parameterArray = ARMA(mainFolder, p, q, transformedData);
				String phiString = parameterArray[0];
				String thetaString = parameterArray[1];
				phiString = phiString.substring(0, phiString.length() - 3);//Remove the $$ at the end of this string
				
				//Perform the ARMA(p,q) regression and generate a new dataset
				returnArray = ARMA(mainFolder, phiString, thetaString, transformedData);
				
			}
		}
		
		return returnArray;
	}
	/**
	 * Performs an Auto-Regressive, AR(p), model on the provided data.  Returns a string containing tab-delimited phi values of the AR(p) model.
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param transformedData  double array of data.  Column1 = years, column2 = annual flow values
	 * @param p  the order of the AR model (if p = 1 an AR(1) model is performed, if p=2 AR(2), etc.)
	 * @return a tab-delimited string containing p number of phi values for the AR(p) model
	 * @throws IOException 
	 */
	public String[] AR(String mainFolder, int p, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		DoubleArray doubleArray = new DoubleArray();
		
		//Error catch for available data vs. "p" order AR model
		int N = transformedData.length;
		if(N < (2*p + 1)){
			String[] errorMessage = {"There is insufficient available data for the order of AR model you have choosen, AR(" + p + "). Please choose a smaller order and try again."};
			writeError(errorMessage);
		}
		
		
		long start = System.currentTimeMillis();
		//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
		Matrix A = new Matrix(N-p, p);
		for(int j=0; j<p; j++){//Column dimension of the matrix
			for(int i=0; i<(N-p); i++){//Row dimension of the matrix
				A.set(i, j, transformedData[p-j+i-1][1]);
			}
		}
		
		
		//Using non-lagged data for the original timeseries construct matrix b
		Matrix b = new Matrix(N-p, 1);
		for(int i=0; i<(N-p); i++){//Row dimension of the matrix
			b.set(i, 0, transformedData[p+i][1]);
		}
		
		
		//Find the Phi values of the AR(p) model using the Yule-Walker matrix aproach:
		//phi = [inverse(transpose(A) * A)] * transpose(A) * b		
		Matrix A_T = A.transpose();
		Matrix C = A_T.times(A);
		Matrix c = A_T.times(b);
		Matrix C_inv = C.inverse();
		Matrix phi = C_inv.times(c);
		displayMatrix(phi, "phi");
		
		
		//Convert phi values into a single tab-delimited string and calculate estimate of variance of errors
		String phiStrings = null;
		double[] data0 = doubleArray.getColumn(b.getArray(), 0);
		double sum = 0;
		for(int i=0; i<phi.getRowDimension(); i++){
			//Check for causal/explosive AR(p) model based on abs(phi) < 1
			if(Math.abs(phi.get(i, 0)) >= 1){
				System.err.println("Calculated phi value exceeds 1.  AR(" + p + ") model is not causal, it is explosive.  Phi = " + phi.get(i, 0));
			}
			
			//Concatenate the phi values
			if(i == 0){
				phiStrings = String.valueOf(phi.get(i,0));
			}else{
				phiStrings = phiStrings + "\t" + String.valueOf(phi.get(i, 0));
			}
			
			//Calculate sum of phi * lag(p) for variance of error estimate
			double[] dataP = doubleArray.getColumn(A.getMatrix(0, A.getRowDimension() - 1, i, i).getArray(), 0);
			double lagP = doubleMath.CorrelationCoefficient(dataP, data0);
			sum = sum + (phi.get(i, 0) * lagP);
		}
		
		
		//Return tab-delimited string of phi values
		System.out.println("AR(" + p + ") JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to calculated phi parameters.");
		String[] returnArray = {phiStrings,""};
		return returnArray;
	}
	/**
	 * Performs an Auto-Regressive (AR(p)) model on the provided data.  The order p is determined by the number of phi 
	 * values in phiString.  p = phiString.split("\t").length;
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param phiString  a string of the phi values for the AR(p) model separated by "\t" (ex. for AR(3) phiString = phi_1 \t phi_2 \tphi_3)
	 * @param transformedData  double array of data.  Column1 = years, column2 = annual flow values
	 * @return  an Object[] with the first element being the partial dataset of provided transformedData (double[][]) 
	 * and the second element being the AR(p) generated data (double[][]) based on the provided phi values
	 * @throws IOException 
	 */
	public Object[] AR(String mainFolder, String phiString, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		DoubleArray doubleArray = new DoubleArray();
		
		//Error catch for available data vs. "p" order AR model
		int N = transformedData.length;
		String[] phiValues = phiString.split("\t");
		int p = phiValues.length;
		if(N < (2*p + 1)){
			String[] errorMessage = {"There is insufficient available data for the order of AR model you have choosen, AR(" + p + "). Please choose a smaller order and try again."};
			writeError(errorMessage);
		}
		
		
		long start = System.currentTimeMillis();
		//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
		Matrix A = new Matrix(N-p, p);
		for(int j=0; j<p; j++){//Column dimension of the matrix
			for(int i=0; i<(N-p); i++){//Row dimension of the matrix
				A.set(i, j, transformedData[p-j+i-1][1]);
			}
		}
		
		
		//Using non-lagged data for the original timeseries construct matrix b
		Matrix b = new Matrix(N-p, 1);
		for(int i=0; i<(N-p); i++){//Row dimension of the matrix
			b.set(i, 0, transformedData[p+i][1]);
		}
		
		
		//Convert phi values into a matrix
		Matrix phi = new Matrix(p, 1);
		double[] data0 = doubleArray.getColumn(b.getArray(), 0);
		double sum = 0;
		for(int i=0; i<phiValues.length; i++){
			double currentPhi = Double.parseDouble(phiValues[i]);
			phi.set(i, 0, currentPhi);
			
			//Calculate sum of phi * lag(p) for variance of error estimate
			double[] dataP = doubleArray.getColumn(A.getMatrix(0, A.getRowDimension() - 1, i, i).getArray(), 0);
			double lagP = doubleMath.CorrelationCoefficient(dataP, data0);
			sum = sum + (phi.get(i, 0) * lagP);
		}
		
		
		//Use the calculated phi values to fit a line to the dataset and predict new data
		//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p
		double[][] fittedData = new double[transformedData.length - p][2];
		double[][] transformedData_partial = new double[transformedData.length  -p][2];
		double[] errors = new double[transformedData_partial.length];
		for(int i=0; i<transformedData.length - p; i++){
			double predictedValue = 0;
			for(int j=0; j<phi.getRowDimension(); j++){
				double currentPHI = phi.get(j,0);
				double previousValue = A.get(i, j);
				predictedValue = predictedValue + currentPHI * previousValue;
			}
			
			fittedData[i][0] = transformedData[i+p][0];
			fittedData[i][1] = predictedValue;
			transformedData_partial[i][0] = transformedData[i+p][0];
			transformedData_partial[i][1] = transformedData[i+p][1];
			errors[i] = transformedData_partial[i][1] - fittedData[i][1];
		}
		
		
		//Compute stats on the errors
//		double aic = doubleMath.AIC(errors, p);
//		double bic = doubleMath.BIC(errors, p);
//		testErrors(errors);
		
		
		//Project forward the data to perform drought statistics on
		Random generator = new Random();
		double mean = doubleMath.meanArithmetic(errors);
		double stDev = doubleMath.StandardDeviationSample(errors);
		double[][] predictedData = new double[projectionCount + p][2];
		for(int i=0; i<predictedData.length; i++){
			//Pull first p values from actual observed data
			if(i < p){
				predictedData[i][0] = transformedData[i][0];
				predictedData[i][1] = transformedData[i][1];
			}else{
				//Calculate model based on p previous values and q previous errors
				double predictedValue = 0;
				//Loop through and add each lag term times its appropriate phi value
				for(int j=0; j<phi.getRowDimension(); j++){
					double currentPHI = phi.get(j, 0);
					predictedValue = predictedValue + currentPHI * predictedData[i-j-1][1];
				}
				double randNumber = generator.nextGaussian();
				predictedValue = predictedValue + (stDev*randNumber) + mean;
				
				predictedData[i][0] = predictedData[i-1][0] + 1;//previous year + 1
				predictedData[i][1] = predictedValue;
			}
		}
		
		
		//Return the generated data as well as the shortened version of the original data so they can be graphed together
		String modelType = "AR(" + p + ")";
		System.out.println(modelType + " JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to generate a dataset.");
		Object[] returnArray = {modelType, transformedData_partial, fittedData, predictedData};
		return returnArray;
	}
	/**
	 * Performs an Auto-Regressive (AR(p)) model on the provided data.  The order p is determined by the number of phi 
	 * values in phiString.  p = phiString.split("\t").length;
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param phiString  a string of the phi values for the AR(p) model separated by "\t" (ex. for AR(3) phiString = phi_1 \t phi_2 \tphi_3)
	 * @param transformedData  double array of data.  Column1 = years, column2 = annual flow values
	 * @return  an Object[] with the first element being the partial dataset of provided transformedData (double[][]) 
	 * and the second element being the AR(p) generated data (double[][]) based on the provided phi values
	 * @throws IOException 
	 */
	public double[] ARoptimize(String mainFolder, String phiString, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		DoubleArray doubleArray = new DoubleArray();
		
		//Error catch for available data vs. "p" order AR model
		int N = transformedData.length;
		String[] phiValues = phiString.split("\t");
		int p = phiValues.length;
		if(N < (2*p + 1)){
			String[] errorMessage = {"There is insufficient available data for the order of AR model you have choosen, AR(" + p + "). Please choose a smaller order and try again."};
			writeError(errorMessage);
		}
		
		
		long start = System.currentTimeMillis();
		//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
		Matrix A = new Matrix(N-p, p);
		for(int j=0; j<p; j++){//Column dimension of the matrix
			for(int i=0; i<(N-p); i++){//Row dimension of the matrix
				A.set(i, j, transformedData[p-j+i-1][1]);
			}
		}
		
		
		//Using non-lagged data for the original timeseries construct matrix b
		Matrix b = new Matrix(N-p, 1);
		for(int i=0; i<(N-p); i++){//Row dimension of the matrix
			b.set(i, 0, transformedData[p+i][1]);
		}
		
		
		//Convert phi values into a matrix
		Matrix phi = new Matrix(p, 1);
		double[] data0 = doubleArray.getColumn(b.getArray(), 0);
		double sum = 0;
		for(int i=0; i<phiValues.length; i++){
			double currentPhi = Double.parseDouble(phiValues[i]);
			phi.set(i, 0, currentPhi);
			
			//Calculate sum of phi * lag(p) for variance of error estimate
			double[] dataP = doubleArray.getColumn(A.getMatrix(0, A.getRowDimension() - 1, i, i).getArray(), 0);
			double lagP = doubleMath.CorrelationCoefficient(dataP, data0);
			sum = sum + (phi.get(i, 0) * lagP);
		}
		
		
		//Use the calculated phi values to fit a line to the dataset and predict new data
		//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p
		double[][] fittedData = new double[transformedData.length - p][2];
		double[][] transformedData_partial = new double[transformedData.length  -p][2];
		double[] errors = new double[transformedData_partial.length];
		for(int i=0; i<transformedData.length - p; i++){
			double predictedValue = 0;
			for(int j=0; j<phi.getRowDimension(); j++){
				double currentPHI = phi.get(j,0);
				double previousValue = A.get(i, j);
				predictedValue = predictedValue + currentPHI * previousValue;
			}
			
			fittedData[i][0] = transformedData[i+p][0];
			fittedData[i][1] = predictedValue;
			transformedData_partial[i][0] = transformedData[i+p][0];
			transformedData_partial[i][1] = transformedData[i+p][1];
			errors[i] = transformedData_partial[i][1] - fittedData[i][1];
		}
		
		
		//Compute stats on the errors
		double aic = doubleMath.AIC(errors, p);
		double bic = doubleMath.BIC(errors, p);
//		testErrors(errors);
		
		//Return the generated data as well as the shortened version of the original data so they can be graphed together
		double[] returnArray = {aic, bic};
		String modelType = "AR(" + p + ")";
		System.out.println(modelType + " JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to generate a dataset.");
		return returnArray;
	}
	/**
	 * Perform an Auto-Regressive Moving Average  ARMA(p,q) model on the provided annual timeseries data 
	 * by generating 500*(p + q + 2) sets of possible values for p and q.  Then evaluates the best fit of 
	 * the generated models (mimimum SSE) and returns the phi and theta values of the best fit model.
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param p  the number of lagged data sets used to develop the ARMA(p,q) model
	 * @param q  the number of error data sets used to develop the ARMA(p,q) model
	 * @param transformedData  double array of data.  Column1 = years, column2 = flow values
	 * @return a string array with returnArray[0] = tab-delimited string containing p number of phi values for the ARMA(p,q) model 
	 * and returnArray[1]= a tab-delimited string containing q number of theta values for the ARMA(p,q) model
	 * @throws IOException 
	 */
	public String[] ARMA(String mainFolder, Integer p, Integer q, double[][] transformedData) throws IOException{
		if((p.compareTo(1) == 0) && (q.compareTo(1) == 0)){
			String[] returnArray = ARMA11(mainFolder, transformedData);
			return returnArray;
		}else{
			//Generate 500*(p+q+2) random sets of values for each phi and theta (between -1 and 1)
			long start = System.currentTimeMillis();
			String[][] phiTheta = new String[500*(p+q+2)][3];
			for(int i=0; i<phiTheta.length; i++){
				//Generate p number of phi values
				String phiString = "";
				for(int j=0; j<p; j++){
					//Randomize numbers between -1 and 1
					double currentPhi = Math.random();
					if(Math.random() > 0.5){
						currentPhi = -currentPhi;
					}
					if(j==0){
						phiString = String.valueOf(currentPhi);
					}else{
						phiString = phiString + "\t" + String.valueOf(currentPhi);
					}
				}
				
				//Generate q number of theta values
				String thetaString = "";
				for(int j=0; j<q; j++){
					double currentTheta = Math.random();
					if(Math.random() > 0.5){
						currentTheta = -currentTheta;
					}
					if(j==0){
						thetaString = String.valueOf(currentTheta);
					}else{
						thetaString = thetaString + "\t" + String.valueOf(currentTheta);
					}
				}
				
				double[] aicBIC = ARMAoptimize(mainFolder, phiString, thetaString, transformedData);
				phiTheta[i][0] = String.valueOf(aicBIC[1]);//the BIC value
				phiTheta[i][1] = phiString;
				phiTheta[i][2] = thetaString;
			}
			
			
			//Evaluate which of the phi and theta values provide the best fit of the data
			Arrays.sort(phiTheta, new sort0_largeToSmall());
			
			
			//Return the best phi and theta values
			String[] returnArray = {phiTheta[0][1], phiTheta[0][2]};
			System.out.println("ARMA(" + p + ", " + q + ") JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to calculate phi and theta parameters.");
			return returnArray;
		}
	}
	/**
	 * Perform an Auto-Regressive Moving Average  ARMA(p,q) model on the provided annual timeseries data
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param phiString  a tab-delimited string containing the pre-calculated phi values (lagged data coefficients) of the fitted ARMA(p,q) model
	 * @param thetaString  a tab-delimited string containing the pre-calculated theta (error coefficients) values of the fitted ARMA(p,q) model
	 * @param transformedData  double array of data.  Column1 = years, column2 = flow values
	 * @return  a double[] with two items returnArray[0] is the value of Akaike Information Criterion (AIC) between the calculated ARMA(p, q) model and the original data, 
	 * returnArray[1] is the value of Bayesian Information Criterion (BIC) between the calculated ARMA(p, q) model and the original data
	 * @throws IOException 
	 */
	public double[] ARMAoptimize(String mainFolder, String phiString, String thetaString, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		
		String[] phiValues = phiString.split("\t");
		String[] thetaValues = thetaString.split("\t");
		int p = phiValues.length;
		int q = thetaValues.length;
		
		//Error catch for available data vs. "p" order AR model
		int N = transformedData.length;
		int largestParameter = p;
		if(p < q){
			largestParameter = q;
		}
		if(N < (2*largestParameter + 1)){
			String[] errorMessage = {"There is insufficient available data for the order of ARMA model you have choosen, ARMA(" + p + ", " + q +"). Please choose a smaller order and try again."};
			writeError(errorMessage);
		}
		
//		long start = System.nanoTime();
		//Convert phis and thetas into doubles
		double[] phi = new double[phiValues.length];
		for(int i=0; i<phiValues.length; i++){
			phi[i] = Double.parseDouble(phiValues[i]);
		}
		double[] theta = new double[thetaValues.length];
		for(int i=0; i<thetaValues.length; i++){
			theta[i] = Double.parseDouble(thetaValues[i]);
		}
		
		
		//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
		Matrix A = new Matrix(N-p, p);
		for(int j=0; j<p; j++){//Column dimension of the matrix
			for(int i=0; i<(N-p); i++){//Row dimension of the matrix
				A.set(i, j, transformedData[p-j+i-1][1]);
			}
		}
		
		
		//Use the calculated phi values to fit a line to the dataset and predict new data
		//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p - theta_1*error_t-1 - theta_2*error_t-2 - ... theta_q*error_t-q
		double[][] fittedData = new double[transformedData.length - p][2];
		double[][] transformedData_partial = new double[transformedData.length - p][2];
		double[] errors = new double[transformedData.length - p];
		for(int i=0; i<transformedData.length - p; i++){
			double predictedValue = 0;
			//Loop through and add each lag term times its appropriate phi value
			for(int j=0; j<phi.length; j++){
				double currentPHI = phi[j];
				double previousValue = A.get(i, j);
				predictedValue = predictedValue + currentPHI * previousValue;
			}
			//Loop through and subtract each error term times its appropriate theta value
			for(int k=0; k<theta.length; k++){
				double currentTHETA = theta[k];
				if( (i-k-1) < 0){//If there is no previous error (only for the first i < q steps) report zero error changes
					predictedValue = predictedValue - currentTHETA * 0.0;
				}else{
					double previousError = errors[i - k - 1];
					predictedValue = predictedValue - currentTHETA * previousError;
				}
			}
			
			fittedData[i][0] = transformedData[i+p][0];
			fittedData[i][1] = predictedValue;
			transformedData_partial[i][0] = transformedData[i+p][0];
			transformedData_partial[i][1] = transformedData[i+p][1];
			errors[i] = transformedData_partial[i][1] - fittedData[i][1];
		}
		
		
		//Compute stats on the errors
//		double sse = doubleMath.SSE(errors);
//		System.out.println("SSE: " + sse);
		double aic = doubleMath.AIC(errors, p+q);
		double bic = doubleMath.BIC(errors, p+q);
//		testErrors(errors);
//		System.out.println("ARMA(" + p + ", " + q + ") JAMA model (SSE: " + sse + ") took " + (System.nanoTime()-start) + " nanoseconds to generate a dataset.");
		double[] returnArray = {aic, bic};
		
		return returnArray;
	}
	/**
	 * Perform an Auto-Regressive Moving Average  ARMA(p,q) model on the provided annual timeseries data
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param phiString  a tab-delimited string containing the pre-calculated phi values (lagged data coefficients) of the fitted ARMA(p,q) model
	 * @param thetaString  a tab-delimited string containing the pre-calculated theta (error coefficients) values of the fitted ARMA(p,q) model
	 * @param transformedData  double array of data.  Column1 = years, column2 = flow values
	 * @return  an Object[] with the first element being the partial dataset of provided transformedData (double[][]) 
	 * and the second element being the ARMA(p, q) generated data (double[][]) based on the provided phi values (phiString) and theta values (thetaString)
	 * @throws IOException 
	 */
	public Object[] ARMA(String mainFolder, String phiString, String thetaString, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		
		String[] phiValues = phiString.split("\t");
		String[] thetaValues = thetaString.split("\t");
		int p = phiValues.length;
		int q = thetaValues.length;
		
		//Error catch for available data vs. "p" order AR model
		int N = transformedData.length;
		if(N < (2*p + 1)){
			String[] errorMessage = {"There is insufficient available data for the order of ARMA model you have choosen, ARMA(" + p + ", " + q +"). Please choose a smaller order and try again."};
			writeError(errorMessage);
		}
		
		long start = System.currentTimeMillis();
		//Convert phis and thetas into doubles
		double[] phi = new double[phiValues.length];
		for(int i=0; i<phiValues.length; i++){
			phi[i] = Double.parseDouble(phiValues[i]);
		}
		double[] theta = new double[thetaValues.length];
		for(int i=0; i<thetaValues.length; i++){
			theta[i] = Double.parseDouble(thetaValues[i]);
		}
		
		
		//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
		Matrix A = new Matrix(N-p, p);
		for(int j=0; j<p; j++){//Column dimension of the matrix
			for(int i=0; i<(N-p); i++){//Row dimension of the matrix
				A.set(i, j, transformedData[p-j+i-1][1]);
			}
		}
		
		
		//Use the calculated phi values to fit a line to the dataset and predict new data
		//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p - theta_1*error_t-1 - theta_2*error_t-2 - ... theta_q*error_t-q
		double[][] fittedData = new double[transformedData.length - p][2];
		double[][] transformedData_partial = new double[transformedData.length - p][2];
		double[] errors = new double[transformedData.length - p];
		for(int i=0; i<transformedData.length - p; i++){
			double predictedValue = 0;
			//Loop through and add each lag term times its appropriate phi value
			for(int j=0; j<phi.length; j++){
				double currentPHI = phi[j];
				double previousValue = A.get(i, j);
				predictedValue = predictedValue + currentPHI * previousValue;
			}
			//Loop through and subtract each error term times its appropriate theta value
			for(int k=0; k<theta.length; k++){
				double currentTHETA = theta[k];
				if( (i-k-1) < 0){//If there is no previous error (only for the first i < q steps) report zero error changes
					predictedValue = predictedValue - currentTHETA * 0.0;
				}else{
					double previousError = errors[i - k - 1];
					predictedValue = predictedValue - currentTHETA * previousError;
				}
			}
			
			fittedData[i][0] = transformedData[i+p][0];
			fittedData[i][1] = predictedValue;
			transformedData_partial[i][0] = transformedData[i+p][0];
			transformedData_partial[i][1] = transformedData[i+p][1];
			errors[i] = transformedData_partial[i][1] - fittedData[i][1];
		}
		
		
//		//Compute stats on the errors
//		double aic = doubleMath.AIC(errors, p+q);
//		double bic = doubleMath.BIC(errors, p+q);
//		testErrors(errors);
		
		
		//Project forward the data to perform drought statistics on
		Random generator = new Random();
		double mean = doubleMath.meanArithmetic(errors);
		double stDev = doubleMath.StandardDeviationSample(errors);
		double[][] predictedData = new double[projectionCount + p][2];
		for(int i=0; i<predictedData.length; i++){
			//Pull first p values from actual observed data
			if(i < p){
				predictedData[i][0] = transformedData[transformedData.length - i - 1][0];
				predictedData[i][1] = transformedData[transformedData.length - i - 1][1];
			}else{
				//Calculate model based on p previous values and q previous errors
				double predictedValue = 0;
				//Loop through and add each lag term times its appropriate phi value
				for(int j=0; j<phi.length; j++){
					double currentPHI = phi[j];
					predictedValue = predictedValue + currentPHI * predictedData[i-p+j][1];
				}
				//Loop through and subtract each error term times its appropriate theta value
				for(int k=0; k<theta.length; k++){
					double currentTHETA = theta[k];
					if( (i-k-1) < 0){//If there is no previous error (only for the first i < q steps) report zero error changes
						predictedValue = predictedValue - currentTHETA * 0.0;
					}else{
						double randNumber = generator.nextGaussian();
						double previousError = (stDev*randNumber) + mean;
						predictedValue = predictedValue - currentTHETA * previousError;
					}
				}
				
				predictedData[i][0] = predictedData[i-1][0] + 1;//previous year + 1
				predictedData[i][1] = predictedValue;
				
//				if(predictedValue < -0.5){
//					System.out.println(predictedData[i][0] + "\t" + predictedData[i][1] + "\t" + i);
//				}
			}
		}
		
		
		//Return the generated data as well as the shortened version of the original data so they can be graphed together
		String modelType = "ARMA(" + p + ", " + q + ")";
		System.out.println(modelType + " JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to generate a dataset.");
		Object[] returnArray = {modelType, transformedData_partial, fittedData, predictedData};
		return returnArray;
	}
	/**
	 * Perform an Auto-Regressive Moving Average, ARMA(1,1), model on the provided annual timeseries data
	 * @param mainFolder  the file location to write out the error file if necessary
	 * @param transformedData  double array of data.  Column1 = years, column2 = flow values
	 * @throws IOException 
	 */
	public String[] ARMA11(String mainFolder, double[][] transformedData) throws IOException{
		DoubleMath doubleMath = new DoubleMath();
		DoubleArray doubleArray = new DoubleArray();
		
		//Error catch for available data vs. "p" order AR model
		int N = transformedData.length;
		if(N < (2*1 + 1)){
			String[] errorMessage = {"There is insufficient available data for the order of ARMA model you have choosen, ARMA(1, 1). Please choose a smaller order and try again."};
			writeError(errorMessage);
		}
		
		//General form of ARMA(1,1) model
		//Y_t = phi_1*(Y_t-1) + epsilon_t - theta_1 * epsilon_t-1
		//Note -1 < phi_1 < 1 and -1 < theta_1 < 1 must be satisfied
		long start = System.currentTimeMillis();
		
		//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
		int p = 2;
		Matrix A = new Matrix(N-p, p);
		for(int j=0; j<p; j++){//Column dimension of the matrix
			for(int i=0; i<(N-p); i++){//Row dimension of the matrix
				A.set(i, j, transformedData[p-j+i-1][1]);
			}
		}
		
		
		//Using non-lagged data for the original timeseries construct matrix b
		Matrix b = new Matrix(N-p, 1);
		for(int i=0; i<(N-p); i++){//Row dimension of the matrix
			b.set(i, 0, transformedData[p+i][1]);
		}
		
		
		//Pull out datasets for lag/correlation coefficient calculations
		double[][] data0 = b.getArray();
		double[][] data1 = A.getMatrix(0, A.getRowDimension() - 1, 0, 0).getArray();
		double[][] data2 = A.getMatrix(0, A.getRowDimension() - 1, 1, 1).getArray();
		double lag1 = doubleMath.CorrelationCoefficient(doubleArray.getColumn(data0, 0), doubleArray.getColumn(data1, 0));
		double lag2 = doubleMath.CorrelationCoefficient(doubleArray.getColumn(data0, 0), doubleArray.getColumn(data2, 0));
		
		
		//Calculated phi1 of ARMA(1,1)
		double phi1 = lag2/lag1;
		
		
		//Calculated theta1 of ARMA(1,1)
		double B = 1 - (2*phi1*lag1) + Math.pow(phi1, 2);
		double root = Math.pow(B, 2) - 4*Math.pow((lag1 - phi1), 2);
		double theta1 = ((-B) + Math.pow(root, 0.5))/(2*(lag1 - phi1));
		
		
//		//Estimate varience of errors for ARMA(1,1) only
//		double variance = doubleMath.VarianceSample(doubleArray.getColumn(data0, 0));
//		double numerator = variance *(1 - phi1);
//		double denominator = 1 - (2*phi1*theta1) + Math.pow(theta1, 1);
//		double variance_e = numerator/denominator;
//		System.out.println("Estimated variance of Errors: " + variance_e);
		
		
		//Check for causal/explosive model based on abs(phi) < 1
		if(Math.abs(phi1) >= 1){
			System.err.println("Calculated phi value exceeds 1.  ARMA(1,1) model is not causal, it is explosive.  Phi = " + phi1);
		}
		//Check for invertibility of the ARMA model based on abs(theta) < 1
		if(Math.abs(theta1) >= 1){
			System.err.println("Calculated phi value exceeds 1.  ARMA(1,1) model is not intertible.  Theta = " + theta1);
		}
		
		
		//Return array of tab-delimited strings of phi values and theta values
		System.out.println("ARMA(1, 1) JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to calculated phi and theta parameters.");
		String[] returnArray = {String.valueOf(phi1), String.valueOf(theta1)};
		return returnArray;
	}
	/**
	 * Prints out the contents of the provided matrix with a label printed before it of the name provided
	 * @param matrix  2D matrix to be printed out
	 * @param name  the label for the matrix will be "name matrix"
	 */
	private void displayMatrix(Matrix matrix, String name){
		System.out.println(name + " matrix");
		for(int i=0; i<matrix.getRowDimension(); i++){
			String currentLine = String.valueOf(matrix.get(i, 0));
			for(int j=1; j<matrix.getColumnDimension(); j++){
				currentLine = currentLine + "\t" + String.valueOf(matrix.get(i, j));
			}
			System.out.println(currentLine);
		}
	}
    /**
     * Writes out the error message, if any, for finding the file and then exits the program
     * @param error  string array to be written as each line of an error message
     * @throws IOException
     */
    public void writeError(String[] error) throws IOException{
        //Output data to text file
        String errorContents = error[0];
        for(int i=1; i<error.length; i++){
            errorContents = errorContents + "\n" + error[i];
        }
        throw new IOException("Error encountered. Please see the following message for details: \n" + errorContents);
    }
//	/**
//	 * Calculate and output a few statistics of the errors.
//	 * @param errors  a double[] containing the differences between the observed data and the predicted data
//	 */
//	private void testErrors(double[] errors){
//		//Calculate error mean and compare it against the assumed mean of zero
//		System.out.println("Error Mean: " + doubleMath.Average(errors));
//		
//		//Calculated the error variance from portions of the sample and test if it is constant
//		System.out.println("Error Variance: " + doubleMath.VarianceSample(errors));
//		System.out.println("Error Standard Deviation: " + doubleMath.StandardDeviationSample(errors));
//		
//		//Calculate other stats. of the errors
//		System.out.println("Error Likelihood: " + doubleMath.LF(errors));
//		System.out.println("Error Log-Likelihood: " + doubleMath.LLF(errors));
//		System.out.println("Error SSE: " + doubleMath.SSE(errors));
//	}
}