AutoRegression.java [src/java/cfa] Revision: ffc412d47d72be5013c8a0a59c535082523410cb Date: Tue Dec 16 12:11:46 MST 2014
package cfa;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;
import Jama.Matrix;
import java.io.IOException;
class sort0_largeToSmall implements Comparator<String[]>{
//Compares the first entry and sorts largest to smallest
public int compare(final String[] entry1, final String[] entry2) {
double value1 = 0;
double value2 = 0;
try{
value1 = Double.parseDouble(entry1[0]);
value2 = Double.parseDouble(entry2[0]);
}catch(NumberFormatException e){
e.printStackTrace();
}
if(Double.compare(value1, value2) < 0){
return -1;
}else if(Double.compare(value1, value2) > 0){
return 1;
}else{
return 0;
}
}
}
/**
* Last Updated: 16-December-2014
* @author Tyler Wible
* @since 23-July-2012
*/
public class AutoRegression {
private int projectionCount = 100000;
/**
* Performs a conversion to stoicastic data (z) from standard data of the second column of the provided data array if convertTrue is true. Otherwise it
* performs a conversion to original data (x) from stoicastic data z of the second column of the provided data array.
* @param originalData The data to be converted, first column should be dates associated with the data, second
* column should be the values to be converted
* @param average the average of the original data
* @param stDev the standard deviation of the original data
* @param convertTrue if true then it converts from x to z [z = (x-average)/stDev], otherwise it converts from z to x [x = z*stDev + average]
* @return if convertTrue is true the returned array is the converted data, the first column is the
* same while the second column each element z = (data - average)/ stDev.
* Otherwise converts back from stoicastic data (z) back to orignial data, the second column each element newData = data*standard Deviation + average
*/
public double[][] StoicasticConversion(double[][] data, double average, double stDev, boolean convertTrue){
double[][] newData = new double[data.length][2];
if(convertTrue){
//Loop through and convert each element of the array from original data (x) to only the stoicastic component (z)
for(int i=0; i<data.length; i++){
newData[i][0] = data[i][0];
newData[i][1] = (data[i][1] - average)/stDev;//convert data to "z": z = (data - ave)/stDev
}
}else{
//Loop through and convert each element of the array from stoicastic data (z) back to original data (x)
for(int i=0; i<data.length; i++){
newData[i][0] = data[i][0];
newData[i][1] = data[i][1]*stDev + average;//convert data back: x = data*stDev + ave
}
}
return newData;
}
/**
* Perform a Box-Cox transformation of the provided data. This is to ensure that the new data will be normally distributed. This auto calculates an optimal lambda value in order to cause the skewness of the new dataset to be zero.
* @param sortedData a double[][] containing the data to be transformed, column1 = years, column2 = values
* @param transformData a boolean, if true a Box-Cox transformation will be performed on the data, if false it will
* un-Box-Cox-transform the data back to orignal values
* @return the optimized lambda value for the Box-Cox Transformation (aka minimum skewness)
*/
public double BoxCox(double[][] annualData){
DoubleMath doubleMath = new DoubleMath();
DoubleArray doubleArray = new DoubleArray();
//Perform Box-Cox transform of the data
double lambda = 0.0;
double[][] transformedData = BoxCox(lambda, annualData, true);
double skewness = doubleMath.SkewnessSample(doubleArray.getColumn(transformedData,1));
//Check if the transformation is proper (aka skewness = 0 or marginally close) and try again if skewness is not zero
long start = System.currentTimeMillis();
int ctr=0;
while(Double.compare(skewness, 0.0001) >=0 || Double.compare(skewness, -0.0001) <=0){
//Estimate the lambda value of the data set
if(ctr > 1000){
//If lambda has ranged from 0 to 5 end searching for optimal lambda and report lambda = 1
lambda = 1;
transformedData = BoxCox(lambda, annualData, true);
skewness = doubleMath.SkewnessSample(doubleArray.getColumn(transformedData,1));
break;
}else{
lambda = lambda + 0.001;
}
//Transform the data using a Box-Cox transformation with the estimated lambda value
transformedData = BoxCox(lambda, annualData, true);
skewness = doubleMath.SkewnessSample(doubleArray.getColumn(transformedData,1));
ctr++;
}
//Return the optimized lambda value
System.out.println("Lambda Optimization Took: " + (System.currentTimeMillis()-start) + " milliseconds. Lambda = " + lambda);
return lambda;
}
/**
* Perform a Box-Cox transformation of the provided data. This is to ensure that the new data will be normally distributed.
* @param lambda the lambda coefficient of a Box-Cox transformation
* @param sortedData a String[][] containing the data to be transformed, column1 = dates (yyyy-mm-dd), column2 = values
* @param transformData a boolean, if true a Box-Cox transformation will be performed on the data, if false it will
* un-Box-Cox-transform the data back to orignal values
* @return returns the original String[][] sortedData with the second column having been replaced by the results Box-Cox
* transformation equation or an un-Box-Cox-Transformation
*/
public double[][] BoxCox(double lambda, double[][] sortedData, boolean transformData){
double[][] transformedData = new double[sortedData.length][2];
if(transformData){
//Transform the provided stream flow data to normal using a BoxCox transformation (because stream flow values are
//not usually normally distributed)
for(int i=0; i<sortedData.length; i++){
//Perform Box-Cox transformation
if(Double.compare(lambda, 0) == 0){
double currentValue = sortedData[i][1];
transformedData[i][0] = sortedData[i][0];
transformedData[i][1] = Math.log(currentValue);
}else{
double currentValue = sortedData[i][1];
transformedData[i][0] = sortedData[i][0];
transformedData[i][1] = ( (Math.pow(currentValue, lambda)) - 1 )/lambda;
}
}
}else{
//Un-Box-Cox transform the data back to orignal values
for(int i=0; i<sortedData.length; i++){
//Un-Perform Box-Cox transformation
if(Double.compare(lambda, 0) == 0){
double currentValue = sortedData[i][1];
transformedData[i][0] = sortedData[i][0];
transformedData[i][1] = Math.exp(currentValue);
}else{
double currentValue = sortedData[i][1];
transformedData[i][0] = sortedData[i][0];
transformedData[i][1] = Math.pow((currentValue*lambda + 1), 1/lambda);
}
}
}
return transformedData;
}
/**
* Temporary transformation of the provided data to match Dr. Salas' example paper. This is to ensure that the new data will
* be normally distributed.
* @param annualData a String[][] containing the data to be transformed, column1 = dates (yyyy-mm-dd), column2 = values
* @param convertData a boolean, if true a transformation will be performed on the data, if false it will
* un-transform the data back to orignal values
* @return returns the original String[][] sortedData with the second column having been replaced by the results of the
* transformation or the un-Transformation
*/
public double[][] SalasExampleTransformation(double[][] annualData, boolean convertData){
double[][] transformedData = new double[annualData.length][2];
if(convertData){
//Run though and perform the transformation of the data to normalize them
for(int i=0; i<annualData.length; i++){
//Peform transformation used in Salas' paper on the poudre river
double currentValue = annualData[i][1];
double transformedValue = Math.pow(currentValue - 42800,0.34);
transformedData[i][0] = annualData[i][0];
transformedData[i][1] = transformedValue;
}
}else{
//Run through and un-transform the data to return real streamflow values
for(int i=0; i<annualData.length; i++){
//Undo transformation used in Salas' paper on the poudre river
double currentValue = annualData[i][1];
double transformedValue = 42800 + Math.pow(currentValue,1/0.34);
transformedData[i][0] = annualData[i][0];
transformedData[i][1] = transformedValue;
}
}
return transformedData;
}
/**
* Performs an Auto-Regressive (AR(p)) or Auto-Regressive Moving Average (ARMA(p,q)) model
* on the provided data. The order p is determined by the number of phi values in phiString.
* <code>p = phiString.split("\t").length</code>. If thetaString is blank then an AR(p) model
* is performed. If thetaString is not blank then an ARMA(p,q) model is performed where the
* order q is determined by the number of theta values in thetaString.
* <code>q = thetaString.split("\t").length</code>.
* @param mainFolder the file location to write out the error file if necessary
* @param action if "optimize", this only optimizes the lambda and phi values for the model and
* then reports these back to the user, if "all" then it generates data based on the calculated
* phi values and finishes performing the drought analysis, if "useParameters" then it takes the
* previously generated parameters and uses these to perform the regression
* @param phiValues either an integer for the number of phi values desired in the model (p) or
* a tab-delimited string of phi values previously calculated
* @param thetaValues either an integer for the number of theta values desired in the model (q)
* or a tab-delimited string of theta values previously calculated or blank
* @param transformedData a double[][] containing the transformed stochastic portion of average
* stream flow values in the following format: column1 = dates (yyyy-mm-dd format),
* column2 = the normally-transformed stochastic portion of annual stream flow values (acre-ft)
* @return an Object[] containing two the tab-delimited phi values and theta values if action is "optimize",
* otherwise it contains a string of the method type (ex. ARMA(2,1) <code>String modelType = (String) returnArray[0]</code>),
* the original data (<code>double[][] transformeData = (double[][]) returnArray[1]</code>),
* fitted data (<code>double[][] fittedData = (double[][]) returnArray[2]</code>), and
* future predicted data (<code>double[][] predictedData = (double[][]) returnArray[3]</code>)
* @throws IOException
*/
public Object[] AutoregressiveModel(String mainFolder, String action, String phiValues, String thetaValues, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
DoubleArray doubleArray = new DoubleArray();
Object[] returnArray = null;
if(action.equalsIgnoreCase("optimizeModel")){
long start = System.currentTimeMillis();
//Run optimization on the model using AIC/BIC and return a graph of the optimal front
if(thetaValues.equalsIgnoreCase("")){
//Perform AR(p) optimization
ArrayList<Double> aic = new ArrayList<Double>();
ArrayList<Double> bic = new ArrayList<Double>();
ArrayList<String> phi = new ArrayList<String>();
ArrayList<String> theta = new ArrayList<String>();
boolean increasing = false;
int ctr = 1;
while(!increasing){
//Find the phi parameters of the AR(p) regression
String[] phiString = AR(mainFolder, ctr, transformedData);
//Perform the AR(p) regression and calculated the AIC and BIC
double[] aicBIC = ARoptimize(mainFolder, phiString[0], transformedData);
aic.add(aicBIC[0]);//the AIC value of the fitted AR(ctr) model
bic.add(aicBIC[1]);//the BIC value of the fitted AR(ctr) model
phi.add(phiString[0]);//The tab-delimited string of phi values for the fitted AR(ctr) model
theta.add("");
//Check if BIC value has increased for the past 5 values, if so terminate the loop
if(ctr >= 5){
if(bic.get(ctr-5) < bic.get(ctr-4) && bic.get(ctr-4) < bic.get(ctr-3) && bic.get(ctr-3) < bic.get(ctr-2) && bic.get(ctr-2) < bic.get(ctr-1)){
System.out.println("Found BIC Optimum");
increasing = true;
}
}
ctr++;
}
//Determine the phi values corresponding to the best BIC value
double min = doubleMath.min(bic);
for(int i=0; i<bic.size(); i++){
if(Double.compare(bic.get(i), min) == 0){
phiValues = phi.get(i);
thetaValues = theta.get(i);
}
}
Object[] aicData = {aic, bic, phiValues, thetaValues};
returnArray = aicData;
}else{
//Perform ARMA(p,q) optimization
int p=5, q=3;
double[][] results = new double[p*q][4];
String[][] phiTheta = new String[p*q][2];
int ctr=0;
for(int pCtr=1; pCtr<=p; pCtr++){//Loop from 1 to 5 of ARMA p values
for(int qCtr=1; qCtr<=q; qCtr++){//Loop from 1 to 3 of ARMA q values
//Find the phi and theta parameters of the ARMA(p, q) regression
String[] coefficients = ARMA(mainFolder, pCtr, qCtr, transformedData);
//Perform the ARMA(p, q) regression and calculated the AIC and BIC
double[] aicBIC = ARMAoptimize(mainFolder, coefficients[0], coefficients[1], transformedData);
results[ctr][0] = pCtr;
results[ctr][1] = qCtr;
results[ctr][2] = aicBIC[0];//the AIC value of the fitted ARMA(pCtr, qCtr) model
results[ctr][3] = aicBIC[1];//the BIC value of the fitted ARMA(pCtr, qCtr) model
phiTheta[ctr][0] = coefficients[0];//The tab-delimited string of phi values for the fitted AR(ctr) model
phiTheta[ctr][1] = coefficients[1];//The tab-delimited string of theta values for the fitted AR(ctr) model
ctr++;
}
}
//Determine the optimal BIC value (minimum of the matrix)
double min = doubleMath.min(doubleArray.getColumn(results, 3));
//Loop through and find the phi and theta values corresponding to the optimal BIC value
for(int i=0; i<results.length; i++){
if(Double.compare(results[i][3], min) == 0){
phiValues = phiTheta[i][0];
thetaValues = phiTheta[i][1];
}
}
Object[] aicData = {results, phiValues, thetaValues};
returnArray = aicData;
}
System.out.println("Model Optimization took " + (System.currentTimeMillis()-start) + " milliseconds.");
}else if(action.equalsIgnoreCase("optimizeParameters")){
//Find the parameters of the regression only and report these back
if(thetaValues.equalsIgnoreCase("")){
//Perform AR(p) regression
int p = Integer.parseInt(phiValues);
returnArray = AR(mainFolder, p, transformedData);
}else{
//If there are provided theta values then it is an ARMA model
//Perform ARMA(p, q) regression
int p = Integer.parseInt(phiValues);
int q = Integer.parseInt(thetaValues);
returnArray = ARMA(mainFolder, p, q, transformedData);
}
}else if(action.equalsIgnoreCase("useParameters") || action.equalsIgnoreCase("updateParameters")){
//If the parameters are provided, use them and finish the drought analysis
if(thetaValues.equalsIgnoreCase("")){
//Perform the AR(p) regression and generate a new dataset
returnArray = AR(mainFolder, phiValues, transformedData);
}else{
//If there are provided theta values then it is an ARMA model
//Perform the ARMA(p,q) regression and generate a new dataset
returnArray = ARMA(mainFolder, phiValues, thetaValues, transformedData);
}
}else{
//Otherwise find the parameters of the regression and then project the data and finish the drought analysis
if(thetaValues.equalsIgnoreCase("")){
//Find the phi parameters of the AR(p) regression
int p = Integer.parseInt(phiValues);
String[] phiString = AR(mainFolder, p, transformedData);
//Perform the AR(p) regression and generate a new dataset
returnArray = AR(mainFolder, phiString[0], transformedData);
}else{
//If there are provided theta values then it is an ARMA model
//Find the phi and theta parameters of the ARMA(p, q) regression
int p = Integer.parseInt(phiValues);
int q = Integer.parseInt(thetaValues);
String[] parameterArray = ARMA(mainFolder, p, q, transformedData);
String phiString = parameterArray[0];
String thetaString = parameterArray[1];
phiString = phiString.substring(0, phiString.length() - 3);//Remove the $$ at the end of this string
//Perform the ARMA(p,q) regression and generate a new dataset
returnArray = ARMA(mainFolder, phiString, thetaString, transformedData);
}
}
return returnArray;
}
/**
* Performs an Auto-Regressive, AR(p), model on the provided data. Returns a string containing tab-delimited phi values of the AR(p) model.
* @param mainFolder the file location to write out the error file if necessary
* @param transformedData double array of data. Column1 = years, column2 = annual flow values
* @param p the order of the AR model (if p = 1 an AR(1) model is performed, if p=2 AR(2), etc.)
* @return a tab-delimited string containing p number of phi values for the AR(p) model
* @throws IOException
*/
public String[] AR(String mainFolder, int p, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
DoubleArray doubleArray = new DoubleArray();
//Error catch for available data vs. "p" order AR model
int N = transformedData.length;
if(N < (2*p + 1)){
String[] errorMessage = {"There is insufficient available data for the order of AR model you have choosen, AR(" + p + "). Please choose a smaller order and try again."};
writeError(errorMessage);
}
long start = System.currentTimeMillis();
//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
Matrix A = new Matrix(N-p, p);
for(int j=0; j<p; j++){//Column dimension of the matrix
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
A.set(i, j, transformedData[p-j+i-1][1]);
}
}
//Using non-lagged data for the original timeseries construct matrix b
Matrix b = new Matrix(N-p, 1);
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
b.set(i, 0, transformedData[p+i][1]);
}
//Find the Phi values of the AR(p) model using the Yule-Walker matrix aproach:
//phi = [inverse(transpose(A) * A)] * transpose(A) * b
Matrix A_T = A.transpose();
Matrix C = A_T.times(A);
Matrix c = A_T.times(b);
Matrix C_inv = C.inverse();
Matrix phi = C_inv.times(c);
displayMatrix(phi, "phi");
//Convert phi values into a single tab-delimited string and calculate estimate of variance of errors
String phiStrings = null;
double[] data0 = doubleArray.getColumn(b.getArray(), 0);
double sum = 0;
for(int i=0; i<phi.getRowDimension(); i++){
//Check for causal/explosive AR(p) model based on abs(phi) < 1
if(Math.abs(phi.get(i, 0)) >= 1){
System.err.println("Calculated phi value exceeds 1. AR(" + p + ") model is not causal, it is explosive. Phi = " + phi.get(i, 0));
}
//Concatenate the phi values
if(i == 0){
phiStrings = String.valueOf(phi.get(i,0));
}else{
phiStrings = phiStrings + "\t" + String.valueOf(phi.get(i, 0));
}
//Calculate sum of phi * lag(p) for variance of error estimate
double[] dataP = doubleArray.getColumn(A.getMatrix(0, A.getRowDimension() - 1, i, i).getArray(), 0);
double lagP = doubleMath.CorrelationCoefficient(dataP, data0);
sum = sum + (phi.get(i, 0) * lagP);
}
//Return tab-delimited string of phi values
System.out.println("AR(" + p + ") JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to calculated phi parameters.");
String[] returnArray = {phiStrings,""};
return returnArray;
}
/**
* Performs an Auto-Regressive (AR(p)) model on the provided data. The order p is determined by the number of phi
* values in phiString. p = phiString.split("\t").length;
* @param mainFolder the file location to write out the error file if necessary
* @param phiString a string of the phi values for the AR(p) model separated by "\t" (ex. for AR(3) phiString = phi_1 \t phi_2 \tphi_3)
* @param transformedData double array of data. Column1 = years, column2 = annual flow values
* @return an Object[] with the first element being the partial dataset of provided transformedData (double[][])
* and the second element being the AR(p) generated data (double[][]) based on the provided phi values
* @throws IOException
*/
public Object[] AR(String mainFolder, String phiString, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
DoubleArray doubleArray = new DoubleArray();
//Error catch for available data vs. "p" order AR model
int N = transformedData.length;
String[] phiValues = phiString.split("\t");
int p = phiValues.length;
if(N < (2*p + 1)){
String[] errorMessage = {"There is insufficient available data for the order of AR model you have choosen, AR(" + p + "). Please choose a smaller order and try again."};
writeError(errorMessage);
}
long start = System.currentTimeMillis();
//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
Matrix A = new Matrix(N-p, p);
for(int j=0; j<p; j++){//Column dimension of the matrix
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
A.set(i, j, transformedData[p-j+i-1][1]);
}
}
//Using non-lagged data for the original timeseries construct matrix b
Matrix b = new Matrix(N-p, 1);
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
b.set(i, 0, transformedData[p+i][1]);
}
//Convert phi values into a matrix
Matrix phi = new Matrix(p, 1);
double[] data0 = doubleArray.getColumn(b.getArray(), 0);
double sum = 0;
for(int i=0; i<phiValues.length; i++){
double currentPhi = Double.parseDouble(phiValues[i]);
phi.set(i, 0, currentPhi);
//Calculate sum of phi * lag(p) for variance of error estimate
double[] dataP = doubleArray.getColumn(A.getMatrix(0, A.getRowDimension() - 1, i, i).getArray(), 0);
double lagP = doubleMath.CorrelationCoefficient(dataP, data0);
sum = sum + (phi.get(i, 0) * lagP);
}
//Use the calculated phi values to fit a line to the dataset and predict new data
//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p
double[][] fittedData = new double[transformedData.length - p][2];
double[][] transformedData_partial = new double[transformedData.length -p][2];
double[] errors = new double[transformedData_partial.length];
for(int i=0; i<transformedData.length - p; i++){
double predictedValue = 0;
for(int j=0; j<phi.getRowDimension(); j++){
double currentPHI = phi.get(j,0);
double previousValue = A.get(i, j);
predictedValue = predictedValue + currentPHI * previousValue;
}
fittedData[i][0] = transformedData[i+p][0];
fittedData[i][1] = predictedValue;
transformedData_partial[i][0] = transformedData[i+p][0];
transformedData_partial[i][1] = transformedData[i+p][1];
errors[i] = transformedData_partial[i][1] - fittedData[i][1];
}
//Compute stats on the errors
// double aic = doubleMath.AIC(errors, p);
// double bic = doubleMath.BIC(errors, p);
// testErrors(errors);
//Project forward the data to perform drought statistics on
Random generator = new Random();
double mean = doubleMath.meanArithmetic(errors);
double stDev = doubleMath.StandardDeviationSample(errors);
double[][] predictedData = new double[projectionCount + p][2];
for(int i=0; i<predictedData.length; i++){
//Pull first p values from actual observed data
if(i < p){
predictedData[i][0] = transformedData[i][0];
predictedData[i][1] = transformedData[i][1];
}else{
//Calculate model based on p previous values and q previous errors
double predictedValue = 0;
//Loop through and add each lag term times its appropriate phi value
for(int j=0; j<phi.getRowDimension(); j++){
double currentPHI = phi.get(j, 0);
predictedValue = predictedValue + currentPHI * predictedData[i-j-1][1];
}
double randNumber = generator.nextGaussian();
predictedValue = predictedValue + (stDev*randNumber) + mean;
predictedData[i][0] = predictedData[i-1][0] + 1;//previous year + 1
predictedData[i][1] = predictedValue;
}
}
//Return the generated data as well as the shortened version of the original data so they can be graphed together
String modelType = "AR(" + p + ")";
System.out.println(modelType + " JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to generate a dataset.");
Object[] returnArray = {modelType, transformedData_partial, fittedData, predictedData};
return returnArray;
}
/**
* Performs an Auto-Regressive (AR(p)) model on the provided data. The order p is determined by the number of phi
* values in phiString. p = phiString.split("\t").length;
* @param mainFolder the file location to write out the error file if necessary
* @param phiString a string of the phi values for the AR(p) model separated by "\t" (ex. for AR(3) phiString = phi_1 \t phi_2 \tphi_3)
* @param transformedData double array of data. Column1 = years, column2 = annual flow values
* @return an Object[] with the first element being the partial dataset of provided transformedData (double[][])
* and the second element being the AR(p) generated data (double[][]) based on the provided phi values
* @throws IOException
*/
public double[] ARoptimize(String mainFolder, String phiString, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
DoubleArray doubleArray = new DoubleArray();
//Error catch for available data vs. "p" order AR model
int N = transformedData.length;
String[] phiValues = phiString.split("\t");
int p = phiValues.length;
if(N < (2*p + 1)){
String[] errorMessage = {"There is insufficient available data for the order of AR model you have choosen, AR(" + p + "). Please choose a smaller order and try again."};
writeError(errorMessage);
}
long start = System.currentTimeMillis();
//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
Matrix A = new Matrix(N-p, p);
for(int j=0; j<p; j++){//Column dimension of the matrix
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
A.set(i, j, transformedData[p-j+i-1][1]);
}
}
//Using non-lagged data for the original timeseries construct matrix b
Matrix b = new Matrix(N-p, 1);
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
b.set(i, 0, transformedData[p+i][1]);
}
//Convert phi values into a matrix
Matrix phi = new Matrix(p, 1);
double[] data0 = doubleArray.getColumn(b.getArray(), 0);
double sum = 0;
for(int i=0; i<phiValues.length; i++){
double currentPhi = Double.parseDouble(phiValues[i]);
phi.set(i, 0, currentPhi);
//Calculate sum of phi * lag(p) for variance of error estimate
double[] dataP = doubleArray.getColumn(A.getMatrix(0, A.getRowDimension() - 1, i, i).getArray(), 0);
double lagP = doubleMath.CorrelationCoefficient(dataP, data0);
sum = sum + (phi.get(i, 0) * lagP);
}
//Use the calculated phi values to fit a line to the dataset and predict new data
//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p
double[][] fittedData = new double[transformedData.length - p][2];
double[][] transformedData_partial = new double[transformedData.length -p][2];
double[] errors = new double[transformedData_partial.length];
for(int i=0; i<transformedData.length - p; i++){
double predictedValue = 0;
for(int j=0; j<phi.getRowDimension(); j++){
double currentPHI = phi.get(j,0);
double previousValue = A.get(i, j);
predictedValue = predictedValue + currentPHI * previousValue;
}
fittedData[i][0] = transformedData[i+p][0];
fittedData[i][1] = predictedValue;
transformedData_partial[i][0] = transformedData[i+p][0];
transformedData_partial[i][1] = transformedData[i+p][1];
errors[i] = transformedData_partial[i][1] - fittedData[i][1];
}
//Compute stats on the errors
double aic = doubleMath.AIC(errors, p);
double bic = doubleMath.BIC(errors, p);
// testErrors(errors);
//Return the generated data as well as the shortened version of the original data so they can be graphed together
double[] returnArray = {aic, bic};
String modelType = "AR(" + p + ")";
System.out.println(modelType + " JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to generate a dataset.");
return returnArray;
}
/**
* Perform an Auto-Regressive Moving Average ARMA(p,q) model on the provided annual timeseries data
* by generating 500*(p + q + 2) sets of possible values for p and q. Then evaluates the best fit of
* the generated models (mimimum SSE) and returns the phi and theta values of the best fit model.
* @param mainFolder the file location to write out the error file if necessary
* @param p the number of lagged data sets used to develop the ARMA(p,q) model
* @param q the number of error data sets used to develop the ARMA(p,q) model
* @param transformedData double array of data. Column1 = years, column2 = flow values
* @return a string array with returnArray[0] = tab-delimited string containing p number of phi values for the ARMA(p,q) model
* and returnArray[1]= a tab-delimited string containing q number of theta values for the ARMA(p,q) model
* @throws IOException
*/
public String[] ARMA(String mainFolder, Integer p, Integer q, double[][] transformedData) throws IOException{
if((p.compareTo(1) == 0) && (q.compareTo(1) == 0)){
String[] returnArray = ARMA11(mainFolder, transformedData);
return returnArray;
}else{
//Generate 500*(p+q+2) random sets of values for each phi and theta (between -1 and 1)
long start = System.currentTimeMillis();
String[][] phiTheta = new String[500*(p+q+2)][3];
for(int i=0; i<phiTheta.length; i++){
//Generate p number of phi values
String phiString = "";
for(int j=0; j<p; j++){
//Randomize numbers between -1 and 1
double currentPhi = Math.random();
if(Math.random() > 0.5){
currentPhi = -currentPhi;
}
if(j==0){
phiString = String.valueOf(currentPhi);
}else{
phiString = phiString + "\t" + String.valueOf(currentPhi);
}
}
//Generate q number of theta values
String thetaString = "";
for(int j=0; j<q; j++){
double currentTheta = Math.random();
if(Math.random() > 0.5){
currentTheta = -currentTheta;
}
if(j==0){
thetaString = String.valueOf(currentTheta);
}else{
thetaString = thetaString + "\t" + String.valueOf(currentTheta);
}
}
double[] aicBIC = ARMAoptimize(mainFolder, phiString, thetaString, transformedData);
phiTheta[i][0] = String.valueOf(aicBIC[1]);//the BIC value
phiTheta[i][1] = phiString;
phiTheta[i][2] = thetaString;
}
//Evaluate which of the phi and theta values provide the best fit of the data
Arrays.sort(phiTheta, new sort0_largeToSmall());
//Return the best phi and theta values
String[] returnArray = {phiTheta[0][1], phiTheta[0][2]};
System.out.println("ARMA(" + p + ", " + q + ") JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to calculate phi and theta parameters.");
return returnArray;
}
}
/**
* Perform an Auto-Regressive Moving Average ARMA(p,q) model on the provided annual timeseries data
* @param mainFolder the file location to write out the error file if necessary
* @param phiString a tab-delimited string containing the pre-calculated phi values (lagged data coefficients) of the fitted ARMA(p,q) model
* @param thetaString a tab-delimited string containing the pre-calculated theta (error coefficients) values of the fitted ARMA(p,q) model
* @param transformedData double array of data. Column1 = years, column2 = flow values
* @return a double[] with two items returnArray[0] is the value of Akaike Information Criterion (AIC) between the calculated ARMA(p, q) model and the original data,
* returnArray[1] is the value of Bayesian Information Criterion (BIC) between the calculated ARMA(p, q) model and the original data
* @throws IOException
*/
public double[] ARMAoptimize(String mainFolder, String phiString, String thetaString, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
String[] phiValues = phiString.split("\t");
String[] thetaValues = thetaString.split("\t");
int p = phiValues.length;
int q = thetaValues.length;
//Error catch for available data vs. "p" order AR model
int N = transformedData.length;
int largestParameter = p;
if(p < q){
largestParameter = q;
}
if(N < (2*largestParameter + 1)){
String[] errorMessage = {"There is insufficient available data for the order of ARMA model you have choosen, ARMA(" + p + ", " + q +"). Please choose a smaller order and try again."};
writeError(errorMessage);
}
// long start = System.nanoTime();
//Convert phis and thetas into doubles
double[] phi = new double[phiValues.length];
for(int i=0; i<phiValues.length; i++){
phi[i] = Double.parseDouble(phiValues[i]);
}
double[] theta = new double[thetaValues.length];
for(int i=0; i<thetaValues.length; i++){
theta[i] = Double.parseDouble(thetaValues[i]);
}
//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
Matrix A = new Matrix(N-p, p);
for(int j=0; j<p; j++){//Column dimension of the matrix
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
A.set(i, j, transformedData[p-j+i-1][1]);
}
}
//Use the calculated phi values to fit a line to the dataset and predict new data
//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p - theta_1*error_t-1 - theta_2*error_t-2 - ... theta_q*error_t-q
double[][] fittedData = new double[transformedData.length - p][2];
double[][] transformedData_partial = new double[transformedData.length - p][2];
double[] errors = new double[transformedData.length - p];
for(int i=0; i<transformedData.length - p; i++){
double predictedValue = 0;
//Loop through and add each lag term times its appropriate phi value
for(int j=0; j<phi.length; j++){
double currentPHI = phi[j];
double previousValue = A.get(i, j);
predictedValue = predictedValue + currentPHI * previousValue;
}
//Loop through and subtract each error term times its appropriate theta value
for(int k=0; k<theta.length; k++){
double currentTHETA = theta[k];
if( (i-k-1) < 0){//If there is no previous error (only for the first i < q steps) report zero error changes
predictedValue = predictedValue - currentTHETA * 0.0;
}else{
double previousError = errors[i - k - 1];
predictedValue = predictedValue - currentTHETA * previousError;
}
}
fittedData[i][0] = transformedData[i+p][0];
fittedData[i][1] = predictedValue;
transformedData_partial[i][0] = transformedData[i+p][0];
transformedData_partial[i][1] = transformedData[i+p][1];
errors[i] = transformedData_partial[i][1] - fittedData[i][1];
}
//Compute stats on the errors
// double sse = doubleMath.SSE(errors);
// System.out.println("SSE: " + sse);
double aic = doubleMath.AIC(errors, p+q);
double bic = doubleMath.BIC(errors, p+q);
// testErrors(errors);
// System.out.println("ARMA(" + p + ", " + q + ") JAMA model (SSE: " + sse + ") took " + (System.nanoTime()-start) + " nanoseconds to generate a dataset.");
double[] returnArray = {aic, bic};
return returnArray;
}
/**
* Perform an Auto-Regressive Moving Average ARMA(p,q) model on the provided annual timeseries data
* @param mainFolder the file location to write out the error file if necessary
* @param phiString a tab-delimited string containing the pre-calculated phi values (lagged data coefficients) of the fitted ARMA(p,q) model
* @param thetaString a tab-delimited string containing the pre-calculated theta (error coefficients) values of the fitted ARMA(p,q) model
* @param transformedData double array of data. Column1 = years, column2 = flow values
* @return an Object[] with the first element being the partial dataset of provided transformedData (double[][])
* and the second element being the ARMA(p, q) generated data (double[][]) based on the provided phi values (phiString) and theta values (thetaString)
* @throws IOException
*/
public Object[] ARMA(String mainFolder, String phiString, String thetaString, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
String[] phiValues = phiString.split("\t");
String[] thetaValues = thetaString.split("\t");
int p = phiValues.length;
int q = thetaValues.length;
//Error catch for available data vs. "p" order AR model
int N = transformedData.length;
if(N < (2*p + 1)){
String[] errorMessage = {"There is insufficient available data for the order of ARMA model you have choosen, ARMA(" + p + ", " + q +"). Please choose a smaller order and try again."};
writeError(errorMessage);
}
long start = System.currentTimeMillis();
//Convert phis and thetas into doubles
double[] phi = new double[phiValues.length];
for(int i=0; i<phiValues.length; i++){
phi[i] = Double.parseDouble(phiValues[i]);
}
double[] theta = new double[thetaValues.length];
for(int i=0; i<thetaValues.length; i++){
theta[i] = Double.parseDouble(thetaValues[i]);
}
//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
Matrix A = new Matrix(N-p, p);
for(int j=0; j<p; j++){//Column dimension of the matrix
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
A.set(i, j, transformedData[p-j+i-1][1]);
}
}
//Use the calculated phi values to fit a line to the dataset and predict new data
//x_hat = phi_1*x_t-1 + phi_2*x_t-2 + ... phi_p*x_t-p - theta_1*error_t-1 - theta_2*error_t-2 - ... theta_q*error_t-q
double[][] fittedData = new double[transformedData.length - p][2];
double[][] transformedData_partial = new double[transformedData.length - p][2];
double[] errors = new double[transformedData.length - p];
for(int i=0; i<transformedData.length - p; i++){
double predictedValue = 0;
//Loop through and add each lag term times its appropriate phi value
for(int j=0; j<phi.length; j++){
double currentPHI = phi[j];
double previousValue = A.get(i, j);
predictedValue = predictedValue + currentPHI * previousValue;
}
//Loop through and subtract each error term times its appropriate theta value
for(int k=0; k<theta.length; k++){
double currentTHETA = theta[k];
if( (i-k-1) < 0){//If there is no previous error (only for the first i < q steps) report zero error changes
predictedValue = predictedValue - currentTHETA * 0.0;
}else{
double previousError = errors[i - k - 1];
predictedValue = predictedValue - currentTHETA * previousError;
}
}
fittedData[i][0] = transformedData[i+p][0];
fittedData[i][1] = predictedValue;
transformedData_partial[i][0] = transformedData[i+p][0];
transformedData_partial[i][1] = transformedData[i+p][1];
errors[i] = transformedData_partial[i][1] - fittedData[i][1];
}
// //Compute stats on the errors
// double aic = doubleMath.AIC(errors, p+q);
// double bic = doubleMath.BIC(errors, p+q);
// testErrors(errors);
//Project forward the data to perform drought statistics on
Random generator = new Random();
double mean = doubleMath.meanArithmetic(errors);
double stDev = doubleMath.StandardDeviationSample(errors);
double[][] predictedData = new double[projectionCount + p][2];
for(int i=0; i<predictedData.length; i++){
//Pull first p values from actual observed data
if(i < p){
predictedData[i][0] = transformedData[transformedData.length - i - 1][0];
predictedData[i][1] = transformedData[transformedData.length - i - 1][1];
}else{
//Calculate model based on p previous values and q previous errors
double predictedValue = 0;
//Loop through and add each lag term times its appropriate phi value
for(int j=0; j<phi.length; j++){
double currentPHI = phi[j];
predictedValue = predictedValue + currentPHI * predictedData[i-p+j][1];
}
//Loop through and subtract each error term times its appropriate theta value
for(int k=0; k<theta.length; k++){
double currentTHETA = theta[k];
if( (i-k-1) < 0){//If there is no previous error (only for the first i < q steps) report zero error changes
predictedValue = predictedValue - currentTHETA * 0.0;
}else{
double randNumber = generator.nextGaussian();
double previousError = (stDev*randNumber) + mean;
predictedValue = predictedValue - currentTHETA * previousError;
}
}
predictedData[i][0] = predictedData[i-1][0] + 1;//previous year + 1
predictedData[i][1] = predictedValue;
// if(predictedValue < -0.5){
// System.out.println(predictedData[i][0] + "\t" + predictedData[i][1] + "\t" + i);
// }
}
}
//Return the generated data as well as the shortened version of the original data so they can be graphed together
String modelType = "ARMA(" + p + ", " + q + ")";
System.out.println(modelType + " JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to generate a dataset.");
Object[] returnArray = {modelType, transformedData_partial, fittedData, predictedData};
return returnArray;
}
/**
* Perform an Auto-Regressive Moving Average, ARMA(1,1), model on the provided annual timeseries data
* @param mainFolder the file location to write out the error file if necessary
* @param transformedData double array of data. Column1 = years, column2 = flow values
* @throws IOException
*/
public String[] ARMA11(String mainFolder, double[][] transformedData) throws IOException{
DoubleMath doubleMath = new DoubleMath();
DoubleArray doubleArray = new DoubleArray();
//Error catch for available data vs. "p" order AR model
int N = transformedData.length;
if(N < (2*1 + 1)){
String[] errorMessage = {"There is insufficient available data for the order of ARMA model you have choosen, ARMA(1, 1). Please choose a smaller order and try again."};
writeError(errorMessage);
}
//General form of ARMA(1,1) model
//Y_t = phi_1*(Y_t-1) + epsilon_t - theta_1 * epsilon_t-1
//Note -1 < phi_1 < 1 and -1 < theta_1 < 1 must be satisfied
long start = System.currentTimeMillis();
//Create and populate the "A" matrix with lagged data, later used in computing the varience/covarience matrix
int p = 2;
Matrix A = new Matrix(N-p, p);
for(int j=0; j<p; j++){//Column dimension of the matrix
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
A.set(i, j, transformedData[p-j+i-1][1]);
}
}
//Using non-lagged data for the original timeseries construct matrix b
Matrix b = new Matrix(N-p, 1);
for(int i=0; i<(N-p); i++){//Row dimension of the matrix
b.set(i, 0, transformedData[p+i][1]);
}
//Pull out datasets for lag/correlation coefficient calculations
double[][] data0 = b.getArray();
double[][] data1 = A.getMatrix(0, A.getRowDimension() - 1, 0, 0).getArray();
double[][] data2 = A.getMatrix(0, A.getRowDimension() - 1, 1, 1).getArray();
double lag1 = doubleMath.CorrelationCoefficient(doubleArray.getColumn(data0, 0), doubleArray.getColumn(data1, 0));
double lag2 = doubleMath.CorrelationCoefficient(doubleArray.getColumn(data0, 0), doubleArray.getColumn(data2, 0));
//Calculated phi1 of ARMA(1,1)
double phi1 = lag2/lag1;
//Calculated theta1 of ARMA(1,1)
double B = 1 - (2*phi1*lag1) + Math.pow(phi1, 2);
double root = Math.pow(B, 2) - 4*Math.pow((lag1 - phi1), 2);
double theta1 = ((-B) + Math.pow(root, 0.5))/(2*(lag1 - phi1));
// //Estimate varience of errors for ARMA(1,1) only
// double variance = doubleMath.VarianceSample(doubleArray.getColumn(data0, 0));
// double numerator = variance *(1 - phi1);
// double denominator = 1 - (2*phi1*theta1) + Math.pow(theta1, 1);
// double variance_e = numerator/denominator;
// System.out.println("Estimated variance of Errors: " + variance_e);
//Check for causal/explosive model based on abs(phi) < 1
if(Math.abs(phi1) >= 1){
System.err.println("Calculated phi value exceeds 1. ARMA(1,1) model is not causal, it is explosive. Phi = " + phi1);
}
//Check for invertibility of the ARMA model based on abs(theta) < 1
if(Math.abs(theta1) >= 1){
System.err.println("Calculated phi value exceeds 1. ARMA(1,1) model is not intertible. Theta = " + theta1);
}
//Return array of tab-delimited strings of phi values and theta values
System.out.println("ARMA(1, 1) JAMA model took " + (System.currentTimeMillis()-start) + " milliseconds to calculated phi and theta parameters.");
String[] returnArray = {String.valueOf(phi1), String.valueOf(theta1)};
return returnArray;
}
/**
* Prints out the contents of the provided matrix with a label printed before it of the name provided
* @param matrix 2D matrix to be printed out
* @param name the label for the matrix will be "name matrix"
*/
private void displayMatrix(Matrix matrix, String name){
System.out.println(name + " matrix");
for(int i=0; i<matrix.getRowDimension(); i++){
String currentLine = String.valueOf(matrix.get(i, 0));
for(int j=1; j<matrix.getColumnDimension(); j++){
currentLine = currentLine + "\t" + String.valueOf(matrix.get(i, j));
}
System.out.println(currentLine);
}
}
/**
* Writes out the error message, if any, for finding the file and then exits the program
* @param error string array to be written as each line of an error message
* @throws IOException
*/
public void writeError(String[] error) throws IOException{
//Output data to text file
String errorContents = error[0];
for(int i=1; i<error.length; i++){
errorContents = errorContents + "\n" + error[i];
}
throw new IOException("Error encountered. Please see the following message for details: \n" + errorContents);
}
// /**
// * Calculate and output a few statistics of the errors.
// * @param errors a double[] containing the differences between the observed data and the predicted data
// */
// private void testErrors(double[] errors){
// //Calculate error mean and compare it against the assumed mean of zero
// System.out.println("Error Mean: " + doubleMath.Average(errors));
//
// //Calculated the error variance from portions of the sample and test if it is constant
// System.out.println("Error Variance: " + doubleMath.VarianceSample(errors));
// System.out.println("Error Standard Deviation: " + doubleMath.StandardDeviationSample(errors));
//
// //Calculate other stats. of the errors
// System.out.println("Error Likelihood: " + doubleMath.LF(errors));
// System.out.println("Error Log-Likelihood: " + doubleMath.LLF(errors));
// System.out.println("Error SSE: " + doubleMath.SSE(errors));
// }
}