DoubleMath.java [src/java/m/cfa] Revision: default Date:
package m.cfa;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.commons.math.ArgumentOutsideDomainException;
import org.apache.commons.math.analysis.interpolation.SplineInterpolator;
import org.apache.commons.math.analysis.polynomials.PolynomialSplineFunction;
/**
* Last Updated: 20-April-2017
* @author Tyler Wible
* @since 21-June-2012
*/
class sort1_smallToLargeDoubleMath implements Comparator<double[]>{
    //Orders rows ascending by their first element (index 0); no other element is read
    @Override
    public int compare(final double[] entry1, final double[] entry2) {
        //Double.compare gives a total order over the two leading elements
        return Double.compare(entry1[0], entry2[0]);
    }
}
public class DoubleMath{
//Copies the boxed values of the list, in list order, into a new primitive array
private static double[] convertArray(ArrayList<Double> array){
    final double[] unboxed = new double[array.size()];
    int position = 0;
    for(Double value : array){
        unboxed[position++] = value;
    }
    return unboxed;
}
/**
 * Returns the largest element of a double[] array, ordered by Double.compare.
 * @param array the values to scan
 * @return the largest element, or -9999 when the array is empty
 */
public static double max(double[] array){
    if(array.length == 0){
        return -9999;
    }
    double largest = array[0];
    for(int idx=1; idx<array.length; idx++){
        final double candidate = array[idx];
        if(Double.compare(candidate, largest) > 0){
            largest = candidate;
        }
    }
    return largest;
}
/**
 * Returns the largest element of an ArrayList<Double>.
 * The list is copied into a double[] and handed to max(double[]).
 * @param array the values to scan
 * @return whatever max(double[]) returns for the copied values
 */
public static double max(ArrayList<Double> array){
    return max(convertArray(array));
}
/**
 * Returns the smallest element of a double[] array, ordered by Double.compare.
 * @param array the values to scan
 * @return the smallest element, or -9999 when the array is empty
 */
public static double min(double[] array){
    if(array.length == 0){
        return -9999;
    }
    double smallest = array[0];
    for(int idx=1; idx<array.length; idx++){
        final double candidate = array[idx];
        if(Double.compare(candidate, smallest) < 0){
            smallest = candidate;
        }
    }
    return smallest;
}
/**
 * Returns the smallest element of an ArrayList<Double>.
 * The list is copied into a double[] and handed to min(double[]).
 * @param array the values to scan
 * @return whatever min(double[]) returns for the copied values
 */
public static double min(ArrayList<Double> array){
    return min(convertArray(array));
}
/**
 * Adds up every element of a double[] array, front to back.
 * @param array the values to total
 * @return the total, which is 0 for an empty array
 */
public static double sum(double[] array){
    double total = 0;
    for(double value : array){
        total = total + value;
    }
    return total;
}
/**
 * Adds up every element of an ArrayList<Double>.
 * The list is copied into a double[] and handed to sum(double[]).
 * @param array the values to total
 * @return the total of the list's values
 */
public static double sum(ArrayList<Double> array){
    return sum(convertArray(array));
}
/**
 * Rounds a value to a given number of decimal places.
 * The value is scaled by 10^decimalPlaces, rounded with Math.round and scaled back,
 * so decimalPlaces = 1 keeps one decimal place and decimalPlaces = 2 keeps two.
 * @param originalValue the value to be rounded
 * @param decimalPlaces the number of decimal places to keep
 * @return the rounded value
 */
public static double round(double originalValue, double decimalPlaces){
    final double scale = Math.pow(10, decimalPlaces);
    //Math.round yields a long, which is widened back to double by the division
    return Math.round(originalValue*scale) / scale;
}
/**
 * Rounds every element of a double[] array to a given number of decimal places.
 * Each element is passed through round(double, double); the input array is not modified.
 * @param array the values to be rounded
 * @param decimalPlaces the number of decimal places to keep
 * @return a new array, the same length as the input, holding the rounded values
 */
public static double[] roundColumn(double[] array, double decimalPlaces){
    final int length = array.length;
    final double[] rounded = new double[length];
    for(int idx=0; idx<length; idx++){
        rounded[idx] = round(array[idx], decimalPlaces);
    }
    return rounded;
}
/**
 * Computes the arithmetic mean (sum divided by count) of a double[] array.
 * @param array the values to average
 * @return the arithmetic mean; an empty array is divided by 1 instead of 0, giving 0
 */
public static double meanArithmetic(double[] array){
    final double total = sum(array);
    //An empty array would divide by zero, so a divisor of 1 is used in that case
    final double divisor = (array.length == 0) ? 1 : array.length;
    return total/divisor;
}
/**
 * Computes the arithmetic mean of an ArrayList<Double>.
 * The list is copied into a double[] and handed to meanArithmetic(double[]).
 * @param array the values to average
 * @return the arithmetic mean of the list's values
 */
public static double meanArithmetic(ArrayList<Double> array){
    return meanArithmetic(convertArray(array));
}
/**
 * Calculates the harmonic mean of the provided double[] array, using only the
 * strictly positive values for the reciprocal sum.
 *
 * The result is a weighted average of the harmonic mean of the positive elements
 * and zero: (nPositive / sum(1/x)) * (nPositive / nTotal). Every element that is
 * not greater than zero counts towards the zero weight.
 * @param array the double[] array of which the harmonic mean is desired
 * @return the zero-corrected harmonic mean; 0 when the array holds no positive value
 * (including when it is empty)
 */
public static double meanHarmonic(double[] array){
    //Accumulate the reciprocals of the positive values only
    double reciprocalSum = 0;
    int positiveCount = 0;
    for(double value : array){
        if(value > 0){
            reciprocalSum = reciprocalSum + (1/value);
            positiveCount++;
        }
    }
    //With no positive value the weight of the positive part is zero
    if(positiveCount == 0){
        return 0;
    }
    //Harmonic mean of the positive values, scaled by the share of positive values.
    //The weight multiplies the mean; dividing by it would inflate the result
    //instead of pulling it towards zero.
    final double harmonicOfPositives = positiveCount / reciprocalSum;
    final double positiveShare = (double) positiveCount / array.length;
    return harmonicOfPositives * positiveShare;
}
/**
 * Calculates the harmonic mean of an ArrayList<Double>.
 * The list is copied into a double[] and handed to meanHarmonic(double[]),
 * which defines how non-positive values are treated.
 * @param array the values of which the harmonic mean is desired
 * @return the harmonic mean as computed by meanHarmonic(double[])
 */
public static double meanHarmonic(ArrayList<Double> array){
    return meanHarmonic(convertArray(array));
}
/**
 * Computes the arithmetic mean of the base-10 logarithms of a double[] array
 * (Matlab equivalent: mean(log10(data))).
 * @param array the values whose logarithms are averaged
 * @return the mean of Math.log10 applied to every element
 */
public static double Average_Log10(double[] array){
    return meanArithmetic(Log10(array));
}
/**
 * Applies Math.log10 to every element of a double[] array (Matlab equivalent: log10(data)).
 * The input array is left unchanged.
 * @param array the values before taking the logarithm
 * @return a new array, the same length as the input, holding the base-10 logarithms
 */
public static double[] Log10(double[] array){
    final int length = array.length;
    final double[] logs = new double[length];
    for(int row=0; row<length; row++){
        logs[row] = Math.log10(array[row]);
    }
    return logs;
}
/**
 * Applies Math.log10 to one column of a double[][] array, i.e. to array[row][column] for every row.
 * @param array the row-major data holding the values before taking the logarithm
 * @param column the index of the column to read in every row
 * @return a new array with one base-10 logarithm per row of the input
 */
public static double[] Log10(double[][] array, int column){
    final int rowCount = array.length;
    final double[] logs = new double[rowCount];
    for(int row=0; row<rowCount; row++){
        logs[row] = Math.log10(array[row][column]);
    }
    return logs;
}
/**
 * Performs a spline interpolation and computes an array of y values using the Xarray and Yarray evaluated at the xarray points.
 * The spline is fitted once and then evaluated at every requested point, rather than being refitted per point.
 * @param Xarray an array of existing X points
 * @param Yarray an array of existing Y points corresponding to the above Xarray
 * @param xarray an array of x points for which y points are desired
 * @return an array of y points corresponding to the x points of the xarray after a spline interpolation between Xarray and Yarray
 * @throws ArgumentOutsideDomainException propagated from evaluating the fitted spline at a requested x point
 */
public static double[] splineInterpolation(double[] Xarray, double[] Yarray, double[] xarray) throws ArgumentOutsideDomainException{
    double[] yarray = new double[xarray.length];
    //Nothing to evaluate: skip fitting so that, as before, no spline is built for an empty request
    if(xarray.length == 0){
        return yarray;
    }
    //Fit the spline a single time; it depends only on Xarray and Yarray
    PolynomialSplineFunction splineFunction = new SplineInterpolator().interpolate(Xarray, Yarray);
    //Evaluate the fitted spline at each requested x point
    for(int i=0; i<xarray.length; i++){
        yarray[i] = splineFunction.value(xarray[i]);
    }
    return yarray;
}
/**
 * Fits a spline through the (Xarray, Yarray) points with SplineInterpolator and evaluates it at a single x value.
 * @param Xarray a double[] array of existing X points
 * @param Yarray a double[] array of existing Y points corresponding to the above Xarray
 * @param xValue the x point for which a y value is desired
 * @return the value of the fitted spline at xValue
 * @throws ArgumentOutsideDomainException propagated from evaluating the fitted spline at xValue
 */
public static double splineInterpolation(double[] Xarray, double[] Yarray, double xValue) throws ArgumentOutsideDomainException{
    //Build the spline from the known points, then read off the requested value
    final PolynomialSplineFunction fittedSpline = new SplineInterpolator().interpolate(Xarray, Yarray);
    return fittedSpline.value(xValue);
}
/**
 * Linearly interpolates a y value for every x value in xArray.
 * Each point is resolved independently through linearInterpolation(double[], double[], double).
 * @param Xarray a double[] array of existing X points
 * @param Yarray a double[] array of existing Y points corresponding to the above Xarray
 * @param xArray the x points for which y values are desired
 * @return a new array holding one y value per entry of xArray, in the same order
 */
public static double[] linearInterpolation(double[] Xarray, double[] Yarray, double[] xArray){
    final int requested = xArray.length;
    final double[] interpolated = new double[requested];
    for(int idx=0; idx<requested; idx++){
        interpolated[idx] = linearInterpolation(Xarray, Yarray, xArray[idx]);
    }
    return interpolated;
}
/**
 * Linearly interpolates (or extrapolates) a y value at xValue from the (Xarray, Yarray) points.
 * The points are first ordered by ascending X. An xValue equal to a known X returns the paired Y,
 * an xValue strictly between two neighbouring X values is interpolated between them, and an
 * xValue outside the data range is extrapolated along the slope of the two nearest end points,
 * with a message written to System.err.
 *
 * The extrapolation expressions read a neighbouring point, so with exactly one data point an
 * xValue that differs from it raises an ArrayIndexOutOfBoundsException.
 * @param Xarray a double[] array of existing X points
 * @param Yarray a double[] array of existing Y points corresponding to the above Xarray
 * @param xValue the x point for which a y value is desired
 * @return the interpolated or extrapolated y value; 0 when no case applies (e.g. no data points)
 */
public static double linearInterpolation(double[] Xarray, double[] Yarray, double xValue){
    //Pair the points up (x, y, original index) so they can be ordered by ascending x
    final int pointCount = Xarray.length;
    double[][] pairs = new double[pointCount][3];
    for(int row=0; row<pointCount; row++){
        pairs[row][0] = Xarray[row];
        pairs[row][1] = Yarray[row];
        pairs[row][2] = row;
    }
    Arrays.sort(pairs, new sort1_smallToLargeDoubleMath());
    Xarray = DoubleArray.getColumn(pairs, 0);
    Yarray = DoubleArray.getColumn(pairs, 1);

    if(Xarray.length == 0){
        return 0;
    }
    final int last = Xarray.length - 1;

    //Below the data range: extend the line through the first two points
    if(xValue < Xarray[0]){
        final double extrapolated = ((Yarray[1] - Yarray[0])/(Xarray[1] - Xarray[0]))*(xValue - Xarray[0]) + Yarray[0];
        System.err.println("The x value: " + xValue + " is smaller than the smallest provided X value: " + Xarray[0] + ". Therefore it's corresponding y value: " + extrapolated + " was extrapolated from the dataset.");
        return extrapolated;
    }

    //Inside the data range: exact hit, or interpolate between the bracketing neighbours
    for(int k=0; k<=last; k++){
        if(xValue == Xarray[k]){
            return Yarray[k];
        }
        if(k != 0 && Xarray[k-1] < xValue && xValue < Xarray[k]){
            return ((xValue - Xarray[k-1])/(Xarray[k] - Xarray[k-1]))*(Yarray[k] - Yarray[k-1]) + Yarray[k-1];
        }
    }

    //Above the data range: extend the line through the last two points
    if(xValue > Xarray[last]){
        final double extrapolated = ((Yarray[last-1] - Yarray[last])/(Xarray[last-1] - Xarray[last]))*(xValue - Xarray[last]) + Yarray[last];
        System.err.println("The x value: " + xValue + " is larger than the largest provided X value: " + Xarray[last] + ". Therefore it's corresponding y value: " + extrapolated + " was extrapolated from the dataset.");
        return extrapolated;
    }
    return 0;
}
/**
 * Sub-function to calculate a percentile of a dataset.
 * The array is sorted in place, then the rank percentile_type*(n + 1) is located:
 * an integer rank returns the value at that rank, a fractional rank returns the
 * average of the two neighbouring ranks. A percentile_type of 0.5 therefore yields
 * the median. Ranks below 1 or above n are clamped to the smallest / largest value
 * and a message is written to System.err.
 * @param array input list data for percentile (double[]); NOTE: sorted in place by this call
 * @param percentile_type which percentile value is desired 0.25, or 0.95, etc.
 * @return percentile of the dataset, or -9999 when the array is empty or percentile_type is NaN
 */
public static double Percentile_function (double[] array, double percentile_type){
    //Sort Data
    Arrays.sort(array);
    final int n = array.length;
    if(n == 0){
        return -9999;
    }
    if(n == 1){
        return array[0];
    }
    //Find rank of the desired percentile, ex: 0.95*(n + 1) = rank of 95th percentile
    double rank = percentile_type*(n + 1);
    if(Double.isNaN(rank)){
        return -9999;
    }
    //Snap ranks that are integers up to floating point noise (e.g. 0.07*100)
    final double nearestInteger = Math.rint(rank);
    if(Math.abs(rank - nearestInteger) < 1e-9){
        rank = nearestInteger;
    }
    //Ranks outside 1..n cannot be resolved from the data: clamp to the nearest end
    if(rank < 1){
        System.err.println("Insufficient data to calculate the desired percentile accurately");
        return array[0];
    }
    if(rank > n){
        System.err.println("Insufficient data to calculate the desired percentile accurately");
        return array[n-1];
    }
    //lowerRank-1 compensates for Java's zero-based indexing system
    final int lowerRank = (int) Math.floor(rank);
    if(rank == lowerRank){
        //Integer rank: the value at that rank is the percentile
        return array[lowerRank-1];
    }
    //Fractional rank: average the two nearest ranks
    return (array[lowerRank-1] + array[lowerRank])/2;
}
/**
 * Sub-function to calculate a percentile of an ArrayList<Double>.
 * The list is copied into a double[] and handed to Percentile_function(double[], double),
 * so the list itself is not reordered.
 * @param array input list data for percentile
 * @param percentile_type which percentile value is desired 0.25, or 0.95, etc.
 * @return percentile of the dataset as computed by the double[] overload
 */
public static double Percentile_function (ArrayList<Double> array, double percentile_type){
    return Percentile_function(convertArray(array), percentile_type);
}
/**
 * Sub-function to calculate the median of a dataset.
 * The array is sorted in place first; an odd-length array returns its middle value and
 * an even-length array returns the average of its two middle values.
 * @param array input data for the median; NOTE: sorted in place by this call
 * @return median of the dataset, or -9999 when the array is empty
 */
public static double median(double[] array){
    Arrays.sort(array);
    final int count = array.length;
    if(count == 0){
        return -9999;
    }
    final int upperMiddle = count/2;
    if((count & 1) == 1){
        return array[upperMiddle];
    }
    return (array[upperMiddle-1] + array[upperMiddle])/2;
}
/**
 * Sub-function to calculate the median of an ArrayList<Double>.
 * The list is copied into a double[] and handed to median(double[]),
 * so the list itself is not reordered.
 * @param array input list data for median
 * @return median of the dataset as computed by median(double[])
 */
public static double median(ArrayList<Double> array){
    return median(convertArray(array));
}
/**
 * Sub-function to calculate the sample standard deviation of a dataset,
 * i.e. the square root of VarianceSample(array).
 * @param array input data for the standard deviation
 * @return the sample standard deviation of the data
 */
public static double StandardDeviationSample(double[] array){
    return Math.pow(VarianceSample(array), 0.5);
}
/**
 * Sub-function to calculate the sample standard deviation of an ArrayList<Double>.
 * The list is copied into a double[] and handed to StandardDeviationSample(double[]).
 * @param array input list data for standard deviation
 * @return the sample standard deviation of the dataset
 */
public static double StandardDeviationSample(ArrayList<Double> array){
    return StandardDeviationSample(convertArray(array));
}
/**
 * Sub-function to calculate the variance of the population,
 * variance = (1/n) * sum[(value - average)^2]
 * @param array data array for which the variance is to be found
 * @return population variance of the data array; an empty array is divided by 1 instead of 0
 */
public static double VariancePopulation(double[] array){
    final double mean = meanArithmetic(array);
    //Accumulate the squared deviations from the mean
    double squaredDeviations = 0;
    for(double value : array){
        squaredDeviations = squaredDeviations + Math.pow((value - mean), 2);
    }
    //Guard against dividing by zero for an empty array
    final double divisor = (array.length == 0) ? 1 : array.length;
    return squaredDeviations/divisor;
}
/**
 * Sub-function to calculate the population variance of an ArrayList<Double>.
 * The list is copied into a double[] and handed to VariancePopulation(double[]).
 * @param array data for which the variance is to be found
 * @return population variance of the data
 */
public static double VariancePopulation(ArrayList<Double> array){
    return VariancePopulation(convertArray(array));
}
/**
 * Sub-function to calculate the sample variance, variance = [1/(n-1)] * sum[(value - average)^2]
 * @param array data array for which the variance is to be found
 * @return sample variance of the data array; when n-1 is not positive the sum is divided by 1 instead
 */
public static double VarianceSample(double[] array){
    final double mean = meanArithmetic(array);
    //Accumulate the squared deviations from the mean
    double squaredDeviations = 0;
    for(double value : array){
        squaredDeviations = squaredDeviations + Math.pow((value - mean), 2);
    }
    //n-1 degrees of freedom, floored at 1 to avoid dividing by zero (or a negative count)
    final int degreesOfFreedom = Math.max(array.length - 1, 1);
    return squaredDeviations/degreesOfFreedom;
}
/**
 * Sub-function to calculate the sample variance of an ArrayList<Double>.
 * The list is copied into a double[] and handed to VarianceSample(double[]).
 * @param array data for which the variance is to be found
 * @return sample variance of the data
 */
public static double VarianceSample(ArrayList<Double> array){
    return VarianceSample(convertArray(array));
}
/**
 * Sub-function to calculate the coefficient of variation of a dataset
 * (sample standard deviation / arithmetic mean). The mean is not checked for zero.
 * @param array input data for coefficient of variation
 * @return coefficient of variation of the dataset
 */
public static double CoefficientOfVariation(double[] array){
    final double mean = meanArithmetic(array);
    final double sampleStDev = StandardDeviationSample(array);
    return sampleStDev/mean;
}
/**
 * Sub-function to calculate the coefficient of variation of an ArrayList<Double>.
 * The list is copied into a double[] and handed to CoefficientOfVariation(double[]).
 * @param array input list data for coefficient of variation
 * @return coefficient of variation of the dataset
 */
public static double CoefficientOfVariation(ArrayList<Double> array){
    return CoefficientOfVariation(convertArray(array));
}
/**
 * Sub-function to calculate the sample covariance of two datasets,
 * covariance = [1/(N-1)] * sum[(Xi - Xbar) * (Yi - Ybar)].
 * No guard is applied to the N-1 divisor, so a single pair divides zero by zero.
 * @param xData input list of x data
 * @param yData input list of y data, paired by index with xData
 * @return covariance of the two datasets
 * @throws IOException if the two arrays differ in length
 */
public static double Covariance(double[] xData, double[] yData) throws IOException{
    //Both series must pair up element by element
    if(xData.length != yData.length){
        throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
                xData.length + "\tY data size:\t" + yData.length));
    }
    final int pairCount = xData.length;
    final double xMean = meanArithmetic(xData);
    final double yMean = meanArithmetic(yData);
    //Accumulate the products of the paired deviations
    double crossProducts = 0;
    for(int idx=0; idx<pairCount; idx++){
        crossProducts = crossProducts + ((xData[idx] - xMean) * (yData[idx] - yMean));
    }
    return crossProducts / (pairCount - 1);
}
/**
 * Sub-function to calculate the covariance of two ArrayList<Double> datasets.
 * Both lists are copied into double[] arrays and handed to Covariance(double[], double[]).
 * @param xArray input list of x data
 * @param yArray input list of y data
 * @return covariance of the two datasets
 * @throws IOException propagated from the double[] overload when the sizes differ
 */
public static double Covariance(ArrayList<Double> xArray, ArrayList<Double> yArray) throws IOException{
    final double[] xValues = convertArray(xArray);
    final double[] yValues = convertArray(yArray);
    return Covariance(xValues, yValues);
}
/**
 * Sub-function to calculate the skewness of a dataset as the arithmetic mean of
 * ((value - average) / standard deviation)^3.
 * NOTE(review): the deviations are standardised with StandardDeviationSample (n-1 divisor)
 * even though the method is named "Population" - confirm this is intended.
 * @param array input data for skewness
 * @return skewness of the dataset
 */
public static double SkewnessPopulation(double[] array){
    final double mean = meanArithmetic(array);
    final double sampleStDev = StandardDeviationSample(array);
    //Cube each standardised deviation; the skewness is the mean of these terms
    final int count = array.length;
    final double[] cubedTerms = new double[count];
    for(int idx=0; idx<count; idx++){
        cubedTerms[idx] = Math.pow(((array[idx] - mean)/sampleStDev), 3);
    }
    return meanArithmetic(cubedTerms);
}
/**
 * Sub-function to calculate the skewness of an ArrayList<Double>.
 * The list is copied into a double[] and handed to SkewnessPopulation(double[]).
 * @param array input list data for skewness
 * @return skewness of the dataset
 */
public static double SkewnessPopulation(ArrayList<Double> array){
    return SkewnessPopulation(convertArray(array));
}
/**
 * Sub-function to calculate the sample skewness (adjusted Fisher-Pearson skewness) of a dataset.
 * Skewness = (n /((n-1)(n-2))) * sum{ ((x - average)/standard deviation) ^ 3 }
 * where the standard deviation is the sample (n-1) standard deviation.
 * @param array input data for skewness
 * @return skewness of the dataset, or NaN when fewer than three values are supplied
 * (the coefficient n/((n-1)(n-2)) is undefined there)
 */
public static double SkewnessSample(double[] array){
    final double count = array.length;
    //The (n-1)(n-2) divisor is zero or meaningless below three values
    if(count < 3){
        return Double.NaN;
    }
    //Get the average and standard deviation for use in the skewness formula
    final double average = meanArithmetic(array);
    final double stDev = StandardDeviationSample(array);
    //Sum the cubed standardised deviations; the deviations are divided by the
    //standard deviation (not the variance) so that each term is dimensionless
    double cubedSum = 0;
    for(int i=0; i<array.length; i++){
        cubedSum = cubedSum + Math.pow((array[i] - average)/stDev, 3);
    }
    final double coefficient = count /( (count - 1)*(count - 2) );
    return coefficient * cubedSum;
}
/**
 * Sub-function to calculate the sample skewness of an ArrayList<Double>.
 * The list is copied into a double[] and handed to SkewnessSample(double[]).
 * @param array input list data for skewness
 * @return skewness of the dataset
 */
public static double SkewnessSample(ArrayList<Double> array){
    return SkewnessSample(convertArray(array));
}
/**
 * Sub-function to calculate the sum of squares of x (Sxx) of a dataset,
 * Sxx = sum[(Xi - Xbar)^2].
 * @param xData input data for Sxx
 * @return Sxx of the dataset
 */
public static double Sxx(double[] xData){
    final double mean = meanArithmetic(xData);
    double squaredDeviations = 0;
    for(double value : xData){
        squaredDeviations = squaredDeviations + Math.pow(value - mean, 2);
    }
    return squaredDeviations;
}
/**
 * Sub-function to calculate the sum of squares of x (Sxx) of an ArrayList<Double>.
 * The list is copied into a double[] and handed to Sxx(double[]).
 * @param xArray input list data for Sxx
 * @return Sxx of the dataset
 */
public static double Sxx(ArrayList<Double> xArray){
    return Sxx(convertArray(xArray));
}
/**
 * Sub-function to calculate the sum of cross products (Sxy) of two datasets,
 * Sxy = sum[(Xi - Xbar) * (Yi - Ybar)].
 * @param xArray input list of x data for Sxy
 * @param yArray input list of y data for Sxy, paired by index with xArray
 * @return Sxy of the datasets
 * @throws java.io.IOException if the two arrays differ in length
 */
public static double Sxy(double[] xArray, double[] yArray) throws IOException{
    //Both series must pair up element by element
    if(xArray.length != yArray.length){
        throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
                xArray.length + "\tY data size:\t" + yArray.length));
    }
    final double xMean = meanArithmetic(xArray);
    final double yMean = meanArithmetic(yArray);
    double crossProducts = 0;
    for(int idx=0; idx<xArray.length; idx++){
        crossProducts = crossProducts + (xArray[idx] - xMean)*(yArray[idx] - yMean);
    }
    return crossProducts;
}
/**
 * Sub-function to calculate the sum of cross products (Sxy) of two ArrayList<Double> datasets.
 * Both lists are copied into double[] arrays and handed to Sxy(double[], double[]).
 * @param xArray input list of x data for Sxy
 * @param yArray input list of y data for Sxy
 * @return Sxy of the datasets
 * @throws java.io.IOException propagated from the double[] overload when the sizes differ
 */
public static double Sxy(ArrayList<Double> xArray, ArrayList<Double> yArray) throws IOException{
    final double[] xValues = convertArray(xArray);
    final double[] yValues = convertArray(yArray);
    return Sxy(xValues, yValues);
}
/**
 * Sub-function to calculate the correlation coefficient (r) of two datasets,
 * r = Sxy / squareRoot(Sxx * Syy). The denominator is not checked for zero.
 * @param xArray input list of x data
 * @param yArray input list of y data, paired by index with xArray
 * @return the correlation coefficient of the two data sets
 * @throws java.io.IOException if the two arrays differ in length
 */
public static double CorrelationCoefficient(double[] xArray, double[] yArray) throws IOException{
    //Both series must pair up element by element
    if(xArray.length != yArray.length){
        throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
                xArray.length + "\tY data size:\t" + yArray.length));
    }
    final double sumSquaresX = Sxx(xArray);
    final double sumSquaresY = Sxx(yArray);
    final double crossProducts = Sxy(xArray, yArray);
    return crossProducts / Math.pow(sumSquaresX*sumSquaresY, 0.5);
}
/**
 * Sub-function to calculate the correlation coefficient (r) of two ArrayList<Double> datasets.
 * Both lists are copied into double[] arrays and handed to CorrelationCoefficient(double[], double[]).
 * @param xArray input list of x data
 * @param yArray input list of y data
 * @return the correlation coefficient of the two data sets [r = Sxy / squareRoot(Sxx * Syy)]
 * @throws java.io.IOException propagated from the double[] overload when the sizes differ
 */
public static double CorrelationCoefficient(ArrayList<Double> xArray, ArrayList<Double> yArray) throws IOException{
    final double[] xValues = convertArray(xArray);
    final double[] yValues = convertArray(yArray);
    return CorrelationCoefficient(xValues, yValues);
}
/**
 * Sub-function to calculate the R^2 (Rsquare) value of two datasets. Rsquare is the coefficient
 * of determination: the fraction of the variance in y explained by the least-squares regression of y on x.
 * With slope b1 = Sxy/Sxx the residual sum of squares is See = Syy - b1*Sxy, and Rsquare = 1 - See/Syy.
 * @param xArray input list of x data
 * @param yArray input list of y data, paired by index with xArray
 * @return the Rsquare of the two data sets [Rsquare = 1 - See/Syy]; NaN when Sxx or Syy is zero
 * @throws java.io.IOException if the two arrays differ in length
 */
public static double Rsquare(double[] xArray, double[] yArray) throws IOException{
    //Check if arrays are the same size
    if(xArray.length != yArray.length){
        throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
                xArray.length + "\tY data size:\t" + yArray.length));
    }
    double sxx = Sxx(xArray);
    double syy = Sxx(yArray);
    double sxy = Sxy(xArray, yArray);
    //Slope of the least-squares line of y on x
    double b1 = sxy / sxx;
    //Residual (error) sum of squares. The explained part is b1*Sxy; using b1*Sxx
    //would reduce to Sxy and make the result Sxy/Syy instead of R^2.
    double see = syy - b1*sxy;
    //The (n-2) degrees of freedom cancel out of R^2, so they are not applied here;
    //this also avoids a 0/0 when exactly two points are supplied.
    double rSquare = 1 - see/syy;
    return rSquare;
}
/**
 * Sub-function to calculate the R^2 (Rsquare) value of two ArrayList<Double> datasets.
 * Both lists are copied into double[] arrays and handed to Rsquare(double[], double[]).
 * @param xArray input list of x data
 * @param yArray input list of y data
 * @return the Rsquare of the two data sets as computed by the double[] overload
 * @throws java.io.IOException propagated from the double[] overload when the sizes differ
 */
public static double Rsquare(ArrayList<Double> xArray, ArrayList<Double> yArray) throws IOException{
    final double[] xValues = convertArray(xArray);
    final double[] yValues = convertArray(yArray);
    return Rsquare(xValues, yValues);
}
/**
 * Computes the sum of squared errors for a set of given error values.
 * @param array array of difference values (y - y_hat)
 * @return the sum of squared errors, sum(error_i ^ 2); 0 for an empty array
 */
public static double SSE(double[] array){
    double squaredTotal = 0;
    for(double error : array){
        squaredTotal = squaredTotal + Math.pow(error, 2);
    }
    return squaredTotal;
}
/**
 * Computes the Likelihood function for a set of given error values, using the
 * sample variance of the errors (VarianceSample) as sigma_errors^2.
 * @param array array of difference values (y - y_hat)
 * @return the value of the likelihood function LH = product( [1/sqrt(2*pi*sigma_errors^2)] * exp [-(error^2) / (2*sigma_errors^2)] )
 */
public static double LF(double[] array){
    final double errorVariance = VarianceSample(array);
    //Multiply together the normal density of every error term
    double likelihood = 1;
    for(double error : array){
        final double normalizer = Math.pow(2*Math.PI*errorVariance, -0.5);
        final double kernel = Math.exp(-Math.pow(error, 2) / (2*errorVariance));
        likelihood = likelihood * ( normalizer * kernel );
    }
    return likelihood;
}
/**
 * Computes the Log-Likelihood function for a set of given error values.
 * @param array array of difference values (y - y_hat)
 * @return the value of the log-likelihood function,
 * LLH = (-n/2)*ln(2*pi) - 1/2*ln(sigma_errors ^ 2n) - 1/2*(sigma_errors ^ -2)*sum(errors[i] ^ 2),
 * where sigma_errors is the sample standard deviation of the errors
 */
public static double LLF(double[] array){
    //Compute the standard deviation of the errors
    final double N = array.length;
    final double sigma_e = StandardDeviationSample(array);
    //Calculate the log-likelihood function for the array and the standard deviation
    final double sse = SSE(array);
    final double part1 = (N * Math.log(2*Math.PI) / (-2));
    //1/2*ln(sigma^(2N)) equals N*ln(sigma). Evaluating the power first overflows or
    //underflows for moderate N and would wrongly trigger the infinite-value fallback below.
    //N == 0 is kept at 0 (ln(sigma^0) = ln(1)) rather than 0*ln(sigma).
    double part2 = (N == 0) ? 0 : N * Math.log(sigma_e);
    double part3 = (Math.pow(sigma_e, -2) / (2));
    //Check if the log/powers calculated any infinite values (e.g. sigma_e of zero);
    //the substitute value of 1.0 is kept from the previous implementation
    if(Double.isInfinite(part2)){
        part2 = 1.0;
    }
    if(Double.isInfinite(part3)){
        part3 = 1.0;
    }
    return part1 - part2 - (part3 * sse);
}
/**
 * Computes the Akaike Information Criterion (AIC) for a set of model errors as a
 * goodness of fit measure: AIC = 2*numberOfParams - 2*LLF(array).
 * @param array array of difference values (y - y_hat) or (actual - predicted)
 * @param numberOfParams the number of parameters used in the model (AR(p) or ARMA(p,q)) to predict future data
 * @return the value of AIC for the given errors and parameters
 */
public static double AIC(double[] array, int numberOfParams){
    final double logLikelihood = LLF(array);
    return 2*numberOfParams - 2*logLikelihood;
}
/**
 * Computes the Bayesian Information Criterion (BIC) value for a given dataset using the Log-Likelihood function
 * and number of parameters of the model as a goodness of fit test: BIC = numberOfParams*ln(N) - 2*LLF(array),
 * where N is the number of error values.
 * @param array array of difference values (y - y_hat) or (actual - predicted)
 * @param numberOfParams the number of parameters used in the model (AR(p) or ARMA(p,q)) to predict future data
 * @return the value of BIC for the given errors and parameters
 */
public static double BIC(double[] array, int numberOfParams){
    //Compute the log-likelihood function
    double llf = LLF(array);
    double N = array.length;
    //Compute the value of bic; the parameter penalty is k*ln(N)
    //(the factor of 2 belongs to the AIC penalty, not to BIC)
    double bic = numberOfParams*Math.log(N) - 2*llf;
    return bic;
}
/**
 * Computes the Nash-Sutcliffe Model Efficiency Coefficient,
 * E = 1 - sum[(observed - modeled)^2] / sum[(observed - mean(observed))^2].
 * The two arrays must be matched lists: observed[i] and modeled[i] must refer to the same date.
 * The denominator is not checked for zero.
 * @param observed array of observed values, sorted by observation date
 * @param modeled array of modeled values, sorted by model date, paired to the same dates as the above observed dates
 * @return the Nash-Sutcliffe efficiency E
 * @throws java.io.IOException if the two arrays differ in length
 */
public static double NashSutcliffe(double[] observed, double[] modeled) throws IOException{
    //Both series must pair up element by element
    if(observed.length != modeled.length){
        throw(new IOException("Data arrays must be the same size to perform this statistic. Observed data size:\t" +
                observed.length + "\tModeled data size:\t" + modeled.length));
    }
    final double observedMean = meanArithmetic(observed);
    double residualSquares = 0;
    double spreadSquares = 0;
    for(int idx=0; idx<observed.length; idx++){
        final double residual = observed[idx] - modeled[idx];
        final double spread = observed[idx] - observedMean;
        residualSquares = residualSquares + residual*residual;
        spreadSquares = spreadSquares + spread*spread;
    }
    return 1 - (residualSquares/spreadSquares);
}
/**
 * Computes the Nash-Sutcliffe Model Efficiency Coefficient for list inputs by converting both lists to
 * double[] arrays and delegating to the array version. The two lists must be matched, i.e.
 * observed.get(i) and modeled.get(i) must refer to the same date.
 * @param observed list of observed values, sorted by observation date
 * @param modeled list of modeled values, sorted by model date, paired to the same dates as the observed values
 * @return the Nash-Sutcliffe efficiency computed by the array version
 * @throws java.io.IOException if the two lists are not the same size
 */
public static double NashSutcliffe(ArrayList<Double> observed, ArrayList<Double> modeled) throws IOException{
    //Arguments are evaluated left to right, so observed is converted before modeled
    return NashSutcliffe(convertArray(observed), convertArray(modeled));
}
/**
 * Calculates the Kendall Correlation Coefficient (tau) based on the provided paired data.
 * Every pair (i, j) with i > j is compared: pairs where the later y value is larger count as concordant,
 * pairs where it is smaller count as discordant, and ties (or comparisons involving NaN) count as neither.
 * tau = (concordant - discordant) / (n * (n-1) / 2)
 * @param pairedData a list of only y values which belong to a sorted (by magnitude of x) list of paired x-y data in the Mann-Kendall test.
 * @return the Kendall Correlation Coefficient (tau), or NaN if fewer than two values are supplied (no pairs exist)
 */
public static double KendallCorrelationCoefficient(ArrayList<Double> pairedData){
    //Unbox the values once so the O(n^2) comparison loop works on primitives
    final int size = pairedData.size();
    final double[] yValues = new double[size];
    for(int k=0; k<size; k++){
        yValues[k] = pairedData.get(k);
    }

    //Compare ordered pairs such that i > j
    //Counters are long because the number of pairs, n*(n-1)/2, exceeds the int range once n passes ~65,000
    long concordant = 0;
    long discordant = 0;
    for(int j=0; j<size; j++){
        double yValue_j = yValues[j];
        for(int i=(j+1); i<size; i++){//Starts at i = j+1 to enforce i > j at all times
            double yValue_i = yValues[i];
            if(yValue_i > yValue_j){
                concordant++;
            }else if(yValue_i < yValue_j){
                discordant++;
            }
        }
    }

    //Calculate Kendall Tau
    double S = concordant - discordant;
    double n = size;
    return S / (n * (n-1) / 2);
}
/**
 * Computes the Mean Relative Error as a percentage. The two arrays must be matched lists, i.e.
 * observed[i] and modeled[i] must refer to the same date for the equation to be meaningful.
 * The result is (100 / n) * sum(|(observed - modeled) / observed|); no special handling is applied
 * to observed values of zero, so the usual floating point division rules apply to them.
 * @param observed array of observed values, sorted by observation date
 * @param modeled array of modeled values, sorted by model date, paired to the same dates as the observed values
 * @return the mean relative error, in percent
 * @throws java.io.IOException if the two arrays are not the same length
 */
public static double MeanRelativeError(double[] observed, double[] modeled) throws IOException{
    //The statistic is only defined for paired data, so reject arrays of different sizes
    final int count = observed.length;
    if(count != modeled.length){
        throw new IOException("Data arrays must be the same size to perform this statistic. Observed data size:\t" +
                count + "\tModeled data size:\t" + modeled.length);
    }

    //Accumulate the absolute error of each pair relative to its observed value
    double relativeErrorTotal = 0;
    for(int idx=0; idx<count; idx++){
        double observedValue = observed[idx];
        double relativeError = (observedValue - modeled[idx]) / observedValue;
        relativeErrorTotal = relativeErrorTotal + Math.abs(relativeError);
    }
    return (100.0/count) * relativeErrorTotal;
}
/**
 * Computes the Mean Relative Error for list inputs by converting both lists to double[] arrays and
 * delegating to the array version. The two lists must be matched, i.e. observed.get(i) and
 * modeled.get(i) must refer to the same date.
 * @param observed list of observed values, sorted by observation date
 * @param modeled list of modeled values, sorted by model date, paired to the same dates as the observed values
 * @return the mean relative error computed by the array version
 * @throws java.io.IOException if the two lists are not the same size
 */
public static double MeanRelativeError(ArrayList<Double> observed, ArrayList<Double> modeled) throws IOException{
    //Arguments are evaluated left to right, so observed is converted before modeled
    return MeanRelativeError(convertArray(observed), convertArray(modeled));
}
/**
 * Computes the Percent Bias (PBIAS). The two arrays must be matched lists, i.e.
 * observed[i] and modeled[i] must refer to the same date for the equation to be meaningful.
 * The result is sum((observed - modeled) * 100) / sum(observed).
 * @param observed array of observed values, sorted by observation date
 * @param modeled array of modeled values, sorted by model date, paired to the same dates as the observed values
 * @return the percent bias; positive when the observed total exceeds the modeled total (for a positive observed total)
 * @throws java.io.IOException if the two arrays are not the same length
 */
public static double PercentBias(double[] observed, double[] modeled) throws IOException{
    //The statistic is only defined for paired data, so reject arrays of different sizes
    final int count = observed.length;
    if(count != modeled.length){
        throw new IOException("Data arrays must be the same size to perform this statistic. Observed data size:\t" +
                count + "\tModeled data size:\t" + modeled.length);
    }

    //Accumulate the scaled differences and the observed total in a single pass
    double scaledDifferenceTotal = 0;
    double observedTotal = 0;
    for(int idx=0; idx<count; idx++){
        double observedValue = observed[idx];
        scaledDifferenceTotal = scaledDifferenceTotal + (observedValue - modeled[idx]) * 100;
        observedTotal = observedTotal + observedValue;
    }
    return scaledDifferenceTotal/observedTotal;
}
/**
 * Computes the Percent Bias (PBIAS) for list inputs by converting both lists to double[] arrays and
 * delegating to the array version. The two lists must be matched, i.e. observed.get(i) and
 * modeled.get(i) must refer to the same date.
 * @param observed list of observed values, sorted by observation date
 * @param modeled list of modeled values, sorted by model date, paired to the same dates as the observed values
 * @return the percent bias computed by the array version
 * @throws java.io.IOException if the two lists are not the same size
 */
public static double PercentBias(ArrayList<Double> observed, ArrayList<Double> modeled) throws IOException{
    //Arguments are evaluated left to right, so observed is converted before modeled
    return PercentBias(convertArray(observed), convertArray(modeled));
}
}