DoubleMath.java [src/java/cfa] Revision: 8757fdacfcbf6b6952f4821e0026b7162f6f1c4c Date: Tue Dec 16 15:26:15 MST 2014
package cfa;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.math.ArgumentOutsideDomainException;
import org.apache.commons.math.analysis.interpolation.SplineInterpolator;
import org.apache.commons.math.analysis.polynomials.PolynomialSplineFunction;
/**
* Last Updated: 16-December-2014
* @author Tyler Wible
* @since 21-June-2012
*/
class sort1_smallToLargeDoubleMath implements Comparator<double[]>{
//Compares the first entry and sorts smallest to largest
public int compare(final double[] entry1, final double[] entry2) {
double value1 = entry1[0];
double value2 = entry2[0];
//Compare and return the second entries
return Double.compare(value1,value2);
}
}
public class DoubleMath{
/**
* Finds the maximum value of a double[] array
* @param array the double[] array
* @return the maximum of the above array as a double
*/
public double max(double[] array){
double maximum = -9999;
for(int i=0; i<array.length; i++){
if(i == 0){
maximum = array[i];
}else{
if(Double.compare(maximum, array[i]) < 0){
maximum = array[i];
}
}
}
return maximum;
}
/**
* Finds the maximum value of a ArrayList<Double>
* @param array the ArrayList<Double> array
* @return the maximum of the above array as a double
*/
public double max(ArrayList<Double> array){
double maximum = -9999;
for(int i=0; i<array.size(); i++){
if(i == 0){
maximum = array.get(i);
}else{
if(Double.compare(maximum, array.get(i)) < 0){
maximum = array.get(i);
}
}
}
return maximum;
}
/**
* Finds the maximum value of a double[][] array
* @param array the double[][] array
* @return the maximum of the above array as a double
*/
public double max(double[][] array){
double maximum = -9999;
for(int i=0; i<array.length; i++){
for(int j=0; j<array[i].length; j++){
if(Double.compare(maximum, array[i][j]) < 0){
maximum = array[i][j];
}
}
}
return maximum;
}
/**
* Finds the minimum value of a double[] array
* @param array the double[] array
* @return the minimum of the above array as a double
*/
public double min(double[] array){
double minimum = -9999;
for(int i=0; i<array.length; i++){
if(i == 0){
minimum = array[i];
}else{
if(Double.compare(minimum, array[i]) > 0){
minimum = array[i];
}
}
}
return minimum;
}
/**
* Finds the minimum value of a ArrayList<Double>
* @param array the ArrayList<Double> array
* @return the minimum of the above array as a double
*/
public double min(ArrayList<Double> array){
double minimum = -9999;
for(int i=0; i<array.size(); i++){
if(i == 0){
minimum = array.get(i);
}else{
if(Double.compare(minimum, array.get(i)) > 0){
minimum = array.get(i);
}
}
}
return minimum;
}
/**
* Finds the minimum value of a double[][] array
* @param array the double[][] array
* @return the minimum of the above array as a double
*/
public double min(double[][] array){
double minimum = array[0][0];
for(int i=0; i<array.length; i++){
for(int j=0; j<array[i].length; j++){
if(Double.compare(minimum, array[i][j]) > 0){
minimum = array[i][j];
}
}
}
return minimum;
}
/**
* Calculates the sum of a double[] array
* @param dataArray the array to be summed
* @return the sum of the array
*/
public double sum(double[] dataArray){
double sum = 0;
//Calculate the sum of the array
for(int i=0; i<dataArray.length; i++){
sum = sum + dataArray[i];
}
return sum;
}
/**
* Calculates the sum of a ArrayList<Double>
* @param dataArray the array to be summed
* @return the sum of the array
*/
public double sum(ArrayList<Double> dataArray){
double sum = 0;
//Calculate the sum of the array
for(int i=0; i<dataArray.size(); i++){
sum = sum + dataArray.get(i);
}
return sum;
}
/**
* Rounds the double to the number of decimal places "n" where roundingValue = 10^n
* For example if the roundingValue is equal to 10, then the returned value would have 1 decimal place
* @param originalValue the original double value to be rounded
* @param decimalPlaces is equal to the number of decimal places desired
* @return a double rounded value of the orignal data
*/
public double round(double originalValue, double decimalPlaces){
double decimals = Math.pow(10, decimalPlaces);
double roundedValue = Math.round(originalValue*decimals);
roundedValue = roundedValue/decimals;
return roundedValue;
}
/**
* Rounds the double[] array to the number of decimal places "n" where roundingValue = 10^n
* For example if the roundingValue is equal to 10, then the returned matrix would contain the original matrix with 1 decimal place
* @param originalColumn the original double[] matrix to be rounded
* @param roundingValue is equal to the 10^number of decimal places desired (aka 2 decimal places has a roundingValue = 10)
* @return a double[] array of the rounded values of the original data
*/
public double[] roundColumn(double[] originalColumn, double roundingValue){
double[] roundedColumn = new double[originalColumn.length];
for(int i=0; i<originalColumn.length; i++){
double tempValue = Math.round(originalColumn[i]*roundingValue);
roundedColumn[i] = tempValue/roundingValue;
}
return roundedColumn;
}
/**
* Calculates the arithmetic mean of the provided double[] array
* @param data the double[] array of which the arithmetic mean is desired
* @return the double value of the arithmetic mean of the data array
*/
public double meanArithmetic(double[] data){
//Calculates the average of a double array
double sum = sum(data);
double count = data.length;
if(count == 0){//fix divide by zero errors
count = 1;
}
double average = sum/count;
return average;
}
/**
* Calculates the arithmetic mean of the provided ArrayList<Double>
* @param data the ArrayList<Double> of which the arithmetic mean is desired
* @return the double value of the arithmetic mean of the data array
*/
public double meanArithmetic(ArrayList<Double> data){
//Calculates the average of a double array
double sum = sum(data);
double count = data.size();
double average = sum/count;
return average;
}
/**
* Calculates the harmonic mean of the provided double[] array,
* only calculates for real positive values.
*
* Note that the final estimate of the harmonic mean is a weighted average
* of the harmonic mean of the non-zero elements and zero.
* @param data the double[] array of which the harmonic mean is desired
* @return the double value of the harmonic mean of the data array
*/
public double meanHarmonic(double[] data){
//Calculate properties of harmonic mean
double reciprocalSum = 0, nZeros = 0, nNonZeros = 0;
for(int i=0; i<data.length; i++){
if(data[i] > 0){
reciprocalSum = reciprocalSum + (1/data[i]);//sum of reciprocals
}else{
nZeros++;
}
nNonZeros++;
}
//Compute harmonic mean (with correction for the number of zero items in the array)
double meanHarmonic = (nNonZeros - nZeros) / (reciprocalSum * ((nNonZeros - nZeros)/nNonZeros));
return meanHarmonic;
}
/**
* Finds the average of the Log10 of a double[] array
* @param data the double[] array that the average is desired for
* @return the double value of the average of the Log10 of the data array
*/
public double Average_Log10(double[] data){
//Matlab code: Xmean = mean(log10(data));
//Finds the average of the Log10 of a double array
double averageLog10 = 0;
double sum = 0;
double ctr = data.length;
if(ctr == 0){//fix divide by zero errors
ctr = 1;
}
for(int i=0; i<data.length; i++){//Loop through rows and get average of Log10 of rows
sum = sum + Math.log10(data[i]);
}
averageLog10 = sum/ctr;
return averageLog10;
}
/**
* Calculates and returns the Java Math.Log10 value of each element in the double[] data array
* @param data the double[] array containing the pre-Log10 values
* @return the resulting double[] array containing the Log10 values of the "data" array
*/
public double[] Log10(double[] data){
//Matlab code: X = log10(data);
//Returns an array containing the Log10 value of each element in the data array
double[] log10Array = new double[data.length];
for(int i=0; i<data.length; i++){//Loop through rows
log10Array[i] = Math.log10(data[i]);
}
return log10Array;
}
/**
* Calculates and returns the Java Math.Log10 value of the second column in the double[all][column] data array
* @param data the double[][] array containing the pre-Log10 values
* @param column the column of data to be calculated with
* @return the resulting double[] array containing the Log10 values of the "data" array
*/
public double[] Log10(double[][] data, int column){
//Matlab code: X = log10(data);
//Returns an array containing the Log10 value of each element in the data array
double[] log10Array = new double[data.length];
for(int i=0; i<data.length; i++){//Loop through rows
log10Array[i] = Math.log10(data[i][column]);
}
return log10Array;
}
/**
* Performs a spline interpolation and computes an array of y values using the Xarray and Yarray evaluated at the xarray points
* @param Xarray an array of existing X points
* @param Yarray an array of existing Y points corresponding to the above Xarray
* @param xarray an array of x points for which y points are desired
* @return an array of y points corresponding to the x points of the xarray after a spline interpolation between Xarray and Yarray
* @throws ArgumentOutsideDomainException
*/
public double[] splineInterpolation(double[] Xarray, double[] Yarray, double[] xarray) throws ArgumentOutsideDomainException{
//Set up the spline interplator
SplineInterpolator splineInterp2 = new SplineInterpolator();
PolynomialSplineFunction splineFunction2 = splineInterp2.interpolate(Xarray, Yarray);
//Interpolate each y point for each provided x point using the above interpolatoer
double[] yarray = new double[xarray.length];
for(int i=0; i<xarray.length; i++){
try{
yarray[i] = splineFunction2.value(xarray[i]);
}catch(ArgumentOutsideDomainException e){
yarray[i] = 0;
}
}
return yarray;
}
/**
* Performs a spline interpolation and computes a y value using the Xarray and Yarray evaluated at the xValue point
* @param Xarray a double[] array of existing X points
* @param Yarray a double[] array of existing Y points corresponding to the above Xarray
* @param xValue a double x point for which y point is desired
* @return a double y point corresponding to the x point of the xValue after a spline interpolation between Xarray and Yarray
*/
public double splineInterpolation(double[] Xarray, double[] Yarray, double xValue) throws ArgumentOutsideDomainException{
//Set up the spline interplator
SplineInterpolator splineInterp2 = new SplineInterpolator();
PolynomialSplineFunction splineFunction2 = splineInterp2.interpolate(Xarray, Yarray);
//Interpolate each y point for each provided x point using the above interpolatoer
double yValue = splineFunction2.value(xValue);
return yValue;
}
/**
* Performs a linear interpolation and computes an array of y values using the Xarray and Yarray evaluated at the xArray points
* @param Xarray a double[] array of existing X points
* @param Yarray a double[] array of existing Y points corresponding to the above Xarray
* @param xArray a double[] array x points for which y points is desired
* @return a double[] array of y points corresponding to the array of x points of the xArray after a linear interpolation between Xarray and Yarray
*/
public double[] linearInterpolation(double[] Xarray, double[] Yarray, double[] xArray){
double[] yArray = new double[xArray.length];
//Interpolate a y value for each x value in xArray
for(int i=0; i<xArray.length; i++){
yArray[i] = linearInterpolation(Xarray, Yarray, xArray[i]);
}
return yArray;
}
/**
* Performs a linear interpolation and computes a y value using the Xarray and Yarray evaluated at the xValue point
* @param Xarray a double[] array of existing X points
* @param Yarray a double[] array of existing Y points corresponding to the above Xarray
* @param xValue a double x point for which y point is desired
* @return a double y point corresponding to the x point of the xValue after a linear interpolation between Xarray and Yarray
*/
public double linearInterpolation(double[] Xarray, double[] Yarray, double xValue){
DoubleArray doubleArray = new DoubleArray();
double yValue = 0;
//Sort the Xarray based on values as to properly interpolate between points
double[][] tempMatrix = new double[Xarray.length][3];
for(int i=0; i<Xarray.length; i++){
tempMatrix[i][0] = Xarray[i];
tempMatrix[i][1] = Yarray[i];
tempMatrix[i][2] = i;
}
Arrays.sort(tempMatrix, new sort1_smallToLargeDoubleMath());
Xarray = doubleArray.getColumn(tempMatrix, 0);
Yarray = doubleArray.getColumn(tempMatrix, 1);
//Interpolate bewtween the arrays
for(int i=0; i<Xarray.length; i++){
if(i == 0 && xValue < Xarray[0]){
//If xValue is smaller than the first Xarray value, extrapolate based on the slope bewteen the first two points in Xarray and Yarray
yValue = ((Yarray[i+1] - Yarray[i])/(Xarray[i+1] - Xarray[i]))*(xValue - Xarray[i]) + Yarray[i];
System.err.println("The x value: " + xValue + " is smaller than the smallest provided X value: " + Xarray[0] + ". Therefore it's corresponding y value: " + yValue + " was extrapolated from the dataset.");
break;
}else if(xValue == Xarray[i]){
yValue = Yarray[i];
break;
}else if(i != 0 && Xarray[i-1] < xValue && xValue < Xarray[i]){
yValue = ((xValue - Xarray[i-1])/(Xarray[i] - Xarray[i-1]))*(Yarray[i] - Yarray[i-1]) + Yarray[i-1];
break;
}else if(i == (Xarray.length-1) && xValue > Xarray[i]){
//If xValue is larger than the last Xarray value, extrapolate based on the slope bewteen the last two points in Xarry and Yarray
yValue = ((Yarray[i-1] - Yarray[i])/(Xarray[i-1] - Xarray[i]))*(xValue - Xarray[i]) + Yarray[i];
System.err.println("The x value: " + xValue + " is larger than the largest provided X value: " + Xarray[i] + ". Therefore it's corresponding y value: " + yValue + " was extrapolated from the dataset.");
break;
}
}
return yValue;
}
/**
* Sub-function to calculate a percentile of a dataset
* @param dataList input list data for percentile (double[])
* @param percentile_type which percentile value is desired 0.25, or 0.95, etc.
* @return percentile of the dataset.
*/
public double Percentile_function (double[] dataList, double percentile_type){
//Sort Data
Arrays.sort(dataList);
double percentile = -9999;
//return quartiles for small datasets early, if not small, then continue to find which percentile is asked for
if(dataList.length == 0){
percentile = -9999;
}else if(dataList.length == 1){
percentile = dataList[0];
}else if(Double.compare(percentile_type, 0.5) == 0){
//Find median of dataset
percentile = Median(dataList);
}else{
//Find rank of the desired percentile
double rank = percentile_type*(dataList.length + 1);//ex: 0.95*(n + 1) = rank of 95th percentile
try{
int rank_int = Integer.parseInt(String.valueOf(rank));
//If the rank is an integer find the value corresponding to it
for(int i=0; i<dataList.length; i++){
if(i+1 == rank_int){//i+1 is to compensate for Java's zero-based indexing system
percentile = dataList[i];
break;
}
}
}catch(NumberFormatException e){
//If the rank is not an integer average it between the two nearest ranks
for(int i=0; i<dataList.length; i++){
if(i+1 < rank && i+2 >= rank){//i+1 is to compensate for Java's zero-based indexing system
try{
percentile = (dataList[i] + dataList[i+1])/2;
}catch(IndexOutOfBoundsException err){
percentile = dataList[i];
System.err.println("Insufficient data to calculate the desired percentile accurately");
}
break;
}else if(i+1 == dataList.length){
percentile = dataList[i];
System.err.println("Insufficient data to calculate the desired percentile accurately");
}
}
}
}
return percentile;
}
/**
* Sub-function to calculate a percentile of a dataset
* @param input input list data for percentile (List<Double>)
* @param percentile_type which percentile value is desired 0.25, or 0.95, etc.
* @return percentile of the dataset.
*/
public double Percentile_function (List<Double> input, double percentile_type){
//Create a double[] list of data from the input ArrayList
Iterator<Double> iterate = input.iterator();
double[] dataList = new double[input.size()];
int ctr = 0;
while(iterate.hasNext()){
dataList[ctr] = (Double) iterate.next();
ctr++;
}
//Call the percentile function
double percentile = Percentile_function(dataList, percentile_type);
return percentile;
}
/**
* Sub-function to calculate the median of a dataset
* @param input input list data for median
* @return median of the dataset.
*/
public double Median(double[] input){
//sort dataset before calculating median
Arrays.sort(input);
//Find the median
int midpoint = (input.length)/2;
double median = 0;
if(input.length == 0){
median = -9999;
}else if(input.length%2 == 1){
median = input[midpoint];
}else{
median = (input[midpoint-1] + input[midpoint])/2;
}
return median;
}
/**
* Sub-function to calculate the median of a dataset
* @param input input list data for median
* @return median of the dataset.
*/
public double Median(ArrayList<Double> input){
Iterator<Double> iterate = input.iterator();
double[] dataList = new double[input.size()];
int ctr = 0;
while(iterate.hasNext()){
dataList[ctr] = (Double) iterate.next();
ctr++;
}
//Call the median function
double median = Median(dataList);
return median;
}
/**
* Sub-function to calculate the standard deviation of the population
* @param dataList input list data for standard deviation
* @return the population standard deviation of the dataList.
*/
public double StandardDeviationSample(double[] dataList){
double variance = VarianceSample(dataList);
double standardDeviation = Math.pow(variance,0.5);
return standardDeviation;
}
/**
* Sub-function to calculate the standard deviation of a dataset
* @param dataList input list data for standard deviation
* @return standard deviation of the dataset.
*/
public double StandardDeviationSample(ArrayList<Double> dataList){
double variance = VarianceSample(dataList);
double standardDeviation = Math.pow(variance,0.5);
return standardDeviation;
}
/**
* Sub-function to calculate the variance of the population
* @param dataList data array for which the variance is to be found
* @return variance of the data array
*/
public double VariancePopulation(double[] dataList){
double average = meanArithmetic(dataList);
//Calculate sum of differences
double sum = 0;
for(int i=0; i<dataList.length; i++){
sum = sum + Math.pow((dataList[i] - average), 2);
}
//Calculate total number in the set
double ctr = (double) dataList.length;
if(ctr == 0){//fix divide by zero errors
ctr = 1;
}
double variance = sum/ctr;
return variance;
}
/**
* Sub-function to calculate the variance of the population
* @param dataList data array for which the variance is to be found
* @return variance of the data array
*/
public double VariancePopulation(ArrayList<Double> dataList){
double average = meanArithmetic(dataList);
//Calculate sum of differences
double sum = 0;
for(int i=0; i<dataList.size(); i++){
sum = sum + Math.pow((dataList.get(i) - average), 2);
}
//Calculate total number in the set
double ctr = (double) dataList.size();
if(ctr == 0){//fix divide by zero errors
ctr = 1;
}
double variance = sum/ctr;
return variance;
}
/**
* Sub-function to calculate the variance of the population, variance = [1/(n-1)] * sum[(value - average)^2]
* @param dataList data array for which the variance is to be found
* @return variance of the data array
*/
public double VarianceSample(double[] dataList){
double average = meanArithmetic(dataList);
//Calculate sum of differences
double sum = 0;
for(int i=0; i<dataList.length; i++){
sum = sum + Math.pow((dataList[i] - average), 2);
}
//Calculate total number in the set
double denominator = 0;
//fix divide by zero errors
if((dataList.length-1) <= 0){
denominator = 1;
}else{
denominator = dataList.length - 1;
}
//Calculate variance
double variance = sum/denominator;
return variance;
}
/**
* Sub-function to calculate the variance of the population
* @param dataList data array for which the variance is to be found
* @return variance of the data array
*/
public double VarianceSample(ArrayList<Double> dataList){
double average = meanArithmetic(dataList);
//Calculate sum of differences
double sum = 0;
for(int i=0; i<dataList.size(); i++){
sum = sum + Math.pow((dataList.get(i) - average), 2);
}
//Calculate total number in the set
double denominator = 0;
if(dataList.size()-1 <= 0){//fix divide by zero errors
denominator = 1;
}else{
denominator = dataList.size() - 1;
}
double variance = sum/denominator;
return variance;
}
/**
* Sub-function to calculate the coefficient of variation of a dataset (Standard deviation / average)
* @param dataList input list data for coefficient of variation
* @return coefficient of variation of the dataset.
*/
public double CoefficientOfVariation(double[] dataList){
//Calculate the average and standard deviation for the coefficient of varience
double average = meanArithmetic(dataList);
double stDev = StandardDeviationSample(dataList);
double coefVar = stDev/average;
return coefVar;
}
/**
* Sub-function to calculate the coefficient of variation of a dataset (Standard deviation / average)
* @param dataList input list data for coefficient of variation
* @return coefficient of variation of the dataset.
*/
public double CoefficientOfVariation(ArrayList<Double> dataList){
//Calculate the average and standard deviation for the coefficient of varience
double average = meanArithmetic(dataList);
double stDev = StandardDeviationSample(dataList);
double coefVar = stDev/average;
return coefVar;
}
/**
* Sub-function to calculate the covariance of two datasets
* @param xData input list of x data
* @param yData input list of y data
* @return covariance of variation of the dataset.
* @throws IOException
*/
public double Covariance(double[] xData, double[] yData) throws IOException{
//Check if arrays are the same size
if(xData.length != yData.length){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.length + "\tY data size:\t" + yData.length));
}
double N = xData.length;
//Calculate the average of the two datasets
double xBar = meanArithmetic(xData);
double yBar = meanArithmetic(yData);
//Sum (Xi - Xbar) * (Yi - Ybar)
double sum = 0;
for(int i=0; i<N; i++){
sum = sum + ((xData[i] - xBar) * (yData[i] - yBar));
}
double covariance = sum / (N - 1);
return covariance;
}
/**
* Sub-function to calculate the covariance of two datasets
* @param xData input list of x data
* @param yData input list of y data
* @return covariance of variation of the dataset.
* @throws IOException
*/
public double Covariance(ArrayList<Double> xData, ArrayList<Double> yData) throws IOException{
//Check if arrays are the same size
if(xData.size() != yData.size()){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.size() + "\tY data size:\t" + yData.size()));
}
double N = xData.size();
//Calculate the average of the two datasets
double xBar = meanArithmetic(xData);
double yBar = meanArithmetic(yData);
//Sum (Xi - Xbar) * (Yi - Ybar)
double sum = 0;
for(int i=0; i<N; i++){
sum = sum + ((xData.get(i) - xBar) * (yData.get(i) - yBar));
}
double covariance = sum / (N - 1);
return covariance;
}
/**
* Sub-function to calculate the skewness of a dataset
* @param dataList input list data for skewness
* @return skewness of the dataset.
*/
public double SkewnessPopulation(double[] dataList){
//Get the average and standard deviation for use in the skewness formula
double average = meanArithmetic(dataList);
double stDev = StandardDeviationSample(dataList);
//Create list of values to compute the expected value (average) of in order to get the skewness
double[] list = new double[dataList.length];
for(int i=0; i<dataList.length; i++){
list[i] = Math.pow(((dataList[i] - average)/stDev), 3);
}
double skewness = meanArithmetic(list);
return skewness;
}
/**
* Sub-function to calculate the skewness of a dataset
* @param dataList input list data for skewness
* @return skewness of the dataset.
*/
public double SkewnessPopulation(ArrayList<Double> dataList){
//Get the average and standard deviation for use in the skewness formula
double average = meanArithmetic(dataList);
double stDev = StandardDeviationSample(dataList);
//Create list of values to compute the expected value (average) of in order to get the skewness
double[] list = new double[dataList.size()];
for(int i=0; i<dataList.size(); i++){
list[i] = Math.pow(((dataList.get(i) - average)/stDev), 3);
}
double skewness = meanArithmetic(list);
return skewness;
}
/**
* Sub-function to calculate the sample skewness (Fisher-Pearson skewness) of a dataset.
* Skewness = (n /((n-1)(n-2))) * sum{ ((x - average)/standard Deviation) ^ 3 }
* @param dataList input list data for skewness
* @return skewness of the dataset.
*/
public double SkewnessSample(double[] dataList){
//Get the average and standard deviation for use in the skewness formula
double average = meanArithmetic(dataList);
double variance = VarianceSample(dataList);
double count = dataList.length;
if(count == 0){//fix divide by zero errors
count = 1;
}
//Create list of values to compute the expected value (average) of in order to get the skewness
double[] list = new double[dataList.length];
for(int i=0; i<dataList.length; i++){
list[i] = Math.pow((dataList[i] - average)/variance, 3);
}
double coefficient = count /( (count - 1)*(count - 2) );
double skewness = coefficient * sum(list);
return skewness;
}
/**
* Sub-function to calculate the sample skewness (Fisher-Pearson skewness) of a dataset.
* Skewness = (n /((n-1)(n-2))) * sum{ ((x - average)/standard Deviation) ^ 3 }
* @param dataList input list data for skewness
* @return skewness of the dataset.
*/
public double SkewnessSample(ArrayList<Double> dataList){
//Get the average and standard deviation for use in the skewness formula
double average = meanArithmetic(dataList);
double variance = VarianceSample(dataList);
double count = dataList.size();
if(count == 0){//fix divide by zero errors
count = 1;
}
//Create list of values to compute the expected value (average) of in order to get the skewness
double[] list = new double[dataList.size()];
for(int i=0; i<dataList.size(); i++){
list[i] = Math.pow((dataList.get(i) - average)/variance, 3);
}
double coefficient = count /( (count - 1)*(count - 2) );
double skewness = coefficient * sum(list);
return skewness;
}
/**
* Sub-function to calculate the sum of squares of x (Sxx) of a dataset.
* @param xData input list data for Sxx
* @return Sxx of the dataset.
*/
public double Sxx(double[] xData){
double xBar = meanArithmetic(xData);
double sxx = 0;
for(int i=0; i<xData.length; i++){
sxx = sxx + Math.pow(xData[i] - xBar, 2);
}
return sxx;
}
/**
* Sub-function to calculate the sum of squares of x (Sxx) of a dataset.
* @param xData input list data for Sxx
* @return Sxx of the dataset.
*/
public double Sxx(ArrayList<Double> xData){
double xBar = meanArithmetic(xData);
double sxx = 0;
for(int i=0; i<xData.size(); i++){
sxx = sxx + Math.pow(xData.get(i) - xBar, 2);
}
return sxx;
}
/**
* Sub-function to calculate the sum of cross products (Sxy) of two datasets.
* @param xData input list of x data for Sxy
* @param yData input list of y data for Sxy
* @return Sxx of the datasets.
*/
public double Sxy(double[] xData, double[] yData) throws IOException{
//Check if arrays are the same size
if(xData.length != yData.length){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.length + "\tY data size:\t" + yData.length));
}
double xBar = meanArithmetic(xData);
double yBar = meanArithmetic(yData);
double sxy = 0;
for(int i=0; i<xData.length; i++){
sxy = sxy + (xData[i] - xBar)*(yData[i] - yBar);
}
return sxy;
}
/**
* Sub-function to calculate the sum of cross products (Sxy) of two datasets.
* @param xData input list of x data for Sxy
* @param yData input list of y data for Sxy
* @return Sxx of the datasets.
*/
public double Sxy(ArrayList<Double> xData, ArrayList<Double> yData) throws IOException{
//Check if arrays are the same size
if(xData.size() != yData.size()){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.size() + "\tY data size:\t" + yData.size()));
}
double xBar = meanArithmetic(xData);
double yBar = meanArithmetic(yData);
double sxy = 0;
for(int i=0; i<xData.size(); i++){
sxy = sxy + (xData.get(i) - xBar)*(yData.get(i) - yBar);
}
return sxy;
}
/**
* Sub-function to calculate the correlation coefficient (r) of two datasets.
* @param xData input list of x data
* @param yData input list of y data
* @return the correlation coefficient of the two data sets [r = Sxy / squareRoot(Sxx * Syy)]
*/
public double CorrelationCoefficient(double[] xData, double[] yData) throws IOException{
//Check if arrays are the same size
if(xData.length != yData.length){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.length + "\tY data size:\t" + yData.length));
}
double sxx = Sxx(xData);
double syy = Sxx(yData);
double sxy = Sxy(xData, yData);
double r = sxy / Math.pow(sxx*syy, 0.5);
return r;
}
/**
* Sub-function to calculate the correlation coefficient (r) of two datasets.
* @param xData input list of x data
* @param yData input list of y data
* @return the correlation coefficient of the two data sets [r = Sxy / squareRoot(Sxx * Syy)]
*/
public double CorrelationCoefficient(ArrayList<Double> xData, ArrayList<Double> yData) throws IOException{
//Check if arrays are the same size
if(xData.size() != yData.size()){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.size() + "\tY data size:\t" + yData.size()));
}
double sxx = Sxx(xData);
double syy = Sxx(yData);
double sxy = Sxy(xData, yData);
double r = sxy / Math.pow(sxx*syy, 0.5);
return r;
}
/**
* Sub-function to calculate the R^2 (Rsquare) value of two datasets. Rsquare is the coefficient of determination fraction of the variance explained by regression
* @param xData input list of x data
* @param yData input list of y data
* @return the Rsquare of the two data sets [r = 1 - See/Syy]
*/
public double Rsquare(double[] xData, double[] yData) throws IOException{
//Check if arrays are the same size
if(xData.length != yData.length){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.length + "\tY data size:\t" + yData.length));
}
double sxx = Sxx(xData);
double syy = Sxx(yData);
double n = xData.length;
if(n == 0){//fix divide by zero errors
n = 1;
}
double sxy = Sxy(xData, yData);
double b1 = sxy / sxx;
double Ssquare = (syy - b1*sxx)/(n-2);
double rSquare = (syy - Ssquare*(n - 2))/syy;
return rSquare;
}
/**
* Sub-function to calculate the R^2 (Rsquare) value of two datasets. Rsquare is the coefficient of determination fraction of the variance explained by regression
* @param xData input list of x data for Sxy
* @param yData input list of y data for Sxy
* @return the Rsquare of the two data sets [r = 1 - See/Syy]
*/
public double Rsquare(ArrayList<Double> xData, ArrayList<Double> yData) throws IOException{
//Check if arrays are the same size
if(xData.size() != yData.size()){
throw(new IOException("Data arrays must be the same size to perform this statistic. X data size:\t" +
xData.size() + "\tY data size:\t" + yData.size()));
}
double sxx = Sxx(xData);
double syy = Sxx(yData);
double n = xData.size();
double sxy = Sxy(xData, yData);
double b1 = sxy / sxx;
double Ssquare = (syy - b1*sxx)/(n-2);
double rSquare = (syy - Ssquare*(n - 2))/syy;
return rSquare;
}
/**
* Computes the sum of squared errors for a set of give error values
* @param errors array of diverence values (y - y_hat)
* @return the sum of squared errors, sum(error_i ^ 2)
*/
public double SSE(double[] errors){
double sum = 0;
for(int i=0; i<errors.length;i++){
sum = sum + Math.pow(errors[i], 2);
}
return sum;
}
/**
* Computes the Likelihood function for a set of give error values
* @param errors array of difference values (y - y_hat)
* @return the value of the likelihood function LH = product( [1/sqrt(2*pi*sigma_errors^2)] * exp [-(error^2) / (2*sigma_errors^2)]
*/
public double LF(double[] errors){
//Compute the variance of the errors
double sigma_e2 = VarianceSample(errors);
//Calculate the likelihood function for the array and the varience
double LH = 1;
for(int i=0; i<errors.length; i++){
LH = LH * ( (Math.pow(2*Math.PI*sigma_e2, -0.5)) * Math.exp(-Math.pow(errors[i], 2) / (2*sigma_e2)) );
}
return LH;
}
/**
* Computes the Log-Likelihood function for a set of give error values
* @param errors array of difference values (y - y_hat)
* @return the value of the likelihood function, LLH = (-n/2)*ln(2*pi) - 1/2*ln(sigma_errors ^ 2n) - 1/2*(sigma_errors ^ -2)*sum(errors[i] ^ 2)
*/
public double LLF(double[] errors){
//Compute the standard deviation of the errors
double N = errors.length;
double sigma_e = StandardDeviationSample(errors);
//Calculate the log-likelihood function for the array and the standard deviation
double sse = SSE(errors);
Double part1 = (N * Math.log(2*Math.PI) / (-2));
Double part2 = (Math.log(Math.pow(sigma_e, 2*N)) / (2));
Double part3 = (Math.pow(sigma_e, -2) / (2));
//Check if the log/powers calculated any infinite values
if(part2.isInfinite()){
part2 = 1.0;
}
if(part3.isInfinite()){
part3 = 1.0;
}
double LLH = part1 - part2 - (part3 * sse);
//System.out.println("LLF Results: "+N +"\t"+sigma_e+"\t"+sse+"\t"+LLH);
return LLH;
}
/**
* Computes the Akaike Information Criterion (AIC) value for a given dataset using the Log-Likelihood function
* and number of parameters of the model as a goodness of fit test
* @param errors array of difference values (y - y_hat) or (actual - predicted)
* @param numberOfParams the number of parameters used in the model (AR(p) or ARMA(p,q)) to predict future data
* @return the value of AIC for the given errors and parameters
*/
public double AIC(double[] errors, int numberOfParams){
//Compute the log-likelihood function
double llf = LLF(errors);
//Compute the value of aic
double aic = 2*numberOfParams - 2*llf;
return aic;
}
/**
* Computes the Bayesian Information Criterion (BIC) value for a given dataset using the Log-Likelihood function
* and number of parameters of the model as a goodness of fit test
* @param errors array of difference values (y - y_hat) or (actual - predicted)
* @param numberOfParams the number of parameters used in the model (AR(p) or ARMA(p,q)) to predict future data
* @return the value of BIC for the given errors and parameters
*/
public double BIC(double[] errors, int numberOfParams){
//Compute the log-likelihood function
double llf = LLF(errors);
double N = errors.length;
//Compute the value of bic
double bic = 2*numberOfParams*Math.log(N) - 2*llf;
return bic;
}
/**
* Computes the Nash-Sutcliffe Model Efficiency Coefficient, note that the provided data must be match lists, aka
* the value of observed[i] must be at the same date as modeled[i] for the equation to work
* @param observed array of observed values, sorted by observation date
* @param modeled array of modeled values, sorted by model date, paired to the same dates as the above observed dates
* @return
*/
public double NashSutcliffe(double[] observed, double[] modeled) throws IOException{
//Check if arrays are the same size
if(observed.length != modeled.length){
throw(new IOException("Data arrays must be the same size to perform this statistic. Observed data size:\t" +
observed.length + "\tModeled data size:\t" + modeled.length));
}
double observed_ave = meanArithmetic(observed);
double numerator = 0;
double denominator = 0;
for(int i=0; i<observed.length; i++){
numerator = numerator + (observed[i] - modeled[i])*(observed[i] - modeled[i]);
denominator = denominator + (observed[i] - observed_ave)*(observed[i] - observed_ave);
}
double E = 1 - (numerator/denominator);
return E;
}
/**
* Computes the Nash-Sutcliffe Model Efficiency Coefficient, note that the provided data must be match lists, aka
* the value of observed[i] must be at the same date as modeled[i] for the equation to work
* @param observed array of observed values, sorted by observation date
* @param modeled array of modeled values, sorted by model date, paired to the same dates as the above observed dates
* @return
*/
public double NashSutcliffe(ArrayList<Double> observed, ArrayList<Double> modeled) throws IOException{
//Check if arrays are the same size
if(observed.size() != modeled.size()){
throw(new IOException("Data arrays must be the same size to perform this statistic. Observed data size:\t" +
observed.size() + "\tModeled data size:\t" + modeled.size()));
}
double observed_ave = meanArithmetic(observed);
double numerator = 0;
double denominator = 0;
for(int i=0; i<observed.size(); i++){
numerator = numerator + (observed.get(i) - modeled.get(i))*(observed.get(i) - modeled.get(i));
denominator = denominator + (observed.get(i) - observed_ave)*(observed.get(i) - observed_ave);
}
double E = 1 - (numerator/denominator);
return E;
}
/**
* Calculates the Kendall Correlation Coefficient (tau) based on the provided paired data
* @param pairedData a list of only y values which belong to a sorted (by magnitude of x) list of paired x-y data in the Mann-Kendall test.
* @return the Mann-Kendall Correlation Coefficient
*/
public double KendallCorrelationCoefficient(ArrayList<Double> pairedData){
//Compare ordered pairs such that i > j
int M = 0, P = 0;
for(int j=0; j<pairedData.size(); j++){
double yValue_j = pairedData.get(j);
for(int i=(j+1); i<pairedData.size(); i++){//Starts at i = j+1 to enforce i > j at all times
double yValue_i = pairedData.get(i);
if(yValue_i > yValue_j){
P++;
}else if(yValue_i < yValue_j){
M++;
}
}
}
//Calculate Kendall Tau
double S = P - M;
double n = pairedData.size();
double tau = S / (n * (n-1) / 2);
return tau;
}
}