Statistics.java [src/csip/cosu] Revision: default  Date:
/*
 * $Id$
 *
 * This file is part of the Cloud Services Integration Platform (CSIP),
 * a Model-as-a-Service framework, API and application suite.
 *
 * 2012-2022, Olaf David and others, OMSLab, Colorado State University.
 *
 * OMSLab licenses this file to you under the MIT license.
 * See the LICENSE file in the project root for more information.
 */
package csip.cosu;

import java.util.Arrays;

/**
 * Statistics basics.
 *
 * @author od
 */
public class Statistics {

  public static final int MAXIMIZATION = 1;
  public static final int MINIMIZATION = 2;
  public static final int ABSMAXIMIZATION = 3;
  public static final int ABSMINIMIZATION = 4;


  private Statistics() {
  }


  /**
   * Normalized Vector.
   */
  public static double norm_vec(double x, double y, double z) {
    return Math.sqrt(x * x + y * y + z * z);
  }


  public static double max(double[] vals) {
    double max = vals[0];
    for (double v : vals) {
      if (v > max) {
        max = v;
      }
    }
    return max;
  }


  public static double min(double[] vals) {
    double min = vals[0];
    for (double v : vals) {
      if (v < min) {
        min = v;
      }
    }
    return min;
  }


  public static double range(double[] vals) {
    double min = vals[0];
    double max = vals[0];
    for (double v : vals) {
      if (v < min) {
        min = v;
      }
      if (v > max) {
        max = v;
      }
    }
    return max - min;
  }


  public static int length(double[] vals) {
    return vals.length;
  }


  public static double median(double[] vals) {
    return quantile(vals, 0.5);
  }


  public static double mean(double[] vals) {
    return sum(vals) / vals.length;
  }


  public static double stddev(double[] vals) {
    double mean = mean(vals);
    double squareSum = 0;
    for (double v : vals) {
      squareSum += v * v;
    }
    return Math.sqrt(squareSum / vals.length - mean * mean);
  }


  public static double stderr(double[] vals) {
//        return Math.sqrt(variance(vals) / vals.length);
    return stddev(vals) / Math.sqrt(vals.length);
  }


  public static double variance(double[] vals) {
    double stddev = stddev(vals);
    return stddev * stddev;
  }


  public static double meandev(double[] vals) {
    double mean = mean(vals);
    int size = vals.length;
    double sum = 0;
    for (int i = size; --i >= 0;) {
      sum += Math.abs(vals[i] - mean);
    }
    return sum / size;
  }


  public static double sum(double[] vals) {
    double sum = 0;
    for (double v : vals) {
      sum = sum + v;
    }
    return sum;
  }


  public static double product(double[] vals) {
    double prod = 1;
    for (double v : vals) {
      prod = prod * v;
    }
    return prod;
  }


  public static double quantile(double[] vals, double phi) {
    if (vals.length == 0) {
      return 0.0;
    }

    double[] sortedElements = Arrays.copyOf(vals, vals.length);
    Arrays.sort(sortedElements);
    int n = sortedElements.length;

    double index = phi * (n - 1);
    int lhs = (int) index;
    double delta = index - lhs;
    double result;

    if (lhs == n - 1) {
      result = sortedElements[lhs];
    } else {
      result = (1 - delta) * sortedElements[lhs] + delta * sortedElements[lhs + 1];
    }
    return result;
  }


  /**
   * Returns the lag-1 autocorrelation of a dataset;
   */
  public static double lag1(double[] vals) {
    double mean = mean(vals);
    int size = vals.length;
    double r1;
    double q = 0;
    double v = (vals[0] - mean) * (vals[0] - mean);
    for (int i = 1; i < size; i++) {
      double delta0 = (vals[i - 1] - mean);
      double delta1 = (vals[i] - mean);
      q += (delta0 * delta1 - q) / (i + 1);
      v += (delta1 * delta1 - v) / (i + 1);
    }
    r1 = q / v;
    return r1;
  }


  public static double norm_rmse(double[] obs, double[] sim, double missing) {
    double sum = 0, size = 0;
    for (int i = 0; i < obs.length; i++) {
      if (obs[i] > missing) {
        sum += obs[i];
        size++;
      }
    }

    double measuredMean = sum / size;
    int N = Math.min(obs.length, sim.length);
    double numerator = 0, denominator = 0;
    for (int i = 0; i < N; i++) {
      if (obs[i] > missing) {
        numerator += (obs[i] - sim[i]) * (obs[i] - sim[i]);
        denominator += (obs[i] - measuredMean) * (obs[i] - measuredMean);
      }
    }
    if (denominator == 0) {
      throw new RuntimeException("Error: The denominator is 0.\n"
          + "This happens if all observed values are equal to their mean.");
    }
    return Math.sqrt(numerator / denominator);
  }


  public static double err_sum(double[] obs, double[] sim) {
    double volError = 0;
    for (int i = 0; i < sim.length; i++) {
      volError += (sim[i] - obs[i]);
    }
    return volError;
  }


  /**
   * Calcs coefficients of linear regression between x, y data
   *
   * @param xData the independent data array (x)
   * @param yData the dependent data array (y)
   * @return (intercept, gradient, r2)
   */
  public static double[] linearReg(double[] xData, double[] yData) {
    sameArrayLen(xData, yData);
    double sumX = 0;
    double sumY = 0;
    double prod = 0;
    int nstat = xData.length;
    double[] regCoef = new double[3]; //(intercept, gradient, r2)

    double meanYValue = mean(yData);
    double meanXValue = mean(xData);

    //calculating regression coefficients
    for (int i = 0; i < nstat; i++) {
      sumX += (xData[i] - meanXValue) * (xData[i] - meanXValue);
      sumY += (yData[i] - meanYValue) * (yData[i] - meanYValue);
      prod += (xData[i] - meanXValue) * (yData[i] - meanYValue);
    }
    if (sumX > 0 && sumY > 0) {
      regCoef[1] = prod / sumX;  //gradient
      regCoef[0] = meanYValue - regCoef[1] * meanXValue; //intercept
      regCoef[2] = Math.pow((prod / Math.sqrt(sumX * sumY)), 2); //r2
    }
    return regCoef;
  }


  public static double intercept(double[] xData, double[] yData) {
    return linearReg(xData, yData)[0];
  }


  public static double gradient(double[] xData, double[] yData) {
    return linearReg(xData, yData)[1];
  }


  public static double r2(double[] xData, double[] yData) {
    return linearReg(xData, yData)[2];
  }


  /**
   * Generate a random number in a range.
   *
   * @param min
   * @param max
   * @return the random value in the min/max range
   */
  public static double random(double min, double max) {
    assert max > min;
    return min + Math.random() * (max - min);
  }


  /**
   * Runoff coefficient error ROCE
   *
   * @param obs
   * @param sim
   * @param precip
   * @return roce
   */
  public static double runoffCoefficientError(double[] obs, double[] sim, double[] precip) {
    sameArrayLen(sim, obs, precip);
    double mean_pred = mean(sim);
    double mean_val = mean(obs);
    double mean_ppt = mean(precip);
    double error = Math.abs((mean_pred / mean_ppt) - (mean_val / mean_ppt));
    return Math.sqrt(error);
  }


  public static double stderrReg(double[] regcoeff, double[] obs, double[] sim) {
    sameArrayLen(obs, sim);
    double sum = 0;
    for (int i = 0; i < obs.length; i++) {
      double res = sim[i] * regcoeff[1] + regcoeff[0];
      sum += (obs[i] - res) * (obs[i] - res);
    }
    return Math.sqrt(sum / (obs.length - 2));
  }


  /**
   *
   * @param obs observed data
   * @param sim simulated data
   * @return dsGrad
   */
  public static double dsGrad(double[] obs, double[] sim) {
    sameArrayLen(obs, sim);
    int dsLength = sim.length;

    double[] cumPred = new double[dsLength];
    double[] cumVali = new double[dsLength];

    double cp = 0;
    double cv = 0;
    for (int i = 0; i < dsLength; i++) {
      cp += sim[i];
      cv += obs[i];
      cumPred[i] = cp;
      cumVali[i] = cv;
    }

    //interc., grad., r?
    double[] regCoef = linearReg(cumVali, cumPred);
    return regCoef[1];
  }


  /**
   * Check if the arrays have the same length
   *
   * @param arr
   */
  private static void sameArrayLen(double[]... arr) {
    int len = arr[0].length;
    for (double[] a : arr) {
      if (a.length != len) {
        throw new IllegalArgumentException("obs and sim data have not same size (" + a.length + "/" + len + ")");
      }
    }
  }
}