CDPHE_lowFlowStats.java [src/java/cfa] Revision: e1193d329bdb60fd9f031e276c8f51759a869a3e  Date: Tue Jan 20 14:46:24 MST 2015
package cfa;

import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Calendar;

/**
* Last Updated: 20-January-2015
* @author Tyler Wible
* @since 16-December-2014
*/
public class CDPHE_lowFlowStats {
    /**
     * Calculate the CDPHE "extreme value" design flow (see DFLOW user manual) 
     * based on an 'm'-day average low flow of each year fitted to a Log-Pearson
     * Type III distribution and interpolated for a return period of 'R' years
     * @param flowData  flow data, column1 = dates (format yyyy-mm-dd), column2 = flow values
     * @param m  the number of days to average for annual low flow analysis (m-day average)
     * @param R  the desired return period of the m-day low flow (in years)
     * @param waterYearBegin  the month and day (MM-dd) of the start of the water year for this analysis
     * @return
     * @throws IOException
     * @throws ParseException 
     */
    public double CDPHE_ExtremeValue(String[][] flowData,
                                     int m,
                                     double R,
                                     String waterYearBegin) throws IOException, ParseException{
        DoubleArray doubleArray =  new DoubleArray();
        DoubleMath doubleMath = new DoubleMath();
        
        int currentYear = Integer.parseInt(flowData[0][0].substring(0,4));
        int finalYear = Integer.parseInt(flowData[flowData.length - 1][0].substring(0,4));
//        //Start by pulling the first complete water year (determine if this is necessary
//        if(flowData[0][0].compareToIgnoreCase(String.valueOf(currentYear) + "-04-01") > 0){
//            //If the start date of data is after the start of the water year, reset the 
//            //begin date to next year so that only complete water years are analyzed
//            currentYear++;
//        }
        int nextYear = currentYear + 1;
        String beginDate = String.valueOf(currentYear) + "-" + waterYearBegin;//String beginDate = String.valueOf(currentYear) + "-04-01";
        String previousDay = doubleArray.getDay(beginDate, -1);
        String endDate = String.valueOf(nextYear) + previousDay.substring(4);//String endDate = String.valueOf(nextYear) + "-03-31";
        
        //Calculate Flow statistics for each water-year in time period
        ArrayList<Double> mDay_annualNonZero = new ArrayList<Double>();
        double mDay_annual = 0;
        boolean moreYears = finalYear > nextYear;
        while(moreYears){
            //Get current water year's data and calculate it's statistics
            String[][] partialData = doubleArray.getPeriodData(flowData, beginDate, endDate);
            if(partialData.length > 0){
                
                //Calculate m-day statistics
                Object[] resultArray = doubleArray.getMdayData(partialData, m, "arithmetic");
                //ArrayList<String> average_Mday_date = (ArrayList<String>) resultArray[0];
                ArrayList<Double> average_Mday = (ArrayList<Double>) resultArray[1];

                //Calculate minimum flow and save it
                double mDay_min = doubleMath.min(average_Mday);
                if(mDay_min > 0) mDay_annualNonZero.add(mDay_min);
                mDay_annual++;
            }
            
            //Check if this is the last year
            if(finalYear >= nextYear){
                currentYear++;
                nextYear++;
                beginDate = String.valueOf(currentYear) + "-" + waterYearBegin;//beginDate = String.valueOf(currentYear) + "-04-01";
                previousDay = doubleArray.getDay(beginDate, -1);
                endDate = String.valueOf(nextYear) + previousDay.substring(4);//endDate = String.valueOf(nextYear) + "-03-31";
            }else{
                moreYears = false;
            }
        }
        
        //Convert low flows to natural-log low-flows
        ArrayList<Double> lnMday_annualNonZero = new ArrayList<Double>();
        for(int i=0; i<mDay_annualNonZero.size(); i++){
            lnMday_annualNonZero.add( Math.log(mDay_annualNonZero.get(i)) );
        }
        
        //Determine properties of natural-log low-flows
        double U = doubleMath.meanArithmetic(lnMday_annualNonZero);//Mean
        double S = doubleMath.StandardDeviationSample(lnMday_annualNonZero);//Standard Deviation
        double G = doubleMath.SkewnessSample(lnMday_annualNonZero);//Skewness
        
        //Determine fraction of m-day low flows that are zero
        double nonZeroFlows = Double.parseDouble(String.valueOf(mDay_annualNonZero.size()));
        double f0 = (mDay_annual - nonZeroFlows) / mDay_annual;
        
        //Determine cumulative probability of corresponding user-supplied return period
        double P = ((1./R) - f0) / (1 - f0);
        
        //Determine standard normal deviate from cumulative probability (Joiner and Rosenblatt, 1971)
        double Z = 4.91 * (Math.pow(P, 0.14) - Math.pow(1-P, 0.14));
        
        //Compute gamma deviate, K, using Wilson-Hilferty transformation (Loucks et al., 1981)
        double K = (2./G) * (Math.pow(1 + (G*Z/6) - (G*G/36), 3) - 1);
        
        //Compute design flow
        double designFlow = Math.exp(U + K*S);
        
        //Call file writer for outputs fo flow statistics
        designFlow = doubleMath.round(designFlow, 3);
        
        return designFlow;
    }
    /**
     * Calculate the CDPHE "biologically-based" design flow (see DFLOW user manual) 
     * which is an 'm'-day harmonic average low flow based on a certain excursion count (exceedance?), 
     * performs this calculation for the entire flow record as well as each month of the year
     * @param data_all  a String[][] of flow data, column1 = dates (format yyyy-mm-dd), column2 = flow values
     * @param m  the number of days to average for annual low flow analysis (m-day average)
     * @param R  the desired return period of the m-day low flow (in years)
     * @param clusterLength  the length of time for the definition of a 'cluster' of excursion 
     * periods (default should be 120). All excursion periods within this amount of 
     * time of each other will be counted, subject to the clusterCountMax below.
     * @param clusterCountMax  the upper count limit on how many excursion periods 
     * will be counted within an excursion cluster (default should be 5)
     * @return
     * @throws IOException
     * @throws ParseException 
     */
    public double[] CDPHE_Biological(String[][] data_all,
                                   int m,
                                   int R,
                                   int clusterLength,
                                   int clusterCountMax) throws IOException, ParseException{
        DoubleArray doubleArray =  new DoubleArray();
        DoubleMath doubleMath = new DoubleMath();
        
        //Pull data
        String[][] data_jan = doubleArray.getSeasonalData(data_all, "01-01", "01-31");
        String[][] data_feb = doubleArray.getSeasonalData(data_all, "02-01", "02-28");
        String[][] data_mar = doubleArray.getSeasonalData(data_all, "03-01", "03-31");
        String[][] data_apr = doubleArray.getSeasonalData(data_all, "04-01", "04-30");
        String[][] data_may = doubleArray.getSeasonalData(data_all, "05-01", "05-31");
        String[][] data_jun = doubleArray.getSeasonalData(data_all, "06-01", "06-30");
        String[][] data_jul = doubleArray.getSeasonalData(data_all, "07-01", "07-31");
        String[][] data_aug = doubleArray.getSeasonalData(data_all, "08-01", "08-31");
        String[][] data_sep = doubleArray.getSeasonalData(data_all, "09-01", "09-30");
        String[][] data_oct = doubleArray.getSeasonalData(data_all, "10-01", "10-31");
        String[][] data_nov = doubleArray.getSeasonalData(data_all, "11-01", "11-30");
        String[][] data_dec = doubleArray.getSeasonalData(data_all, "12-01", "12-31");
        
        //Calculate design flows
        double[] designFlows = new double[13];
        designFlows[0] = doubleMath.round(CDPHE_Biological_calcs(data_all, m, R, clusterLength, clusterCountMax), 3);   //entire record
        designFlows[1] = doubleMath.round(CDPHE_Biological_calcs(data_jan, m, R, clusterLength, clusterCountMax), 3);   //January
        designFlows[2] = doubleMath.round(CDPHE_Biological_calcs(data_feb, m, R, clusterLength, clusterCountMax), 3);   //February
        designFlows[3] = doubleMath.round(CDPHE_Biological_calcs(data_mar, m, R, clusterLength, clusterCountMax), 3);   //March
        designFlows[4] = doubleMath.round(CDPHE_Biological_calcs(data_apr, m, R, clusterLength, clusterCountMax), 3);   //April
        designFlows[5] = doubleMath.round(CDPHE_Biological_calcs(data_may, m, R, clusterLength, clusterCountMax), 3);   //May
        designFlows[6] = doubleMath.round(CDPHE_Biological_calcs(data_jun, m, R, clusterLength, clusterCountMax), 3);   //June
        designFlows[7] = doubleMath.round(CDPHE_Biological_calcs(data_jul, m, R, clusterLength, clusterCountMax), 3);   //July
        designFlows[8] = doubleMath.round(CDPHE_Biological_calcs(data_aug, m, R, clusterLength, clusterCountMax), 3);   //August
        designFlows[9] = doubleMath.round(CDPHE_Biological_calcs(data_sep, m, R, clusterLength, clusterCountMax), 3);   //September
        designFlows[10] = doubleMath.round(CDPHE_Biological_calcs(data_oct, m, R, clusterLength, clusterCountMax), 3);  //October
        designFlows[11] = doubleMath.round(CDPHE_Biological_calcs(data_nov, m, R, clusterLength, clusterCountMax), 3);  //November
        designFlows[12] = doubleMath.round(CDPHE_Biological_calcs(data_dec, m, R, clusterLength, clusterCountMax), 3);  //December
        
        return designFlows;
    }
    /**
     * Calculate the CDPHE "biologically-based" design flow (see DFLOW user manual) 
     * which is an 'm'-day harmonic average low flow based on a certain excursion count (exceedance?)
     * @param flowData  flow data, column1 = dates (format yyyy-mm-dd), column2 = flow values
     * @param m  the number of days to average for annual low flow analysis (m-day average)
     * @param R  the desired return period of the m-day low flow (in years)
     * @param clusterLength  the length of time for the definition of a 'cluster' of excursion 
     * periods (default should be 120). All excursion periods within this amount of 
     * time of each other will be counted, subject to the clusterCountMax below.
     * @param clusterCountMax  the upper count limit on how many excursion periods 
     * will be counted within an excursion cluster (default should be 5)
     * @return
     * @throws IOException
     * @throws ParseException 
     */
    private double CDPHE_Biological_calcs(String[][] flowData,
                                   int m,
                                   int R,
                                   int clusterLength,
                                   int clusterCountMax) throws IOException, ParseException{
        DoubleArray doubleArray =  new DoubleArray();
        DoubleMath doubleMath = new DoubleMath();
        
        //Calculate m-day statistics
        Object[] resultArray = doubleArray.getMdayData(flowData, m, "harmonic");
        ArrayList<String> average_Mday_date = (ArrayList<String>) resultArray[0];
        ArrayList<Double> average_Mday = (ArrayList<Double>) resultArray[1];
        
        //Get extreme-value based design flow (as a trial design flow to start searching for the biologically based one, note this still uses an arithmetic mean)
        double trialDFLOW = CDPHE_ExtremeValue(flowData, m, R, "04-01");
        double trialDFLOW_excursions = countExcursions(average_Mday_date, average_Mday, m, trialDFLOW, clusterLength, clusterCountMax);
        
        //Get allowed number of excursions
        double A = flowData.length/(R * 365.25);
        
        //Loop until the Method of False Position (Carnahan et al., 1969) converges for the limits of the design flow
        double flow_lowerBound = 0, flow_upperBound = trialDFLOW, excursions_lowerBound = 0, excursions_upperBound = trialDFLOW_excursions;
        double designFlow = -1;
        boolean convergance = false;
        while(!convergance){
            
            //Check for convergance
            if( Math.abs(flow_upperBound - flow_lowerBound) <= (0.005*flow_lowerBound) ){
                designFlow = flow_upperBound;
                convergance = true;
            }else if( Math.abs(A - excursions_lowerBound) <= (0.005*A) ){
                designFlow = flow_lowerBound;
                convergance = true;
            }else if( Math.abs(A - excursions_upperBound) <= (0.005*A) ){
                designFlow = flow_upperBound;
                convergance = true;
            }else{
                //If convergance is not met, interpolate a new trial design flow and count it's excursions
                trialDFLOW = flow_lowerBound + ( ((flow_upperBound - flow_lowerBound) * (A - excursions_lowerBound)) / (excursions_upperBound - excursions_lowerBound) );
                trialDFLOW_excursions = countExcursions(average_Mday_date, average_Mday, m, trialDFLOW, clusterLength, clusterCountMax);
                if(trialDFLOW_excursions <= A){
                    flow_lowerBound = trialDFLOW;
                    excursions_lowerBound = trialDFLOW_excursions;
                }else{
                    flow_upperBound = trialDFLOW;
                    excursions_upperBound = trialDFLOW_excursions;
                }
            }
        }
        
        //Call file writer for outputs fo flow statistics
        designFlow = doubleMath.round(designFlow, 3);
        
        return designFlow;
    }
    /**
     * Counts excursions of m-day average flows below the trialDFLOW based on an 
     * excursion cluster length (excursionClusterLength) and a maximum number of 
     * excursions counted per excursion cluster (clusterCountMax)
     * @param averageMday_date  a list of begin and end dates of the m-day average flows, format: "yyyy-mm-dd to yyyy-mm-dd"
     * @param averageMday  a list of m-day averages corresponding to the dates in the averageMday_date array (aka must be same size)
     * @param m  the m-day averaging number
     * @param trialDFLOW  trial design flow, m-day averages below this will be counted as excursions
     * @param clusterLength  the size of a cluster (default is 120 days)
     * @param clusterCountMax  the maximum number of excursions per cluster to be counted (default is 5)
     * @return 
     */
    private double countExcursions(ArrayList<String> averageMday_date, ArrayList<Double> averageMday, int m, double trialDFLOW, int clusterLength, double clusterCountMax){
        DoubleArray doubleArray = new DoubleArray();
        DoubleMath doubleMath = new DoubleMath();
        
        //Determine number of excursions periods
        ArrayList<String> periodList_dates = new ArrayList<String>();
        ArrayList<Integer> periodList_lengths = new ArrayList<Integer>();
        for(int i=0; i<averageMday.size(); i++){
            //Check if m-day average flow is below the design flow (aka the start of an excursion period)
            if(averageMday.get(i) < trialDFLOW){
                //Check if this excursion period is a new one or the extension of the previous one
                if(periodList_dates.size() > 0){
                    //Get the date of the last excursion period
                    String lastPeriodDates = periodList_dates.get(periodList_dates.size() - 1);
                    String lastPeriod_beginDate = lastPeriodDates.substring(0,10);
                    Calendar lastPeriod_endDate = doubleArray.getCalendar(lastPeriodDates.substring(lastPeriodDates.length() - 10), "daily");
                    Calendar newPeriod_beginDate = doubleArray.getCalendar(averageMday_date.get(i).substring(0,10), "daily");
                    String newPeriod_endDate = averageMday_date.get(i).substring(averageMday_date.get(i).length() - 10);
                    
                    if(lastPeriod_endDate.after(newPeriod_beginDate) || lastPeriod_endDate.equals(newPeriod_beginDate)){
                        //If the lasted entered excursion period ends 'after' the new excursion period then this is just a continuation of it
                        //(aka excursion period 1 starts on day 1 and lasts until day 4, excursion period 2 starts on day 2 and lasts until day 5,
                        // therefore the whole thing is just 1 excursion period that lasts from day 1 to day 5)
                        periodList_dates.set(periodList_dates.size() - 1, lastPeriod_beginDate + " to " + newPeriod_endDate);
                        periodList_lengths.set(periodList_dates.size() - 1, getExcursionLength(lastPeriod_beginDate, newPeriod_endDate));
                    }else{
                        //This is a new excursion period, so add it to the list
                        periodList_dates.add(averageMday_date.get(i));
                        periodList_lengths.add(m);
                    }
                    
                }else{
                    //If this is the first excursion period, add it to the list
                    periodList_dates.add(averageMday_date.get(i));
                    periodList_lengths.add(m);
                }
            }
        }
        
        //Group the excursions periods into excursion clusters
        ArrayList<String> clusterList_dates = new ArrayList<String>();
        ArrayList<Double> clusterList_lengths = new ArrayList<Double>();
        double m_double = m;
        for(int i=0; i<periodList_dates.size(); i++){
            //Get the dates of the excursion period
            String period_beginDate_String = periodList_dates.get(i).substring(0,10);
            Calendar period_beginDate = doubleArray.getCalendar(period_beginDate_String, "daily");
            //Check if this is the first excursion cluster or not
            if(clusterList_dates.size() > 0){
                //Check if this excursion period is within the cluster or not
                String clusterDates = clusterList_dates.get(clusterList_dates.size() - 1);
                Calendar cluster_endDate = doubleArray.getCalendar(clusterDates.substring(clusterDates.length() - 10), "daily");

                if(period_beginDate.before(cluster_endDate) || period_beginDate.equals(cluster_endDate)){
                    //If the lasted excursion period starts before the end of the current excursion cluster, add it's 
                    //length to the cluster length (up to the maximum cluster count limit)
                    double periodLength = periodList_lengths.get(i);
                    double clusterCount = clusterList_lengths.get(clusterList_lengths.size() - 1) + (periodLength / m_double);
                    if(clusterCount > clusterCountMax){
                        clusterList_lengths.set(clusterList_lengths.size() - 1, clusterCountMax);
                    }else{
                        clusterList_lengths.set(clusterList_lengths.size() - 1, clusterCount);
                    }
                }else{
                    //This is excursion period is in a new excursion cluster, so add it to the list
                    String clusterEndDate = doubleArray.getDay(period_beginDate_String, clusterLength);
                    double periodLength = periodList_lengths.get(i);
                    double clusterCount = periodLength / m_double;
                    if(clusterCount > clusterCountMax){
                        clusterList_lengths.add(clusterCountMax);
                    }else{
                        clusterList_lengths.add(clusterCount);
                    }
                    clusterList_dates.add(period_beginDate_String + " to " + clusterEndDate);
                }
            }else{
                //If this is the first excursion cluster, determine it's end date
                String clusterEndDate = doubleArray.getDay(period_beginDate_String, clusterLength);
                double periodLength = periodList_lengths.get(i);
                double clusterCount = periodLength / m_double;
                if(clusterCount > clusterCountMax){
                    clusterList_lengths.add(clusterCountMax);
                }else{
                    clusterList_lengths.add(clusterCount);
                }
                clusterList_dates.add(period_beginDate_String + " to " + clusterEndDate);
            }
        }
        
        //Sum the sizes of the excursion clusters
        double excursionCountTotal = doubleMath.sum(clusterList_lengths);
        return excursionCountTotal;
    }
    /**
     * Counts how many days exist between the provided begin date and end date (beginDate cannot equal endDate)
     * @param beginDate  the begin date (format yyyy-MM-dd)
     * @param endDate  the end date (format yyyy-MM-dd)
     * @return 
     */
    private int getExcursionLength(String beginDate, String endDate){
        DoubleArray doubleArray = new DoubleArray();
        
        //Determine how many days pass before "nextDay" == "endDay"
        String nextDay = doubleArray.getDay(beginDate, 1);
        int excursionLength = 2;
        while(!nextDay.equals(endDate)){
            nextDay = doubleArray.getDay(nextDay, 1);
            excursionLength++;
        }
        return excursionLength;
    }
    /**
     * Calculate the CDPHE "human health" design flow (see DFLOW user manual) 
     * which is the harmonic mean of the flows
     * @param flowData  flow data, column1 = dates (format yyyy-mm-dd), column2 = flow values
     * @return
     * @throws IOException
     * @throws ParseException 
     */
    public double CDPHE_HumanHealth(String[][] flowData) throws IOException, ParseException{
        DoubleMath doubleMath = new DoubleMath();
        
        //Pull data
        double[] flowOnlyData = new double[flowData.length];
        for(int i=0; i<flowData.length; i++){
            flowOnlyData[i] = Double.parseDouble(flowData[i][1]);
        }
        
        //Calculate design flows
        double designFlow = doubleMath.round(doubleMath.meanHarmonic(flowOnlyData), 3);
        
        return designFlow;
    }
}