// WaterData_USGS.java [src/WaterData] Revision: default  Date:
package WaterData;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import utils.WebPageUtils;

/**
* Last Updated: 9-April-2019
* @author Tyler Wible
* @since 21-June-2012
*/
public class WaterData_USGS implements WaterDataInterface{
    //Database name used in citations and error messages
    public String database = "USGS";

    //Qualifier codes USGS publishes in a value column when no usable numeric
    //reading exists (Ice cover, Seasonal shutdown, Discontinued, Rating being
    //developed, Equipment malfunction, Maintenance, unspecified, or blank)
    private static final String[] NO_DATA_CODES = {"Ice", "Ssn", "Dis", "Rat", "Eqp", "Mnt", "***", ""};

    /**
     * Checks whether a value column holds a real measurement rather than one of
     * the USGS "no data" qualifier codes.
     * @param value the raw text of the value column
     * @return true when the text is not a known qualifier code and not blank
     */
    private static boolean isUsableValue(String value){
        for(String code : NO_DATA_CODES){
            if(value.equalsIgnoreCase(code)){
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the exception thrown when a download from NWIS fails.
     * @param dataDescription short label of the data being fetched (e.g. "flow")
     * @param url the URL that failed
     * @param ex the underlying I/O failure
     * @return a WaterDataException carrying a descriptive message
     */
    private WaterDataException downloadFailure(String dataDescription, String url, IOException ex){
        return new WaterDataException("There was an issue extracting " + database + " " + dataDescription +
                " data from the specified URL: " + url + ". " + ex.getMessage());
    }

    /**
     * Builds a citation string for USGS NWIS stamped with the current date/time.
     * @return citation text ending with an "accessed" timestamp (yyyy-MM-dd HH:mm)
     */
    @Override
    public String getDataSourceCitation(){
        //Timestamp the citation with the moment of access
        DateFormat sourceDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
        String today = sourceDateFormat.format(new Date());

        //Cite USGS NWIS
        return "Stream flow data and water quality test data retrieved from the U.S. Geological Survey, National Water Information System: Web Interface. http://waterdata.usgs.gov/nwis, accessed: " + today;
    }

    /**
     * Downloads the raw RDB (tab-delimited) daily stream-flow page for a station.
     * @param directory unused for USGS (kept for interface compatibility)
     * @param orgId unused for USGS (kept for interface compatibility)
     * @param stationId USGS site number
     * @param startDate begin date, yyyy-MM-dd
     * @param endDate end date, yyyy-MM-dd
     * @return every line of the downloaded page
     * @throws WaterDataException when the download fails
     */
    @Override
    public ArrayList<String> extractFlowData_raw(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
        //Daily-values service, discharge parameter (cb_00060), RDB output
        //e.g. https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&begin_date=1900-01-01&end_date=2015-07-01&site_no=07369654&referred_module=sw
        String flowUrl = "https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb" +
                "&site_no=" + stationId +
                "&begin_date=" + startDate +
                "&end_date=" + endDate + "&referred_module=sw";

        //Fetch the flow data webpage for the current USGS station
        try {
            return WebPageUtils.downloadWebpage(flowUrl);
        } catch (IOException ex) {
            throw downloadFailure("flow", flowUrl, ex);
        }
    }

    /**
     * Downloads daily flow data and reduces it to a date/value table.
     * @return rows of {date, flow value}; rows with "no data" qualifier codes are dropped
     * @throws WaterDataException when the download fails
     */
    @Override
    public String[][] extractFlowData_formatted(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
        //Fetch flow data
        ArrayList<String> webpageAll = extractFlowData_raw(directory, orgId, stationId, startDate, endDate);

        //Keep only USGS data rows that carry a real measurement
        //f[1] = stationId, f[2] = date, f[3] = flow value
        ArrayList<String[]> rows = new ArrayList<>();
        for(String line : webpageAll){
            String[] f = line.split("\t");
            if(f.length >= 4 && "USGS".equals(f[0]) && isUsableValue(f[3])){
                rows.add(new String[]{f[2], f[3]});
            }
        }
        return rows.toArray(new String[rows.size()][]);
    }

    /**
     * Downloads the raw RDB water-quality sample page for a station.
     * @param wqTest unused here (filtering happens in the formatted variant)
     * @return every line of the downloaded page
     * @throws WaterDataException when the download fails
     */
    @Override
    public ArrayList<String> extractWaterQualityData_raw(String directory, String orgId, String stationId, String startDate, String endDate, String wqTest) throws WaterDataException {
        //Water-quality sample service, all parameter codes, RDB output
        String wqUrl = "https://nwis.waterdata.usgs.gov/usa/nwis/qwdata/?" +
                "site_no=" + stationId +
                "&begin_date=" + startDate +
                "&end_date=" + endDate +
                "&agency_cd=USGS&inventory_output=0&rdb_inventory_output=value&TZoutput=0&pm_cd_compare=Greater%20than&radio_parm_cds=all_parm_cds&qw_attributes=0&format=rdb&qw_sample_wide=0&rdb_qw_attributes=0&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list";

        //Fetch the water quality data webpage for the current USGS station
        try {
            return WebPageUtils.downloadWebpage(wqUrl);
        } catch (IOException ex) {
            throw downloadFailure("water quality", wqUrl, ex);
        }
    }

    /**
     * Downloads water-quality samples and reduces them to a date/value table
     * for one requested test (or all tests).
     * @param wqTest USGS parameter code; only the first 5 characters are used,
     *               or the literal "all" to keep every parameter
     * @return rows of {date, result value}
     * @throws WaterDataException when the download fails
     */
    @Override
    public String[][] extractWaterQualityData_formatted(String directory, String orgId, String stationId, String startDate, String endDate, String wqTest) throws WaterDataException {
        //Fetch water quality data
        ArrayList<String> webpageAll = extractWaterQualityData_raw(directory, orgId, stationId, startDate, endDate, wqTest);

        //Parse out the 5-digit USGS parameter code ("all" is shorter and kept as-is)
        String wqCode = wqTest.length() > 5 ? wqTest.substring(0, 5) : wqTest;
        boolean keepAll = wqCode.equalsIgnoreCase("all");

        //Single pass: keep USGS rows whose parameter code matches the request and
        //whose code/result columns are both populated
        //f[1] = stationId, f[2] = date, f[12] = parameter code, f[14] = result value
        ArrayList<String[]> rows = new ArrayList<>();
        for(String line : webpageAll){
            String[] f = line.split("\t");
            if(f.length >= 15 && "USGS".equals(f[0]) &&
                    !f[12].isEmpty() && !f[14].isEmpty() &&
                    (keepAll || f[12].equalsIgnoreCase(wqCode))){
                rows.add(new String[]{f[2], f[14]});
            }
        }
        return rows.toArray(new String[rows.size()][]);
    }

    /**
     * Downloads the raw RDB annual peak-flow (flood) page for a station.
     * @return every line of the downloaded page
     * @throws WaterDataException when the download fails
     */
    @Override
    public ArrayList<String> extractFloodData_raw(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
        //Peak-flow service, RDB output
        //e.g. https://nwis.waterdata.usgs.gov/nwis/peak?format=rdb&site_no=06764880&period=&begin_date=1990-01-01&end_date=2017-01-01&agency_cd=USGS
        String peakFlowUrl = "https://nwis.waterdata.usgs.gov/nwis/peak?" +
                "site_no=" + stationId +
                "&period=&begin_date=" + startDate +
                "&end_date=" + endDate + "&agency_cd=USGS&format=rdb";

        //Get peak flow (flood) data
        try {
            return WebPageUtils.downloadWebpage(peakFlowUrl);
        } catch (IOException ex) {
            throw downloadFailure("peak flow (flood)", peakFlowUrl, ex);
        }
    }

    /**
     * Downloads annual peak-flow data and reduces it to a water-year/value table.
     * Handles the two column layouts USGS currently serves (date in column 2 or
     * column 4) by checking which column looks like a yyyy-mm-dd date.
     * @return rows of {water year, peak flow (cfs)}
     * @throws WaterDataException when the download fails
     */
    @Override
    public double[][] extractFloodData_formatted(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
        //Get peak flow (flood) data
        ArrayList<String> peakWebPage = extractFloodData_raw(directory, orgId, stationId, startDate, endDate);

        //Keep only USGS rows carrying a real measurement, normalizing the two layouts
        ArrayList<String[]> rows = new ArrayList<>();
        for(String line : peakWebPage){
            String[] f = line.split("\t");
            if(f.length >= 5 && "USGS".equals(f[0]) && isUsableValue(f[4])){
                //A yyyy-mm-dd date string is exactly 10 characters long
                boolean dateInCol2 = f[2].length() == 10;
                boolean dateInCol4 = f[4].length() == 10;
                if(f.length > 6 && dateInCol4 && !dateInCol2){
                    //Layout B: f[4] = date, f[6] = peak flow (cfs)
                    rows.add(new String[]{f[4], f[6]});
                }else{
                    //Layout A: f[2] = date, f[4] = peak flow (cfs)
                    rows.add(new String[]{f[2], f[4]});
                }
            }
        }

        //Reformat: column 0 = water year of the peak date, column 1 = peak flow
        double[][] returnArray = new double[rows.size()][2];
        for(int i=0; i<returnArray.length; i++){
            returnArray[i][0] = Double.valueOf(WaterQualityInfo.getWaterYear(rows.get(i)[0]));
            returnArray[i][1] = Double.parseDouble(rows.get(i)[1]);
        }
        return returnArray;
    }

    /**
     * Downloads the raw RDB 15-minute (instantaneous) flow page for a station.
     * Start/end dates earlier than 2007-10-01 are clamped to that date because
     * NWIS only serves instantaneous values from water year 2008 onward (the
     * older Instantaneous Data Archive was discontinued).
     * @return every line of the downloaded page
     * @throws WaterDataException when the download fails
     */
    @Override
    public ArrayList<String> extractInstantaneousFlowData_raw(String directory, String stationId, String startDate, String endDate) throws WaterDataException {
        //Clamp dates to the NWIS instantaneous-data availability window
        if(startDate.compareToIgnoreCase("2007-10-01") < 0){
            startDate = "2007-10-01";
        }
        if(endDate.compareToIgnoreCase("2007-10-01") < 0){
            endDate = "2007-10-01";
        }

        //Unit-values service, discharge parameter (cb_00060), RDB output
        String flowUrl = "https://nwis.waterdata.usgs.gov/nwis/uv?cb_00060=on&format=rdb" +
                "&site_no=" + stationId + "&period=" +
                "&begin_date=" + startDate +
                "&end_date=" + endDate;

        //Get 15-minute (instantaneous) flow data; these pages are large/slow
        try {
            return WebPageUtils.downloadWebpage_slowData(flowUrl);
        } catch (IOException ex) {
            throw downloadFailure("instantaneous flow (15-minute flow)", flowUrl, ex);
        }
    }

    /**
     * Downloads 15-minute flow data and reduces it to a date-time/value table.
     * @return rows of {date-time (yyyy-MM-dd HH:mm), flow value}
     * @throws WaterDataException when the download fails
     */
    @Override
    public String[][] extractInstantaneousFlowData_formatted(String directory, String stationId, String startDate, String endDate) throws WaterDataException {
        //Get 15-minute flow (instantaneous) data
        ArrayList<String> webpageAll = extractInstantaneousFlowData_raw(directory, stationId, startDate, endDate);

        //Keep only USGS rows carrying a real measurement
        //BUG FIX: the value lives in f[4], so rows must have at least 5 columns
        //(the previous >= 4 guard allowed an ArrayIndexOutOfBoundsException)
        //f[1] = stationId, f[2] = date-time, f[3] = time zone (e.g. MDT), f[4] = flow value
        ArrayList<String[]> rows = new ArrayList<>();
        for(String line : webpageAll){
            String[] f = line.split("\t");
            if(f.length >= 5 && "USGS".equals(f[0]) && isUsableValue(f[4])){
                rows.add(new String[]{f[2], f[4]});
            }
        }
        return rows.toArray(new String[rows.size()][]);
    }

    /**
     * Downloads the raw RDB expanded stage-discharge rating table for a station.
     * @return every line of the downloaded page
     * @throws WaterDataException when the download fails
     */
    @Override
    public ArrayList<String> extractStageDischarge_raw(String stationId) throws WaterDataException {
        //Expanded (exsa) rating-table file, e.g.
        //https://nwis.waterdata.usgs.gov/nwisweb/data/ratings/exsa/USGS.06741510.exsa.rdb
        String stageDischargeUrl = "https://nwis.waterdata.usgs.gov/nwisweb/data/ratings/exsa/USGS." + stationId + ".exsa.rdb";

        //Get stage discharge (rating curve) data
        try {
            return WebPageUtils.downloadWebpage(stageDischargeUrl);
        } catch (IOException ex) {
            throw downloadFailure("stage discharge (rating curve)", stageDischargeUrl, ex);
        }
    }

    /**
     * Downloads the rating table and reduces it to a discharge/depth table.
     * @return rows of {discharge, stage depth}; comment ("#") and non-numeric
     *         rows (e.g. the RDB header) are skipped
     * @throws WaterDataException when the download fails
     */
    @Override
    public double[][] extractStageDischarge_formatted(String stationId) throws WaterDataException {
        //Get stage-discharge (rating curve) data
        ArrayList<String> webpageAll = extractStageDischarge_raw(stationId);

        //Keep only data rows where all three columns parse as numbers
        //f[0] = depth, f[1] = shift, f[2] = discharge
        ArrayList<double[]> rows = new ArrayList<>();
        for(String line : webpageAll){
            //Skip blank lines and RDB comment rows explicitly instead of relying
            //on substring() throwing IndexOutOfBoundsException
            if(line.isEmpty() || line.startsWith("#")){
                continue;
            }
            String[] f = line.split("\t");
            if(f.length >= 3){
                try{
                    double depth = Double.parseDouble(f[0]);
                    Double.parseDouble(f[1]);//shift must also be numeric for the row to count
                    double discharge = Double.parseDouble(f[2]);
                    //Output order: column 0 = discharge, column 1 = depth
                    rows.add(new double[]{discharge, depth});
                }catch(NumberFormatException ignored){
                    //Header and other non-numeric rows are intentionally skipped
                }
            }
        }
        return rows.toArray(new double[rows.size()][]);
    }
}