package WaterData;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import utils.WebPageUtils;
/**
* Last Updated: 9-April-2019
* @author Tyler Wible
* @since 21-June-2012
*/
public class WaterData_USGS implements WaterDataInterface{
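//Short name of this data source, used when building error messages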
public String database = "USGS";
@Override
public String getDataSourceCitation(){
//Get today's date for the source reference
DateFormat sourceDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
String today = sourceDateFormat.format(new Date());
//Cite USGS NWIS
String dataSource = "Stream flow data and water quality test data retrieved from the U.S. Geological Survey, National Water Information System: Web Interface. http://waterdata.usgs.gov/nwis, accessed: " + today;
return dataSource;
}
@Override
public ArrayList<String> extractFlowData_raw(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
//Specify flow website from inputs
//https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&begin_date=1900-01-01&end_date=2015-07-01&site_no=07369654&referred_module=sw
String flowUrl = "https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb" +
"&site_no=" + stationId +
"&begin_date=" + startDate +
"&end_date=" + endDate + "&referred_module=sw";
//Fetch the flow data webpage for the current USGS station
ArrayList<String> webpageAll = new ArrayList<>();
try {
webpageAll = WebPageUtils.downloadWebpage(flowUrl);
} catch (IOException ex) {
throw new WaterDataException("The was an issue extracting " + database + " flow data from the specified URl: " + flowUrl + "." + ex.getMessage());
}
return webpageAll;
}
@Override
public String[][] extractFlowData_formatted(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
//Fetch flow data
ArrayList<String> webpageAll = extractFlowData_raw(directory, orgId, stationId, startDate, endDate);
//Extract data from the result webpage
ArrayList<String> stationData = new ArrayList<>();
for (String temp_pageData : webpageAll) {
String[] f = temp_pageData.split("\t");
if ((f.length >= 4) && ("USGS".equals(f[0]))) {
boolean Ice = f[3].equalsIgnoreCase("Ice");
boolean Ssn = f[3].equalsIgnoreCase("Ssn");
boolean Dis = f[3].equalsIgnoreCase("Dis");
boolean rat = f[3].equalsIgnoreCase("Rat");
boolean eqp = f[3].equalsIgnoreCase("Eqp");
boolean mnt = f[3].equalsIgnoreCase("Mnt");
boolean other = f[3].equalsIgnoreCase("***");
boolean blank = f[3].equalsIgnoreCase("");
if (!Ice && !Ssn && !Dis && !rat && !eqp && !mnt && !other && !blank) {
//Keep only the desired data
//f[1] = StationID
//f[2] = Date
//f[3] = FlowValue
stationData.add(f[1] + "\t" + f[2] + "\t" + f[3]);
}
}
}
//Reformat data into a two-column array (column 0 = date, column 1 = flow value)
String[][] returnArray = new String[stationData.size()][2];
for(int i=0; i<returnArray.length; i++){
String[] currentColumns = stationData.get(i).split("\t");
//currentColumns[0] = stationId
//currentColumns[1] = date
//currentColumns[2] = value
returnArray[i][0] = currentColumns[1];
returnArray[i][1] = currentColumns[2];
}
return returnArray;
}
@Override
public ArrayList<String> extractWaterQualityData_raw(String directory, String orgId, String stationId, String startDate, String endDate, String wqTest) throws WaterDataException {
//Specify water quality website from inputs
//String WQWebsite = "http://waterdata.usgs.gov/nwis/nwisman/?site_no=" + stationId + "&agency_cd=USGS";
String wqUrl = "https://nwis.waterdata.usgs.gov/usa/nwis/qwdata/?" +
"site_no=" + stationId +
"&begin_date=" + startDate +
"&end_date=" + endDate +
"&agency_cd=USGS&inventory_output=0&rdb_inventory_output=value&TZoutput=0&pm_cd_compare=Greater%20than&radio_parm_cds=all_parm_cds&qw_attributes=0&format=rdb&qw_sample_wide=0&rdb_qw_attributes=0&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list";
//Fetch the water quality data webpage for the current USGS station
ArrayList<String> webpageAll = new ArrayList<>();
try {
webpageAll = WebPageUtils.downloadWebpage(wqUrl);
} catch (IOException ex) {
throw new WaterDataException("The was an issue extracting " + database + " water quality data from the specified URl: " + wqUrl + "." + ex.getMessage());
}
return webpageAll;
}
@Override
public String[][] extractWaterQualityData_formatted(String directory, String orgId, String stationId, String startDate, String endDate, String wqTest) throws WaterDataException {
//Fetch water quality data
ArrayList<String> webpageAll = extractWaterQualityData_raw(directory, orgId, stationId, startDate, endDate, wqTest);
//Keep only the desired rows of the downloaded webpage
ArrayList<String> stationData = new ArrayList<>();
for (String temp_pageData : webpageAll) {
String[] f = temp_pageData.split("\t");
if ( (f.length >= 15) && (f[0].equals("USGS")) ) {
String WQSample_code = f[12];
String WQSample_result = f[14];
boolean A = WQSample_code.equals("");
boolean B = WQSample_result.equals("");
if (!A && !B){
//Keep only the rows which contain the desired values of "agency_cd site_no sample_dt...
// sample_tm sample_end_dt sample_end_tm sample_start_time_datum_cd tm_datum_rlbty_cd...
// coll_ent_cd medium_cd tu_id body_part_id parm_cd remark_cd result_va"
//Pull out only the data needed to pass between sub-functions
//f[1] = stationId
//f[2] = date
//f[12] = water quality test code
//f[14] = water quality test value
stationData.add(f[2] + "\t" + f[12] + "\t" + f[14]);
}
}
}
//Parse out the five-digit USGS parameter code
String wqCode = wqTest;
if(wqCode.length() > 5){//i.e., wqCode is neither "all" (which keeps everything) nor a bare parameter code; e.g. "00060 - Discharge" reduces to "00060"
wqCode = wqCode.substring(0,5);
}
//First pass: count how many rows match the requested water quality test code
int ctr = 0;
for(int i=0; i<stationData.size(); i++){
String[] columns = stationData.get(i).split("\t");
if(wqCode.equalsIgnoreCase("all")){
ctr++;
}else if(columns[1].equalsIgnoreCase(wqCode)){
ctr++;
}
}
//Second pass: reformat the matching rows into a two-column array
String[][] returnArray = new String[ctr][2];
ctr=0;
for(int i=0; i<stationData.size(); i++){
String[] columns = stationData.get(i).split("\t");
if(wqCode.equalsIgnoreCase("all")){
returnArray[ctr][0] = columns[0];//date
returnArray[ctr][1] = columns[2];//WQ test result value
ctr++;
}else if(columns[1].equalsIgnoreCase(wqCode)){
returnArray[ctr][0] = columns[0];//date
returnArray[ctr][1] = columns[2];//WQ test result value
ctr++;
}
}
return returnArray;
}
@Override
public ArrayList<String> extractFloodData_raw(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
//Specify flow website from inputs
//https://nwis.waterdata.usgs.gov/nwis/peak?format=rdb&site_no=06764880&period=&begin_date=1990-01-01&end_date=2017-01-01&agency_cd=USGS
String peakFlowUrl = "https://nwis.waterdata.usgs.gov/nwis/peak?" +
"site_no=" + stationId +
"&period=&begin_date=" + startDate +
"&end_date=" + endDate + "&agency_cd=USGS&format=rdb";
//Get peak flow (flood) data
ArrayList<String> webpageAll;
try {
webpageAll = WebPageUtils.downloadWebpage(peakFlowUrl);
} catch (IOException ex) {
throw new WaterDataException("The was an issue extracting " + database + " peak flow (flood) data from the specified URl: " + peakFlowUrl + "." + ex.getMessage());
}
return webpageAll;
}
@Override
public double[][] extractFloodData_formatted(String directory, String orgId, String stationId, String startDate, String endDate) throws WaterDataException {
//Get peak flow (flood) data
ArrayList<String> peakWebPage = extractFloodData_raw(directory, orgId, stationId, startDate, endDate);
//Loop through and pull out the desired data
ArrayList<String> stationData = new ArrayList<>();
for (String temp_pageData : peakWebPage) {
String[] f = temp_pageData.split("\t");
if ((f.length >= 5) && ("USGS".equals(f[0]))) {
boolean Ice = f[4].equalsIgnoreCase("Ice");
boolean Ssn = f[4].equalsIgnoreCase("Ssn");
boolean Dis = f[4].equalsIgnoreCase("Dis");
boolean rat = f[4].equalsIgnoreCase("Rat");
boolean eqp = f[4].equalsIgnoreCase("Eqp");
boolean mnt = f[4].equalsIgnoreCase("Mnt");
boolean other = f[4].equalsIgnoreCase("***");
boolean blank = f[4].equalsIgnoreCase("");
if (!Ice && !Ssn && !Dis && !rat && !eqp && !mnt && !other && !blank) {
//Keep only the rows which contain the desired values of "USGS StationNumber Date Time FlowValue"
//USGS currently serves peak flow data in two column layouts, so detect which column holds the yyyy-mm-dd date
boolean date2 = f[2].length() == 10; //length of a date string in yyyy-mm-dd format
boolean date4 = f[4].length() == 10; //length of a date string in yyyy-mm-dd format
if (f.length > 6 && date4 && !date2) {
//f[1] = stationId
//f[4] = date
//f[6] = peak flow (cfs)
stationData.add(f[1] + "\t" + f[4] + "\t" + f[6]);
} else {
//f[1] = stationId
//f[2] = date
//f[4] = peak flow (cfs)
stationData.add(f[1] + "\t" + f[2] + "\t" + f[4]);
}
}
}
}
//Reformat data into a two-column array (column 0 = water year, column 1 = peak flow value)
double[][] returnArray = new double[stationData.size()][2];
for(int i=0; i<returnArray.length; i++){
String[] currentColumns = stationData.get(i).split("\t");
//currentColumns[0] = stationId
//currentColumns[1] = date
//currentColumns[2] = value
returnArray[i][0] = Double.valueOf(WaterQualityInfo.getWaterYear(currentColumns[1]));
returnArray[i][1] = Double.valueOf(currentColumns[2]);
}
return returnArray;
}
@Override
public ArrayList<String> extractInstantaneousFlowData_raw(String directory, String stationId, String startDate, String endDate) throws WaterDataException {
//Artificial date limit: records before 2007-10-01 are still in USGS's Instantaneous Data Archive, whose transition to NWIS is incomplete
if(startDate.compareToIgnoreCase("2007-10-01") < 0){
startDate = "2007-10-01";
}
if(endDate.compareToIgnoreCase("2007-10-01") < 0){
endDate = "2007-10-01";
}
//Specify flow website from inputs
//Instantaneous Data Archive (IDA) website (to be discontinued in 2015) = "http://ida.water.usgs.gov/ida/available_records.cfm?sn=" + stationId;
String flowUrl = "https://nwis.waterdata.usgs.gov/nwis/uv?cb_00060=on&format=rdb" +
"&site_no=" + stationId + "&period=" +
"&begin_date=" + startDate +
"&end_date=" + endDate;
//Get 15-minute (instantaneous) flow data
ArrayList<String> webpageAll;
try {
webpageAll = WebPageUtils.downloadWebpage_slowData(flowUrl);
} catch (IOException ex) {
throw new WaterDataException("The was an issue extracting " + database + " instantaneous flow (15-minute flow) data from the specified URl: " + flowUrl + "." + ex.getMessage());
}
return webpageAll;
}
@Override
public String[][] extractInstantaneousFlowData_formatted(String directory, String stationId, String startDate, String endDate) throws WaterDataException {
//Get 15-minute flow (instantaneous) data
ArrayList<String> webpageAll = extractInstantaneousFlowData_raw(directory, stationId, startDate, endDate);
//Keep only the desired rows of the downloaded webpage
ArrayList<String> stationData = new ArrayList<>();
for (String temp_pageData : webpageAll) {
String[] f = temp_pageData.split("\t");
if ((f.length >= 4) && ("USGS".equals(f[0]))) {
boolean Ice = f[4].equalsIgnoreCase("Ice");
boolean Ssn = f[4].equalsIgnoreCase("Ssn");
boolean Dis = f[4].equalsIgnoreCase("Dis");
boolean rat = f[4].equalsIgnoreCase("Rat");
boolean eqp = f[4].equalsIgnoreCase("Eqp");
boolean mnt = f[4].equalsIgnoreCase("Mnt");
boolean other = f[4].equalsIgnoreCase("***");
boolean blank = f[4].equalsIgnoreCase("");
if (!Ice && !Ssn && !Dis && !rat && !eqp && !mnt && !other && !blank) {
//Pull out only the data needed to pass between sub-functions
//f[1] = StationID
//f[2] = Date
//f[3] = time zone (e.g. MDT = Mountain Daylight Time)
//f[4] = FlowValue
stationData.add(f[1] + "\t" + f[2] + "\t" + f[3] + "\t" + f[4]);
}
}
}
//Reformat data into a two-column array (column 0 = date-time, column 1 = flow value)
String[][] returnArray = new String[stationData.size()][2];
for(int i=0; i<returnArray.length; i++){
String[] currentColumns = stationData.get(i).split("\t");
//currentColumns[0] = stationId
//currentColumns[1] = date (yyyy-MM-dd HH:mm)
//currentColumns[2] = time zone
//currentColumns[3] = value
returnArray[i][0] = currentColumns[1];
returnArray[i][1] = currentColumns[3];
}
return returnArray;
}
@Override
public ArrayList<String> extractStageDischarge_raw(String stationId) throws WaterDataException {
//Specify stage discharge (rating curve) website from inputs
//"https://nwis.waterdata.usgs.gov/nwisweb/data/ratings/exsa/USGS.06741510.exsa.rdb"
String stageDischargeUrl = "https://nwis.waterdata.usgs.gov/nwisweb/data/ratings/exsa/USGS." + stationId + ".exsa.rdb";
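//The "exsa" product is USGS's expanded, shift-adjusted rating table; its RDB columns are
//INDEP (stage), SHIFT, and DEP (discharge), which is the column order assumed when parsing below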
//Get stage discharge (rating curve) data
ArrayList<String> webpageAll;
try {
webpageAll = WebPageUtils.downloadWebpage(stageDischargeUrl);
} catch (IOException ex) {
throw new WaterDataException("The was an issue extracting " + database + " stage discharge (rating curve) data from the specified URl: " + stageDischargeUrl + "." + ex.getMessage());
}
return webpageAll;
}
@Override
public double[][] extractStageDischarge_formatted(String stationId) throws WaterDataException {
//Get stage discharge (rating curve) data
ArrayList<String> webpageAll = extractStageDischarge_raw(stationId);
//Keep only the rows whose three columns all parse as numbers
ArrayList<String> stationData = new ArrayList<>();
for (String temp_pageData : webpageAll) {
String[] f = temp_pageData.split("\t");
//Skip comment lines (which start with "#") and rows with too few columns
if ((f.length >= 3) && !temp_pageData.startsWith("#")) {
try{
//Validate that all three columns are numeric before keeping the row
Double.parseDouble(f[0]); //depth
Double.parseDouble(f[1]); //shift
Double.parseDouble(f[2]); //discharge
stationData.add(f[0] + "\t" + f[1] + "\t" + f[2]);
}catch(NumberFormatException e){
//Skip this entry and move on to the next line in the file
}
}
}
//Reformat data
double[][] returnArray = new double[stationData.size()][2];
for(int i=0; i<returnArray.length; i++){
String[] currentColumns = stationData.get(i).split("\t");
//currentColumns[0] = depth
//currentColumns[1] = shift
//currentColumns[2] = discharge
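//Note the column swap relative to the source file: column 0 = discharge, column 1 = depth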
returnArray[i][0] = Double.parseDouble(currentColumns[2]);
returnArray[i][1] = Double.parseDouble(currentColumns[0]);
}
return returnArray;
}
// @Override
// public ArrayList<String> extractDepthData_raw(String stationId, String startDate, String endDate) throws WaterDataException {
// //Artificial limit due to the current status of USGS's Instantaneous Data Archive transition to NWIS
// if(startDate.compareToIgnoreCase("2007-10-01") < 0){
// startDate = "2007-10-01";
// }
// if(endDate.compareToIgnoreCase("2007-10-01") < 0){
// endDate = "2007-10-01";
// }
//
// //Specify flow website from inputs
// //Instantaneous Data Archive (IDA) website (to be discontinued in 2015) = "http://ida.water.usgs.gov/ida/available_records.cfm?sn=" + stationId;
// String stageUrl = "https://nwis.waterdata.usgs.gov/nwis/uv?cb_00065=on&format=rdb&site_no=" + stationId +
// "&period=&begin_date=" + startDate + "&end_date=" + endDate;
//
// //Get stage (flow depth) data
// ArrayList<String> webpageAll;
// try {
// webpageAll = WebPageUtils.downloadWebpage_slowData(stageUrl);
// } catch (IOException ex) {
// throw new WaterDataException("There was an issue extracting " + database + " stage (flow depth) data from the specified URL: " + stageUrl + ". " + ex.getMessage());
// }
// return webpageAll;
// }
//
// @Override
// public String[][] extractDepthData_formatted(String stationId, String startDate, String endDate) throws WaterDataException {
// //Get stage (flow depth) data
// ArrayList<String> webpageAll = extractDepthData_raw(stationId, startDate, endDate);
//
// //Pull out new arraylist of only the desired data from the arraylist to return as the web page result
// Iterator<String> iterate = webpageAll.iterator( );
// ArrayList<String> textData = new ArrayList<>();
// while(iterate.hasNext()){
// String temp_pageData = (String) iterate.next();
// String[] f = temp_pageData.split("\t");
//
// if ((f.length >= 4) && ("USGS".equals(f[0]))) {
// boolean Ice = f[4].equalsIgnoreCase("Ice");
// boolean Ssn = f[4].equalsIgnoreCase("Ssn");
// boolean Dis = f[4].equalsIgnoreCase("Dis");
// boolean rat = f[4].equalsIgnoreCase("Rat");
// boolean eqp = f[4].equalsIgnoreCase("Eqp");
// boolean mnt = f[4].equalsIgnoreCase("Mnt");
// boolean other = f[4].equalsIgnoreCase("***");
// boolean blank = f[4].equalsIgnoreCase("");
// if (!Ice && !Ssn && !Dis && !rat && !eqp && !mnt && !other && !blank) {
// //Pull out only the data needed to pass between sub-functions
// //f[1] = StationID
// //f[2] = Date-Time
// //f[3] = time location (ex. MDT = mountain daylight time)
// //f[4] = stage
// textData.add(f[1] + "\t" + f[2] + "\t" + f[3] + "\t" + f[4]);
// }
// }
// }
//
// //convert Array list into String[][] array (column1 = date, column2 = value)
// String[][] returnArray = new String[textData.size()][2];
// for(int i=0; i<returnArray.length; i++){
// String[] currentColumns = textData.get(i).split("\t");
// //currentColumns[0] = stationId
// //currentColumns[1] = date (yyyy-MM-dd hh:mm)
// //currentColumns[2] = time location
// //currentColumns[3] = value
// returnArray[i][0] = currentColumns[1];
// returnArray[i][1] = currentColumns[3];
// }
// return returnArray;
// }
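/**
* Minimal usage sketch, not part of the original interface: fetches one station's daily
* flow record and prints the citation. Assumes network access to NWIS; the station ID
* below is borrowed from the rating-curve example URL above and may return no data.
*/
public static void main(String[] args) throws WaterDataException {
WaterData_USGS usgs = new WaterData_USGS();
String[][] flows = usgs.extractFlowData_formatted("", "USGS", "06741510", "2015-01-01", "2015-12-31");
System.out.println("Retrieved " + flows.length + " daily flow records");
System.out.println(usgs.getDataSourceCitation());
}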
}