@@ -1,62 +1,22 @@ |
package datadownload; |
|
-import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; |
-import com.gargoylesoftware.htmlunit.TextPage; |
-import com.gargoylesoftware.htmlunit.WebClient; |
-import com.gargoylesoftware.htmlunit.WebWindow; |
-import com.gargoylesoftware.htmlunit.WebWindowEvent; |
-import com.gargoylesoftware.htmlunit.WebWindowListener; |
-import com.gargoylesoftware.htmlunit.html.HtmlAnchor; |
-import com.gargoylesoftware.htmlunit.html.HtmlPage; |
-import com.gargoylesoftware.htmlunit.html.HtmlSelect; |
-import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; |
-import com.gargoylesoftware.htmlunit.html.HtmlTextArea; |
import java.io.BufferedReader; |
+import java.io.FileNotFoundException; |
import java.io.IOException; |
import java.io.InputStreamReader; |
-import java.net.MalformedURLException; |
+import java.net.SocketException; |
import java.net.URL; |
import java.net.URLConnection; |
import java.util.ArrayList; |
import java.util.Iterator; |
-import java.util.LinkedList; |
-import java.util.List; |
|
/** |
-* Last Updated: 2-November-2012 |
+* Last Updated: 3-September-2014 |
* @author Tyler Wible |
* @since 21-June-2012 |
*/ |
public class USGS_Data { |
/** |
- * Opens a web connection to USGS and returns the contents of a search for all flow data for the specific station and date range |
- * @param stationID the USGS station ID for the current station |
- * @param beginDate the user specified begin date for the station (yyyy-mm-dd format) |
- * @param endDate the user specified end date for the station (yyyy-mm-dd format) |
- * @return an ArrayList<String> containing the results of the search for flow data using the above inputs |
- * @throws IOException |
- */ |
- public ArrayList<String> DownloadFlowWebpage(String stationID, String beginDate, String endDate) throws IOException { |
- //Specify flow website from inputs |
- String flowWebsite = "http://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&begin_date=" + |
- beginDate + "&end_date=" + endDate + "&site_no=" + stationID + "&referred_module=sw"; |
- |
- //Open the provided website |
- URL webpage = new URL(flowWebsite); |
- URLConnection yc = webpage.openConnection(); |
- BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream())); |
- //Read out all of the webpage out into an ArrayList<String> |
- String inputLine; |
- ArrayList<String> pageData = new ArrayList<String>( ); |
- |
- while((inputLine = in.readLine()) != null){ |
- pageData.add(inputLine); |
- } |
- in.close(); |
- |
- return pageData; |
- } |
- /** |
* Get the flow webpage and loop through and pull out the flow data for the current station |
* @param stationID the USGS station ID for the current station |
* @param beginDate the user specified begin date for the station (yyyy-mm-dd format) |
@@ -64,10 +24,10 @@ |
* @return a String[][] containing column1 = date(yyyy-mm-dd), column2 = flowValue |
* @throws IOException |
*/ |
- public String[][] USGS_read_FDC(String stationID, String beginDate, String endDate) throws IOException{ |
+ public Object[] getUSGSflowData(String stationID, String beginDate, String endDate) throws IOException{ |
//Get the webpage of data for the USGS flow station |
ArrayList<String> webpageAll = DownloadFlowWebpage(stationID, beginDate, endDate); |
- |
+ |
//Pull out new arraylist of only the desired data from the arraylist to return as the web page result |
Iterator<String> iterate = webpageAll.iterator( ); |
ArrayList<String> textData = new ArrayList<String>(); |
@@ -105,23 +65,33 @@ |
stringArray[i][0] = currentColumns[1]; |
stringArray[i][1] = currentColumns[2]; |
} |
- |
- |
- return stringArray; |
-} |
+ |
+ //Save analysis results |
+ String start = "-1"; |
+ String end = "-1"; |
+ if(stringArray.length > 0){ |
+ start = stringArray[0][0]; |
+ end = stringArray[stringArray.length - 1][0]; |
+ } |
+ |
+ Object[] returnArray = {webpageAll, stringArray, start, end}; |
+ return returnArray; |
+ } |
/** |
- * Opens a web connection to USGS and returns the contents of a search for all peak flow data for the specific station |
+ * Opens a web connection to USGS and returns the contents of a search for all flow data for the specific station and date range |
* @param stationID the USGS station ID for the current station |
+ * @param beginDate the user specified begin date for the station (yyyy-mm-dd format) |
+ * @param endDate the user specified end date for the station (yyyy-mm-dd format) |
* @return an ArrayList<String> containing the results of the search for flow data using the above inputs |
* @throws IOException |
*/ |
- public ArrayList<String> DownloadPeakFlowWebpage(String stationID) throws IOException { |
+ public ArrayList<String> DownloadFlowWebpage(String stationID, String beginDate, String endDate) throws IOException { |
//Specify flow website from inputs |
- String peakWebsite = "http://nwis.waterdata.usgs.gov/nwis/peak?site_no=" + stationID + "&agency_cd=USGS&format=rdb"; |
- |
- |
+ String flowWebsite = "http://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&begin_date=" + |
+ beginDate + "&end_date=" + endDate + "&site_no=" + stationID + "&referred_module=sw"; |
+ |
//Open the provided website |
- URL webpage = new URL(peakWebsite); |
+ URL webpage = new URL(flowWebsite); |
URLConnection yc = webpage.openConnection(); |
BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream())); |
//Read out all of the webpage out into an ArrayList<String> |
@@ -131,6 +101,7 @@ |
while((inputLine = in.readLine()) != null){ |
pageData.add(inputLine); |
} |
+ in.close(); |
|
return pageData; |
} |
@@ -142,7 +113,7 @@ |
* @return an ArrayList<String> containing the flow results tab deliminated (stationID \t date \t flowValue) |
* @throws IOException |
*/ |
- public ArrayList<String> getUSGSPeakData(String stationID) throws IOException{ |
+ public Object[] getUSGSPeakData(String stationID, String beginDate, String endDate) throws IOException{ |
//Get peak flow data |
ArrayList<String> peakWebPage = DownloadPeakFlowWebpage(stationID); |
|
@@ -171,29 +142,269 @@ |
} |
} |
} |
+ |
+ //Convert the array into a double array and remove data not within the date range provided |
+ double[][] doubleData = convertUSGSpeakData(textData, beginDate, endDate); |
+ |
+ //Save analysis results |
+ double start = -1; |
+ double end = -1; |
+ if(doubleData.length > 0){ |
+ start = doubleData[0][0]; |
+ end = doubleData[doubleData.length - 1][0]; |
+ } |
+ |
+ Object[] returnArray = {peakWebPage, doubleData, start, end}; |
+ return returnArray; |
+ } |
+ /** |
+ * Opens a web connection to USGS and returns the contents of a search for all peak flow data for the specific station |
+ * @param stationID the USGS station ID for the current station |
+ * @return an ArrayList<String> containing one entry per line of the peak flow search result for the above input |
+ * @throws IOException if the USGS page cannot be opened or read |
+ */ |
+ public ArrayList<String> DownloadPeakFlowWebpage(String stationID) throws IOException { |
+ //Specify peak flow website from inputs |
+ String peakWebsite = "http://nwis.waterdata.usgs.gov/nwis/peak?site_no=" + stationID + "&agency_cd=USGS&format=rdb"; |
|
- return textData; |
+ |
+ //Open the provided website |
+ URL webpage = new URL(peakWebsite); |
+ URLConnection yc = webpage.openConnection(); |
+ BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream())); |
+ //Read out all of the webpage out into an ArrayList<String> |
+ String inputLine; |
+ ArrayList<String> pageData = new ArrayList<String>( ); |
+ |
+ while((inputLine = in.readLine()) != null){ |
+ pageData.add(inputLine); |
+ } |
+ in.close(); |
+ return pageData; |
} |
/** |
- * Checks if the webpage contains the search keyword of not, usually used to find a webpage error and return a false value |
- * @param in the BufferedReader for the webpage |
- * @param keyword the String keyword that is looked for in the webpage |
- * @return returns true if the webpage contains the keyword, false otherwise |
+ * Converts the string array into a double array. First it substrings the year from the |
+ * first column (the date of the flows) and then only converts the year into a double. |
+ * Then converts the flow values into doubles. It also only keeps data within the provided date range |
+ * @param stringData The ArrayList<String> array containing dates (YYYY-mm-dd) in the first column |
+ * and flow values (cfs) in the second column |
+ * @param beginDate the user defined begin date |
+ * @param endDate the user defined end date |
+ * @return returns a double[][] array with one row for each record whose year falls within the begin and end years |
+ * (inclusive), containing the first column of years and the second column of flow values |
+ */ |
+ public double[][] convertUSGSpeakData(ArrayList<String> stringData, String beginDate, String endDate){ |
+ String beginYear = beginDate.substring(0,4); |
+ String endYear = endDate.substring(0,4); |
+ int ctr = 0; |
+ |
+ for(int i=0; i<stringData.size(); i++){ |
+ String[] f = stringData.get(i).split("\t"); |
+ String year = f[0].substring(0,4); |
+ |
+ //Only keep flood data years within the user defined date range |
+ if(year.compareToIgnoreCase(beginYear) >=0 && year.compareToIgnoreCase(endYear) <= 0){ |
+ ctr++; |
+ } |
+ } |
+ if(ctr == 1){ |
+ System.out.println("There is " + ctr + " flood year in the current analysis"); |
+ }else{ |
+ System.out.println("There are " + ctr + " flood years in the current analysis"); |
+ } |
+ |
+ //Initialize the return array |
+ double[][] doubleData = new double[ctr][2]; |
+ ctr = 0; |
+ |
+ for(int i=0; i<stringData.size(); i++){ |
+ String[] f = stringData.get(i).split("\t"); |
+ String year = f[0].substring(0,4); |
+ |
+ //Only keep flood data years within the user defined date range |
+ if(year.compareToIgnoreCase(beginYear) >=0 && year.compareToIgnoreCase(endYear) <= 0){ |
+ //convert the strings into doubles |
+ doubleData[ctr][0] = Double.valueOf(year);//year |
+ doubleData[ctr][1] = Double.valueOf(f[1]);//flow value |
+ ctr++; |
+ } |
+ } |
+ |
+ return doubleData; |
+ } |
+ /** |
+ * Get the 15 minute flow webpage and loop through and pull out the flow data for the current station |
+ * @param stationID the USGS station ID for the current station |
+ * @param beginDate the user specified begin date for the station (yyyy-mm-dd format) |
+ * @param endDate the user specified end date for the station (yyyy-mm-dd format) |
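+ * @return an Object[] of {the raw webpage lines (ArrayList<String>), a String[][] containing column1 = date(yyyy-MM-dd hh:mm), column2 = flowValue, the first date, the last date}; both dates are "-1" if no data was found |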
* @throws IOException |
*/ |
- public boolean getWQPage(BufferedReader in, String keyword) throws IOException{ |
- String inputLine = ""; |
+ public Object[] getUSGS15minFlowData(String stationID, String beginDate, String endDate) throws IOException{ |
+ //Artificial limit due to the current status of USGS's Instantaneous Data Archive transition to NWIS |
+ if(beginDate.compareToIgnoreCase("2007-10-01") < 0){ |
+ beginDate = "2007-10-01"; |
+ } |
+ if(endDate.compareToIgnoreCase("2007-10-01") < 0){ |
+ endDate = "2007-10-01"; |
+ } |
+ |
+ //Get the webpage of data for the USGS flow station |
+ ArrayList<String> webpageAll = Download15minFlowWebpage(stationID, beginDate, endDate); |
+ |
+ //Pull out new arraylist of only the desired data from the arraylist to return as the web page result |
+ Iterator<String> iterate = webpageAll.iterator( ); |
+ ArrayList<String> textData = new ArrayList<String>(); |
|
- boolean containsKeyword = false; |
-// System.out.println("Current webpage: \n"); |
- while((inputLine = in.readLine()) != null) { |
- if(inputLine.contains(keyword)){ |
-// System.out.println(inputLine); |
- containsKeyword = true; |
- break; |
+ while(iterate.hasNext()){ |
+ String temp_pageData = (String) iterate.next(); |
+ String[] f = temp_pageData.split("\t"); |
+ //Require 5 columns because f[4] is read below (split drops trailing empty columns, so rows with no value are shorter) |
+ if ((f.length >= 5) && ("USGS".equals(f[0]))) { |
+ boolean Ice = f[4].equalsIgnoreCase("Ice"); |
+ boolean Ssn = f[4].equalsIgnoreCase("Ssn"); |
+ boolean Dis = f[4].equalsIgnoreCase("Dis"); |
+ boolean rat = f[4].equalsIgnoreCase("Rat"); |
+ boolean eqp = f[4].equalsIgnoreCase("Eqp"); |
+ boolean other = f[4].equalsIgnoreCase("***"); |
+ boolean blank = f[4].equalsIgnoreCase(""); |
+ if (!Ice && !Ssn && !Dis && !rat && !eqp && !other && !blank) { |
+ //Pull out only the data needed to pass between sub-functions |
+ |
+ //f[1] = StationID |
+ //f[2] = Date |
+ //f[3] = time location (ex. MDT = mountain daylight time) |
+ //f[4] = FlowValue |
+ textData.add(f[1] + "\t" + f[2] + "\t" + f[3] + "\t" + f[4]); |
+ } |
} |
} |
- return containsKeyword; |
+ |
+ //convert Array list into String[][] array (column1 = date, column2 = value) |
+ String[][] stringArray = new String[textData.size()][2]; |
+ for(int i=0; i<stringArray.length; i++){ |
+ String[] currentColumns = textData.get(i).split("\t"); |
+ //currentColumns[0] = stationID |
+ //currentColumns[1] = date (yyyy-MM-dd hh:mm) |
+ //currentColumns[2] = time location |
+ //currentColumns[3] = value |
+ stringArray[i][0] = currentColumns[1]; |
+ stringArray[i][1] = currentColumns[3]; |
+ } |
+ |
+ //Save analysis results ("-1" marks that no usable rows were found) |
+ String start = "-1"; |
+ String end = "-1"; |
+ if(stringArray.length > 0){ |
+ start = stringArray[0][0]; |
+ end = stringArray[stringArray.length - 1][0]; |
+ } |
+ |
+ Object[] returnArray = {webpageAll, stringArray, start, end}; |
+ return returnArray; |
+ } |
+ /** |
+ * Opens a web connection to USGS and returns the contents of a search for 15 minute flow data for the specific station and date range |
+ * @param stationID the USGS station ID for the current station |
+ * @param beginDate the user specified begin date for the station (yyyy-mm-dd format) |
+ * @param endDate the user specified end date for the station (yyyy-mm-dd format) |
+ * @return an ArrayList<String> containing the results of the search for 15 minute flow data using the above inputs |
+ * @throws IOException |
+ */ |
+ public ArrayList<String> Download15minFlowWebpage(String stationID, String beginDate, String endDate) throws IOException { |
+ //Specify flow website from inputs |
+ String flowWebsite = "http://nwis.waterdata.usgs.gov/nwis/uv?cb_00060=on&format=rdb&site_no=" + stationID + |
+ "&period=&begin_date=" + beginDate + "&end_date=" + endDate; |
+ //Instantaneous Data Archive (IDA) website (to be discontinued in 2015) = "http://ida.water.usgs.gov/ida/available_records.cfm?sn=" + stationID; |
+ |
+ //Open the provided website |
+ URL webpage = new URL(flowWebsite); |
+ URLConnection yc = webpage.openConnection(); |
+ BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream())); |
+ //Read out all of the webpage out into an ArrayList<String> |
+ ArrayList<String> pageData = new ArrayList<String>( ); |
+ boolean moreLines = true; |
+ while(moreLines){ |
+ try{ |
+ String inputLine = in.readLine(); |
+ if(inputLine != null){ |
+ pageData.add(inputLine); |
+ }else{ |
+ moreLines = false; |
+ } |
+ }catch(SocketException e){ |
+ //The webpage is angry for some reason so quit out |
+ moreLines = false; |
+ } |
+ } |
+ in.close(); |
+ |
+ return pageData; |
+ } |
+ /** |
+ * Get the water quality webpage and loop through and pull out the water quality data for the current station |
+ * @param stationID the USGS station ID for the current station |
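+ * @return an Object[] of {the raw webpage lines (ArrayList<String>), a String[][] containing column1 = date, column2 = water quality test code, column3 = value, the first date, the last date}; both dates are "-1" if no data was found |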
+ * @throws IOException |
+ * @throws InterruptedException |
+ */ |
+ public Object[] getUSGSwqData(String stationID) throws IOException, InterruptedException{ |
+ |
+ //Get the webpage of data for the USGS flow station |
+ ArrayList<String> webpageAll = DownloadWQwebpage(stationID); |
+// ArrayList<String> webpageAll = DownloadWQwebpage_HtmlUnit(stationID, wqTestCode); |
+ |
+ //Pull out new arraylist of only the desired data from the arraylist to return as the web page result |
+ Iterator<String> iterate = webpageAll.iterator( ); |
+ ArrayList<String> textData = new ArrayList<String>(); |
+ while(iterate.hasNext()){ |
+ String temp_pageData = (String) iterate.next(); |
+ String[] f = temp_pageData.split("\t"); |
+ |
+ if ( (f.length >= 15) && (f[0].equals("USGS")) ) { |
+ String WQSample_code = f[12]; |
+ String WQSample_result = f[14]; |
+ boolean A = WQSample_code.equals(""); |
+ boolean B = WQSample_result.equals(""); |
+ if (!A && !B){ |
+ //Count only the rows which contain the desired values of "agency_cd site_no sample_dt... |
+ // sample_tm sample_end_dt sample_end_tm sample_start_time_datum_cd tm_datum_rlbty_cd... |
+ // coll_ent_cd medium_cd tu_id body_part_id parm_cd remark_cd result_va" |
+ |
+ //Pull out only the data needed to pass between sub-functions |
+ //f[1] = stationID |
+ //f[2] = date |
+ //f[12] = water quality test code |
+ //f[14] = water quality test value |
+ textData.add(f[1] + "\t" + f[2] + "\t" + f[12] + "\t" + f[14]); |
+ } |
+ } |
+ } |
+ |
+ //convert Array list into String[][] array (column1 = date, column2 = value) |
+ String[][] stringArray = new String[textData.size()][3]; |
+ for(int i=0; i<textData.size(); i++){ |
+ String[] currentColumns = textData.get(i).split("\t"); |
+ //currentColumns[0] = stationID |
+ //currentColumns[1] = date |
+ //currentColumns[2] = water quality test code |
+ //currentColumns[3] = water quality test value |
+ |
+ stringArray[i][0] = currentColumns[1];//date |
+ stringArray[i][1] = currentColumns[2];//test code |
+ stringArray[i][2] = currentColumns[3];//value |
+ } |
+ |
+ //Save analysis results |
+ String start = "-1"; |
+ String end = "-1"; |
+ if(stringArray.length > 0){ |
+ start = stringArray[0][0]; |
+ end = stringArray[stringArray.length - 1][0]; |
+ } |
+ |
+ Object[] returnArray = {webpageAll, stringArray, start, end}; |
+ return returnArray; |
} |
/** |
* Opens a web connection to USGS and returns the contents of a search for all water quality data for the specific station |
@@ -234,11 +445,31 @@ |
ArrayList<String> pageData = new ArrayList<String>( ); |
while((inputLine = in.readLine()) != null){ |
pageData.add(inputLine); |
-// System.out.println(inputLine); |
} |
return pageData; |
} |
/** |
+ * Checks if the webpage contains the search keyword or not, usually used to find a webpage error and return a false value |
+ * @param in the BufferedReader for the webpage |
+ * @param keyword the String keyword that is looked for in the webpage |
+ * @return returns true if the webpage contains the keyword, false otherwise |
+ * @throws IOException |
+ */ |
+ public boolean getWQPage(BufferedReader in, String keyword) throws IOException{ |
+ String inputLine = ""; |
+ |
+ boolean containsKeyword = false; |
+// System.out.println("Current webpage: \n"); |
+ while((inputLine = in.readLine()) != null) { |
+ if(inputLine.contains(keyword)){ |
+// System.out.println(inputLine); |
+ containsKeyword = true; |
+ break; |
+ } |
+ } |
+ return containsKeyword; |
+ } |
+ /** |
* Opens a web connection to USGS and returns the contents of a search for all water quality data for the specific station |
* @param stationID the USGS station ID for the current station |
* @param wqTest the 5 digit USGS water qualiyt (WQ) test code that the user has requested for download |
@@ -246,7 +477,7 @@ |
* @throws IOException |
* @throws InterruptedException |
*/ |
- public ArrayList<String> DownloadPartialWQwebpage(String stationID, String wqTest) throws IOException, InterruptedException { |
+ public ArrayList<String> getUSGSwqData_partial(String stationID, String wqTest) throws IOException, InterruptedException { |
|
//Specify flow website from inputs |
// String WQWebsite = "http://waterdata.usgs.gov/nwis/nwisman/?site_no=" + stationID + "&agency_cd=USGS"; |
@@ -299,110 +530,6 @@ |
return pageData; |
} |
/** |
- * Opens a web connection to USGS and returns the contents of a search for all water quality data for the specific station. |
- * Note, this function uses HtmlUnit because the above DownloadWQwebpage stopped working recently |
- * @param stationID the USGS station ID for the current station |
- * @return an ArrayList<String> containing the results of the search for water quality data using the above input |
- * @throws IOException |
- */ |
- public ArrayList<String> DownloadWQwebpage_HtmlUnit(String stationID, String wqTestCode) throws IOException, InterruptedException { |
- //Specify flow website from inputs |
- String WQWebsite = "http://waterdata.usgs.gov/nwis/nwisman/?site_no=" + stationID + "&agency_cd=USGS"; |
- |
- //Create Webclient with specific properties for STORET webpage |
- final LinkedList<WebWindow> windows = new LinkedList<WebWindow>(); |
- WebClient webClient = new WebClient(); |
- webClient.setThrowExceptionOnScriptError(false); |
- webClient.addWebWindowListener(new WebWindowListener(){ |
- public void webWindowClosed(WebWindowEvent event){ |
- } |
- public void webWindowContentChanged(WebWindowEvent event){ |
- } |
- public void webWindowOpened(WebWindowEvent event){ |
- windows.add(event.getWebWindow()); |
- } |
- }); |
- //Get webpage |
- HtmlPage mainPage = null; |
- try { |
- mainPage = webClient.getPage(WQWebsite); |
- }catch (FailingHttpStatusCodeException e) { |
- e.printStackTrace(); |
- }catch (MalformedURLException e) { |
- e.printStackTrace(); |
- }catch (IOException e) { |
- e.printStackTrace(); |
- } |
- if(mainPage == null){ |
- ArrayList<String> errorMessage = new ArrayList<String>(); |
- errorMessage.add("Error: USGS_readWQData_0001\n Error retriving webpage: " + WQWebsite + ""); |
- return errorMessage; |
- } |
- |
- HtmlPage wqPage = null; |
- List<?> linkList1 = (List<?>) mainPage.getByXPath("//a[@href='/nwis/qwdata/?site_no=" + stationID + "']"); |
- try{ |
- if(linkList1.size() == 1){ |
- HtmlAnchor selectAll = (HtmlAnchor) linkList1.get(0); |
- selectAll.focus(); |
- wqPage = selectAll.click(); |
- }else{ |
- ArrayList<String> errorMessage = new ArrayList<String>(); |
- errorMessage.add("Error: USGS_readWQData_0002\n There are no " + wqTestCode + " water quality tests for station: " + stationID); |
- return errorMessage; |
- } |
- }catch(IOException e){ |
- ArrayList<String> errorMessage = new ArrayList<String>(); |
- errorMessage.add("Error: USGS_readWQData_0003\n"); |
- errorMessage.add(e.toString()); |
- System.out.println(e.toString()); |
- return errorMessage; |
- } |
- |
-// System.out.println(wqPage.asXml()); |
- //Enter parameter code for data search |
- HtmlTextArea wqCodeSearch = (HtmlTextArea) wqPage.getElementById("radio_multiple_parm_cds"); |
- //Set focus on this element to allow the webpage's javascript to check the radio button corresponding to this element |
- wqCodeSearch.focus(); |
- wqCodeSearch.setText(wqTestCode); |
- |
- |
- //Change the data format to a better format |
- HtmlSelect dataFormat = (HtmlSelect) wqPage.getElementById("qw_sample_wide"); |
- //Set focus on this element to allow the webpage's javascript to check the radio button corresponding to this element |
- dataFormat.focus(); |
- dataFormat.setSelectedAttribute("One result per row", true); |
- |
- |
- //Change the download to display in browser |
- HtmlSelect downloadType = (HtmlSelect) wqPage.getElementById("rdb_compr_id"); |
- downloadType.setSelectedAttribute("Display in browser", true); |
- |
- //Get the result page |
- TextPage resultPage = null; |
- List<?> submitList = (List<?>) wqPage.getByXPath("//input[@value='Submit']"); |
- HtmlSubmitInput submitButton = (HtmlSubmitInput) submitList.get(0); |
- resultPage = submitButton.click(); |
- |
- |
- |
- //Extract data from result page |
- String resultPageContents = resultPage.getContent(); |
- if(resultPageContents.contains("No valid parameter codes")){ |
- ArrayList<String> errorMessage = new ArrayList<String>(); |
- errorMessage.add("Error: USGS_readWQData_0004\n There are no " + wqTestCode + " water quality tests for station: " + stationID); |
- return errorMessage; |
- } |
- String[] resultPageRows = resultPageContents.split("\n"); |
- ArrayList<String> pageData = new ArrayList<String>(); |
- |
- for(int i=0; i<resultPageRows.length; i++){ |
- pageData.add(resultPageRows[i]); |
- } |
- |
- return pageData; |
- } |
- /** |
* Reduces all water quality data to just that of the requested parameter |
* @param allData all water quality data for the earlier provided date range and station ID (column1 = date, column2 = wqTestcode, column3 = value) |
* @param wqTestCode the requested water quality parameter |
@@ -438,82 +565,238 @@ |
return reducedData; |
} |
/** |
- * Merges the two arrays into a single array of returnArray.length = (array1.length + array2.length). |
- * Note that this only combines the output array of "minimizeUSGSWQdata" which is a 2 column String[][] |
- * with dates in the first column and values in the second, which matches the output format |
- * @param array1 first String[][] array to be combined column1 = dates, column2 = values |
- * @param array2 second String[][] array to be combined column1 = dates, column2 = values |
- * @return a combined array of array1 and array2 with the same number of columns and returnArray.length = (array1.length + array2.length) |
+ * @param parameterCode USGS parameter code for a specific water quality test. |
+ * @return a string with the type of units for the current test. |
*/ |
- public String[][] mergeMinimizedWQdata(String[][] array1, String[][] array2){ |
- String[][] newArray = new String[array1.length + array2.length][2]; |
+ public String getUSGSwqUnits(String parameterCode) throws IOException{ |
+ URL webpage = new URL("http://nwis.waterdata.usgs.gov/usa/nwis/pmcodes?radio_pm_search=param_group&pm_group=All+--+include+all+parameter+groups&pm_search=&casrn_search=&srsname_search=&format=rdb&show=parameter_group_nm&show=parameter_nm&show=casrn&show=srsname&show=parameter_units"); |
+ URLConnection yc = webpage.openConnection(); |
+ BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream())); |
+ String inputLine; |
+ String units = "0"; |
+ //Find the units of the specified test ("0" is returned if the code is not listed) |
+ while ((inputLine = in.readLine()) != null) { |
+ String[] f = inputLine.split("\t"); |
+ if((f.length >= 6) && (f[0].length() == 5)){ |
+ if(f[0].equals(parameterCode)){ |
+ units = f[5]; |
+ } |
+ } |
+ } |
+ //Close the reader now that the whole parameter list has been scanned |
+ in.close(); |
+ return units; |
+ } |
+ /** |
+ * @param units the units of the current USGS water quality test. |
+ * @return a double with the correct conversion factor for the units. |
+ */ |
+ public double getUSGSwqConversion(String units){ |
+ double conversion = 0; |
+ if(units.equalsIgnoreCase("#/l")){ |
+ conversion = (1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("#/m3")){ |
+ conversion = (java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("#/ml")){ |
+ conversion = (1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("MPN/100 ml")){ |
+ conversion = (1.0/100.0)*(1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("MPN/100L")){ |
+ conversion = (1.0/100.0)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("cfu/100ml")){ |
+ conversion = (1.0/100.0)*(1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("cfu/mL")){ |
+ conversion = (1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("col/mL")){ |
+ conversion = (1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("cysts/100L")){ |
+ conversion = (1.0/100.0)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("cysts/10L")){ |
+ conversion = (1.0/10.0)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("g/cm3") || units.equalsIgnoreCase("g/mL @ 20C")){ |
+ conversion = (java.lang.Math.pow(10,-6))*(1/1)*(1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("g/m3")){ |
+ conversion = (java.lang.Math.pow(10,-6))*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("mg/l") || units.equalsIgnoreCase("mg/l CaCO3") || units.equalsIgnoreCase("mg/l NH4") || |
+ units.equalsIgnoreCase("mg/l NO3") || units.equalsIgnoreCase("mg/l PO4") || units.equalsIgnoreCase("mg/l SiO2") || |
+ units.equalsIgnoreCase("mg/l as H") || units.equalsIgnoreCase("mg/l as N") || units.equalsIgnoreCase("mg/l as Na") || |
+ units.equalsIgnoreCase("mg/l as P") || units.equalsIgnoreCase("mg/l as S") || units.equalsIgnoreCase("mgC3H6O2/L")){ |
+ conversion = (java.lang.Math.pow(10,-6))*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("mg/mL @25C")){ |
+ conversion = (java.lang.Math.pow(10,-6))*(1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("ml/l")){ |
+ conversion = (1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("ng/l") || units.equalsIgnoreCase("pg/mL")){ |
+ conversion = (java.lang.Math.pow(10,-12))*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("ng/m3") || units.equalsIgnoreCase("pg/l")){ |
+ conversion = (java.lang.Math.pow(10,-12))*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("ocyst/100L")){ |
+ conversion = (1.0/100.0)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("oocyst/10L")){ |
+ conversion = (1.0/10.0)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("pfu/100L")){ |
+ conversion = (1.0/100.0)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("pfu/100ml")){ |
+ conversion = (1.0/100.0)*(1000)*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("pg/m3")){ |
+ conversion = (java.lang.Math.pow(10,-15))*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("ug/L 2,4-D") || units.equalsIgnoreCase("ug/L U3O8") || units.equalsIgnoreCase("ug/L as As") || |
+ units.equalsIgnoreCase("ug/L as Cl") || units.equalsIgnoreCase("ug/L as N") || units.equalsIgnoreCase("ug/L as P") || |
+ units.equalsIgnoreCase("ug/l") || units.equalsIgnoreCase("ugAtrazn/L")){ |
+ conversion = (java.lang.Math.pow(10,-9))*(1000)*(java.lang.Math.pow(0.3048,3))*(86400); |
+ }else if(units.equalsIgnoreCase("ug/m3")){ |
+ conversion = (java.lang.Math.pow(10,-9))*(java.lang.Math.pow(0.3048,3))*(86400); |
+ } |
+ return conversion; |
+ } |
+ /** |
+ * @param units the units of the current USGS water quality test. |
+ * @return a string with the end result units of the conversion. |
+ */ |
+ public String getUSGSwqEndUnits(String units){ |
+ String endUnits = "No Units"; |
+ if(units.equalsIgnoreCase("#/l") || units.equalsIgnoreCase("#/m3") || units.equalsIgnoreCase("#/ml")){ |
+ endUnits = "#/day"; |
+ }else if(units.equalsIgnoreCase("MPN/100 ml") || units.equalsIgnoreCase("MPN/100L")){ |
+ endUnits = "MPN/day"; |
+ }else if(units.equalsIgnoreCase("cfu/100ml") || units.equalsIgnoreCase("cfu/mL")){ |
+ endUnits = "cfu/day"; |
+ }else if(units.equalsIgnoreCase("col/mL")){ |
+ endUnits = "col/day"; |
+ }else if(units.equalsIgnoreCase("cysts/100L") || units.equalsIgnoreCase("cysts/10L")){ |
+ endUnits = "cysts/day";//= cysts/100L*cfs |
+ }else if(units.equalsIgnoreCase("mg/l") || units.equalsIgnoreCase("mg/l CaCO3") || units.equalsIgnoreCase("mg/l NH4") || |
+ units.equalsIgnoreCase("mg/l NO3") || units.equalsIgnoreCase("mg/l PO4") || units.equalsIgnoreCase("mg/l SiO2") || |
+ units.equalsIgnoreCase("mg/l as H") || units.equalsIgnoreCase("mg/l as N") || units.equalsIgnoreCase("mg/l as Na") || |
+ units.equalsIgnoreCase("mg/l as P") || units.equalsIgnoreCase("mg/l as S") || units.equalsIgnoreCase("mgC3H6O2/L") || |
+ units.equalsIgnoreCase("g/cm3") || units.equalsIgnoreCase("g/mL @ 20C") || units.equalsIgnoreCase("g/m3") || |
+ units.equalsIgnoreCase("mg/mL @25C") || units.equalsIgnoreCase("ng/l") || units.equalsIgnoreCase("pg/mL") || |
+ units.equalsIgnoreCase("ng/m3") || units.equalsIgnoreCase("pg/l") || units.equalsIgnoreCase("pg/m3") || |
+ units.equalsIgnoreCase("ug/L 2,4-D") || units.equalsIgnoreCase("ug/L U3O8") || units.equalsIgnoreCase("ug/L as As") || |
+ units.equalsIgnoreCase("ug/L as Cl") || units.equalsIgnoreCase("ug/L as N") || units.equalsIgnoreCase("ug/L as P") || |
+ units.equalsIgnoreCase("ug/l") || units.equalsIgnoreCase("ugAtrazn/L") || units.equalsIgnoreCase("ug/m3")){ |
+ endUnits = "kg/day";//= mg/l*cfs |
+ }else if(units.equalsIgnoreCase("ml/l")){ |
+ endUnits = "ml/day";//= mg/l*cfs |
+ }else if(units.equalsIgnoreCase("pfu/100L") || units.equalsIgnoreCase("pfu/100ml")){ |
+ endUnits = "pfu/day"; |
+ }else if(units.equalsIgnoreCase("ocyst/100L")){ |
+ endUnits = "ocyst/day"; |
+ }else if(units.equalsIgnoreCase("oocyst/10L")){ |
+ endUnits = "oocyst/day"; |
+ } |
+ return endUnits; |
+ } |
+ /** |
+ * Extract the USGS water quality tests of 00061 (discharge in cfs) and 30209 (discharge in cms) and combine them |
+ * with the provided flow data set within the provided date range |
+ * @param flowData existing stream flow data (column 1 = dates, column 2 = flow values) |
+ * @param allWQdata all existing stream water quality data (column 1 = dates, column 2 = flow values) |
+ * @param beginDate the start of the desired data date range |
+ * @param endDate the end of the desired data date range |
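+ * @return the provided flow data merged (via DoubleArray.mergeData) with the 00061 and 30209 discharge records, the latter converted to cfs |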
+ * @throws IOException |
+ */ |
+ public String[][] getUSGSwqFlowData(String[][] flowData, String[][] allWQdata, String beginDate, String endDate) throws IOException{ |
+ DoubleArray doubleArray = new DoubleArray(); |
+ |
+ //Extract USGS water quality code 00061 for discharge in cfs |
+ String[][] WQFlow1 = minimizeUSGSWQdata(allWQdata, "00061", beginDate, endDate); |
+ //Extract USGS water quality code 30209 for discharge test in m^3/s (cms) |
+ String[][] WQFlow2 = minimizeUSGSWQdata(allWQdata, "30209", beginDate, endDate); |
|
- for(int i=0; i<newArray.length; i++){ |
- if(i<array1.length){ |
- newArray[i][0] = array1[i][0]; |
- newArray[i][1] = array1[i][1]; |
- }else{ |
- newArray[i][0] = array1[i-array1.length][0]; |
- newArray[i][1] = array1[i-array1.length][1]; |
- } |
+ //Convert the m^3 to ft^3/s |
+ for(int i=0; i<WQFlow2.length; i++){ |
+ WQFlow2[i][1] = Double.toString((Double.parseDouble(WQFlow2[i][1])*(3.2808399*3.2808399*3.2808399))); |
} |
|
- return newArray; |
+ //combine the WQ flows (cfs and the converted cms data) into a single variable to then combine with the Flowdata |
+ String[][] WQDataflows = doubleArray.mergeData(WQFlow1, WQFlow2, "public");//The "public" attribute keeps the first dataset in case of duplicates |
+ |
+ //Combine flow data and WQ flow data into a variable of dates and flow values to be sorted |
+ flowData = doubleArray.mergeData(flowData, WQDataflows, "public");//The "public" attribute keeps the first dataset in case of duplicates |
+ |
+ return flowData; |
} |
/** |
- * Get the water quality webpage and loop through and pull out the water quality data for the current station |
+ * Get the rating curve (stage-discharge relationship) data for use by the user |
* @param stationID the USGS station ID for the current station |
- * @return a String[][] containing column1 = date(yyyy-mm-dd), column2 = flowValue |
+ * @return a double[][] containing column1 = discharge(ft3/s), column2 = depth(ft) |
* @throws IOException |
- * @throws InterruptedException |
*/ |
- public String[][] USGS_read_LDC(String stationID) throws IOException, InterruptedException{ |
- |
+ public Object[] getUSGSratingCurve(String stationID) throws IOException{ |
//Get the webpage of data for the USGS flow station |
- ArrayList<String> webpageAll = DownloadWQwebpage(stationID); |
-// ArrayList<String> webpageAll = DownloadWQwebpage_HtmlUnit(stationID, wqTestCode); |
- |
+ ArrayList<String> webpageAll = DownloadRatingCurveWebpage(stationID); |
+ |
//Pull out new arraylist of only the desired data from the arraylist to return as the web page result |
Iterator<String> iterate = webpageAll.iterator( ); |
ArrayList<String> textData = new ArrayList<String>(); |
+ |
while(iterate.hasNext()){ |
String temp_pageData = (String) iterate.next(); |
String[] f = temp_pageData.split("\t"); |
- |
- if ( (f.length >= 15) && (f[0].equals("USGS")) ) { |
- String WQSample_code = f[12]; |
- String WQSample_result = f[14]; |
- boolean A = WQSample_code.equals(""); |
- boolean B = WQSample_result.equals(""); |
- if (!A && !B){ |
- //Count only the rows which contain the desired values of "agency_cd site_no sample_dt... |
- // sample_tm sample_end_dt sample_end_tm sample_start_time_datum_cd tm_datum_rlbty_cd... |
- // coll_ent_cd medium_cd tu_id body_part_id parm_cd remark_cd result_va" |
- |
- //Pull out only the data needed to pass between sub-functions |
- //f[1] = stationID |
- //f[2] = date |
- //f[12] = water quality test code |
- //f[14] = water quality test value |
- textData.add(f[1] + "\t" + f[2] + "\t" + f[12] + "\t" + f[14]); |
+ try{ |
+ String text1 = temp_pageData.substring(0,1); |
+ if((f.length >= 3) && (!"#".equals(text1))){ |
+ try{ |
+ double depth = Double.parseDouble(f[0]); |
+ double shift = Double.parseDouble(f[1]); |
+ double discharge = Double.parseDouble(f[2]); |
+ textData.add(f[0] + "\t" + f[1] + "\t" + f[2]); |
+ }catch(NumberFormatException e){ |
+ //Skip this entry and move on to the next line in the file |
+ } |
} |
+ |
+ }catch(IndexOutOfBoundsException e){ |
+ //Move on to the next line in the file |
} |
+ |
} |
|
- //convert Array list into String[][] array (column1 = date, column2 = value) |
- String[][] stringArray = new String[textData.size()][3]; |
- for(int i=0; i<textData.size(); i++){ |
+ //convert Array list into double[][] array (column1 = discharge, column2 = depth) |
+ double[][] ratingCurveData = new double[textData.size()][2]; |
+ for(int i=0; i<ratingCurveData.length; i++){ |
String[] currentColumns = textData.get(i).split("\t"); |
- //currentColumns[0] = stationID |
- //currentColumns[1] = date |
- //currentColumns[2] = water quality test code |
- //currentColumns[3] = water quality test value |
+ //currentColumns[0] = depth |
+ //currentColumns[1] = shift |
+ //currentColumns[2] = discharge |
+ ratingCurveData[i][0] = Double.parseDouble(currentColumns[2]); |
+ ratingCurveData[i][1] = Double.parseDouble(currentColumns[0]); |
+ } |
+ |
+ Object[] returnArray = {webpageAll, ratingCurveData}; |
+ return returnArray; |
+ } |
+ /** |
+ * Opens a web connection to USGS and returns the contents of the rating curve file for the specific station |
+ * @param stationID the USGS station ID for the current station |
+ * @return an ArrayList<String> containing the lines of the rating curve file for the station (empty if the file is not found) |
+ * @throws IOException |
+ */ |
+ public ArrayList<String> DownloadRatingCurveWebpage(String stationID) throws IOException { |
+ //Specify rating curve website from inputs |
+ String ratingCurveWebsite = "http://nwis.waterdata.usgs.gov/nwisweb/data/ratings/exsa/USGS." + stationID + ".exsa.rdb"; |
+ //"http://nwis.waterdata.usgs.gov/nwisweb/data/ratings/exsa/USGS.06741510.exsa.rdb" |
+ |
+ //Open the provided website |
+ URL webpage = new URL(ratingCurveWebsite); |
+ URLConnection yc = webpage.openConnection(); |
+ ArrayList<String> pageData = new ArrayList<String>(); |
+ try{ |
+ BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream())); |
+ //Read all of the webpage out into an ArrayList<String> |
+ String inputLine; |
|
- stringArray[i][0] = currentColumns[1];//date |
- stringArray[i][1] = currentColumns[2];//test code |
- stringArray[i][2] = currentColumns[3];//value |
+ while((inputLine = in.readLine()) != null){ |
+ pageData.add(inputLine); |
+ } |
+ in.close(); |
+ |
+ return pageData; |
+ }catch(FileNotFoundException e){ |
+ //This station has no rating curve data, so return the list as-is (no lines were read) |
+ return pageData; |
} |
- |
- return stringArray; |
} |
} |
\ No newline at end of file |