@@ -1,3 +1,4 @@ |
+ |
package org.rti.timeseries; |
|
import java.io.IOException; |
@@ -16,327 +17,369 @@ |
import java.util.stream.Stream; |
|
class ParseData { |
- |
- String TSID, dataType, desc, units, missing, comment; |
- int numTS; |
- boolean flagNumTs, flagTSID, flagDataType, flagDesc, flagUnits, flagMissing, flagStart, flagEnd, verifyFirstDate, flagComment; |
- Date start, end; |
- List<Date> dateObjects; |
- List<Float> values; |
- AtomicInteger count; |
- public ParseData() { |
- numTS = -1; |
- TSID = ""; |
- dataType = "default"; |
- desc = "default"; |
- units = "default"; |
- missing = "default"; |
- comment = "#"; |
- start = null; |
- end = null; |
- dateObjects = new ArrayList<Date>(); |
- values = new ArrayList<Float>(); |
- count = new AtomicInteger(0); |
- |
- flagNumTs = false; |
- flagTSID = false; |
- flagDataType = false; |
- flagDesc = false; |
- flagUnits = false; |
- flagMissing = false; |
- flagStart = false; |
- flagEnd = false; |
- flagComment = false; |
- verifyFirstDate = false; |
- |
- } |
- |
- public void parseFile(String fileName, String targetFile) throws Exception { |
- try { |
- Stream<String> stream = Files.lines(Paths.get(fileName)); |
- Iterator iterator = stream.iterator(); |
- FileChannel rwChannel = new RandomAccessFile(targetFile, "rw").getChannel(); |
- String sample_line = "2015-10-30 10:00:00:00 0.3\n"; |
- Stream<String> temp = Files.lines(Paths.get(fileName)); |
- ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, sample_line.getBytes().length*temp.count()); |
- String line; |
- while (iterator.hasNext()) { |
- line = (String) iterator.next(); |
- |
- //check for Comment |
- if (!flagComment) |
- if(line.indexOf("#") != -1) { |
- this.comment = line; |
- flagComment = true; |
- continue; |
- } |
- |
- //check for numTS |
- if (!flagNumTs) |
- if(line.indexOf("NumTS") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("NumTS=") == 0) { |
- setNumTS( Integer.parseInt(line.substring("NumTS=".length(),line.length())) ); |
- flagNumTs = true; |
- continue; |
- } |
- } |
- |
- //check for TSID |
- if (!flagTSID) |
- if(line.indexOf("TSID") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("TSID=") == 0) { |
- setTSID( (line.substring("TSID=".length(),line.length())) ); |
- flagTSID =true; |
- continue; |
- } |
- } |
- |
- //check for dataType |
- if (!flagDataType) |
- if(line.indexOf("DataType") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("DataType=") == 0) { |
- setDataType( (line.substring("DataType=".length(),line.length())) ); |
- flagDataType =true; |
- continue; |
- } |
- } |
- |
- //check for Description |
- if (!flagDesc) |
- if(line.indexOf("Description") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("Description=") == 0) { |
- setDesc( (line.substring("Description=".length(),line.length())) ); |
- flagDesc =true; |
- continue; |
- } |
- } |
- |
- //check for units |
- if (!flagUnits) |
- if(line.indexOf("Units") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("Units=") == 0) { |
- setUnits( (line.substring("Units=".length(),line.length())) ); |
- flagUnits =true; |
- continue; |
- } |
- } |
- |
- //check for Missing |
- if (!flagMissing) |
- if(line.indexOf("Missing") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("Missing=") == 0) { |
- setMissing( (line.substring("Missing=".length(),line.length())) ); |
- flagMissing =true; |
- continue; |
- } |
- } |
- |
- //check for Start |
- if (!flagStart) |
- if(line.indexOf("Start") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("Start=") == 0) { |
- String _start = (line.substring("Start=".length(),line.length())); |
- try { |
- start = new SimpleDateFormat("yyyy-mm-ddhh:mm:ss:SS").parse(_start); |
- flagStart =true; |
- continue; |
- } catch (ParseException ex) { |
- rwChannel.close(); |
- stream.close(); |
- temp.close(); |
- wrBuf.clear(); |
- throw new Exception("The Start Date format is not specified correctly"); |
- } |
- } |
- } |
- |
- //check for End |
- if (!flagEnd) |
- if(line.indexOf("End") != -1) { |
- line = line.replace(" ", ""); |
- line = line.replace("\t", ""); |
- if(line.indexOf("End=") == 0) { |
- String _end = (line.substring("End=".length(),line.length())); |
- try { |
- end = new SimpleDateFormat("yyyy-mm-ddhh:mm:ss:SS").parse(_end); |
- flagEnd =true; |
- continue; |
- } catch (ParseException ex) { |
- rwChannel.close(); |
- stream.close(); |
- temp.close(); |
- wrBuf.clear(); |
- throw new Exception("The End Date format is not specified correctly"); |
- } |
- } |
- } |
- |
- if (!verifyFirstDate) { |
- int len = "Description".length(); |
- if (flagComment) |
- wrBuf.put( (getComment() + "\n").getBytes()); |
- if (flagNumTs) |
- wrBuf.put( (padString("Num TS", len) + " = "+ getNumTS() + "\n").getBytes() ); |
- if (flagTSID) |
- wrBuf.put( (padString("TSID", len) + " = "+ getTSID() + "\n").getBytes() ); |
- if (flagDataType) |
- wrBuf.put( (padString("Data Type", len) + " = "+ getDataType() + "\n").getBytes() ); |
- if (flagDesc) |
- wrBuf.put( (padString("Description", len) + " = "+ getDesc() + "\n").getBytes() ); |
- if (flagUnits) |
- wrBuf.put( (padString("Units", len) + " = "+ getUnits() + "\n").getBytes() ); |
- if (flagMissing) |
- wrBuf.put( (padString("Missing", len) + " = "+ getMissing() + "\n").getBytes() ); |
- if (flagStart) |
- wrBuf.put( (padString("Start", len) + " = "+ getStartDate() + "\n").getBytes() ); |
- if (flagEnd) |
- wrBuf.put( (padString("End", len) + " = "+ getEndDate() + "\n").getBytes() ); |
- verifyFirstDate = true; |
- Date verify = new SimpleDateFormat("yyyy-mm-dd hh:mm:ss:SS").parse(line.substring(0,"yyyy-mm-dd hh:mm:ss:SS".length())); |
- if (!verify.equals(start)) { |
- rwChannel.close(); |
- stream.close(); |
- temp.close(); |
- wrBuf.clear(); |
- throw new Exception("Start Date not equal to the first entry in the data"); |
- } |
- } |
- |
- if (flagStart && flagEnd && flagTSID) { |
- line+="\n"; |
- wrBuf.put(line.getBytes()); |
- } else { |
- rwChannel.close(); |
- stream.close(); |
- temp.close(); |
- wrBuf.clear(); |
- throw new Exception("The headers are not defined correctly. Make sure the headers are defined correctly"); |
- } |
- } |
- |
- rwChannel.close(); |
- stream.close(); |
- temp.close(); |
- wrBuf.clear(); |
- |
- } catch (IOException e) { |
- e.printStackTrace(); |
- } |
- |
- } |
- |
- protected List<Float> getValues() { |
- return this.values; |
- } |
- |
- protected List<Date> getDateObjects() { |
- return this.dateObjects; |
- } |
- |
- private void setNumTS(int numTS) { |
- this.numTS = numTS; |
- } |
- |
- public int getNumTS() { |
- return numTS; |
- } |
- |
- private void setTSID(String TSID) { |
- this.TSID = TSID; |
- } |
- |
- public String getTSID() { |
- return this.TSID; |
- } |
- |
- private void setDataType(String dataType) { |
- this.dataType = dataType; |
- } |
- |
- public String getDataType() { |
- return this.dataType; |
- } |
- |
- private void setDesc(String desc) { |
- this.desc = desc; |
- } |
- |
- public String getDesc() { |
- return this.desc; |
- } |
- |
- private void setUnits(String units) { |
- this.units = units; |
- } |
- |
- public String getUnits() { |
- return this.units; |
- } |
- |
- private void setMissing(String missing) { |
- this.missing = missing; |
- } |
- |
- public String getMissing() { |
- return this.missing; |
- } |
- |
- private String getStartDate() { |
- return new SimpleDateFormat("yyyy-mm-dd hh:mm:ss:SS").format(this.start); |
- } |
- |
- private String getEndDate() { |
- return new SimpleDateFormat("yyyy-mm-dd hh:mm:ss:SS").format(this.end); |
- } |
- |
- public Date getStart() { |
- return this.start; |
- } |
- |
- public Date getEnd() { |
- return this.end; |
- } |
- |
- public String getComment() { |
- return this.comment; |
- } |
- |
- public String padString(String str, int len) { |
- for( int i = str.length() ; i <= len; i++) { |
- str+=" "; |
- } |
- return str; |
- } |
- |
+ |
+ String TSID, dataType, desc, units, missing, comment; /* header values captured by parseFile */ |
+ int numTS; /* NumTS header value; -1 until one is parsed */ |
+ boolean flagNumTs, flagTSID, flagDataType, flagDesc, flagUnits, flagMissing, flagStart, flagEnd, verifyFirstDate, flagComment; /* each flag is set once the matching header (or, for verifyFirstDate, the first data line) has been handled */ |
+ Date start, end; /* parsed Start / End headers; null until parsed */ |
+ List<Date> dateObjects; /* allocated by the constructor; not filled by any method of this class */ |
+ List<Float> values; /* allocated by the constructor; not filled by any method of this class */ |
+ AtomicInteger count; /* starts at 0; not updated by any method of this class */ |
+ |
+ /** Creates a parser with placeholder header values and every flag cleared. */ |
+ public ParseData() { |
+ numTS = -1; |
+ TSID = ""; |
+ dataType = "default"; |
+ desc = "default"; |
+ units = "default"; |
+ missing = "default"; |
+ comment = "#"; |
+ start = null; |
+ end = null; |
+ dateObjects = new ArrayList<Date>(); |
+ values = new ArrayList<Float>(); |
+ count = new AtomicInteger(0); |
+ |
+ flagNumTs = false; |
+ flagTSID = false; |
+ flagDataType = false; |
+ flagDesc = false; |
+ flagUnits = false; |
+ flagMissing = false; |
+ flagStart = false; |
+ flagEnd = false; |
+ flagComment = false; |
+ verifyFirstDate = false; |
+ |
+ } |
+ |
+ /** Reads the header and data lines of fileName and writes a re-formatted copy to targetFile; throws Exception when the Start/End dates are malformed, when the first data line's timestamp differs from Start, or when Start, End and TSID have not all been seen before a data line. An IOException is caught and printed rather than rethrown. */ |
+ public void parseFile(String fileName, String targetFile) throws Exception { |
+ try (Stream<String> stream = Files.lines(Paths.get(fileName)); |
+ FileChannel rwChannel = new RandomAccessFile(targetFile, "rw").getChannel(); |
+ Stream<String> temp = Files.lines(Paths.get(fileName))) { |
+ /* try-with-resources releases all three handles on every exit path, including unchecked exceptions; the explicit close() calls further down are kept and are harmless repeats */ |
+ Iterator<String> iterator = stream.iterator(); |
+ String sample_line = "2015-10-30 10:00:00:00 0.3\n"; |
+ /* Output size is estimated as one sample-sized record per input line. NOTE(review): longer lines could exceed the mapping - confirm against real data. */ |
+ ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, sample_line.getBytes().length * temp.count()); |
+ |
+ String line; |
+ while (iterator.hasNext()) { |
+ line = iterator.next(); |
+ |
+ //check for Comment |
+ if (!flagComment) { |
+ if (line.indexOf("#") != -1) { |
+ this.comment = line; |
+ flagComment = true; |
+ continue; |
+ } |
+ } |
+ |
+ //check for numTS |
+ if (!flagNumTs) { |
+ if (line.indexOf("NumTS") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("NumTS=") == 0) { |
+ setNumTS(Integer.parseInt(line.substring("NumTS=".length(), line.length()))); |
+ flagNumTs = true; |
+ continue; |
+ } |
+ } |
+ } |
+ |
+ //check for TSID |
+ if (!flagTSID) { |
+ if (line.indexOf("TSID") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("TSID=") == 0) { |
+ setTSID((line.substring("TSID=".length(), line.length()))); |
+ flagTSID = true; |
+ continue; |
+ } |
+ } |
+ } |
+ |
+ //check for dataType |
+ if (!flagDataType) { |
+ if (line.indexOf("DataType") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("DataType=") == 0) { |
+ setDataType((line.substring("DataType=".length(), line.length()))); |
+ flagDataType = true; |
+ continue; |
+ } |
+ } |
+ } |
+ |
+ //check for Description |
+ if (!flagDesc) { |
+ if (line.indexOf("Description") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("Description=") == 0) { |
+ setDesc((line.substring("Description=".length(), line.length()))); |
+ flagDesc = true; |
+ continue; |
+ } |
+ } |
+ } |
+ |
+ //check for units |
+ if (!flagUnits) { |
+ if (line.indexOf("Units") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("Units=") == 0) { |
+ setUnits((line.substring("Units=".length(), line.length()))); |
+ flagUnits = true; |
+ continue; |
+ } |
+ } |
+ } |
+ |
+ //check for Missing |
+ if (!flagMissing) { |
+ if (line.indexOf("Missing") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("Missing=") == 0) { |
+ setMissing((line.substring("Missing=".length(), line.length()))); |
+ flagMissing = true; |
+ continue; |
+ } |
+ } |
+ } |
+ |
+ //check for Start |
+ if (!flagStart) { |
+ if (line.indexOf("Start") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("Start=") == 0) { |
+ String _start = (line.substring("Start=".length(), line.length())); |
+ try { |
+ start = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss:SS").parse(_start); /* MM = month, HH = 24-hour clock; no space because blanks were stripped above */ |
+ flagStart = true; |
+ continue; |
+ } catch (ParseException ex) { |
+ rwChannel.close(); |
+ stream.close(); |
+ temp.close(); |
+ wrBuf.clear(); |
+ throw new Exception("The Start Date format is not specified correctly", ex); |
+ } |
+ } |
+ } |
+ } |
+ |
+ //check for End |
+ if (!flagEnd) { |
+ if (line.indexOf("End") != -1) { |
+ line = line.replace(" ", ""); |
+ line = line.replace("\t", ""); |
+ if (line.indexOf("End=") == 0) { |
+ String _end = (line.substring("End=".length(), line.length())); |
+ try { |
+ end = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss:SS").parse(_end); /* same pattern as Start */ |
+ flagEnd = true; |
+ continue; |
+ } catch (ParseException ex) { |
+ rwChannel.close(); |
+ stream.close(); |
+ temp.close(); |
+ wrBuf.clear(); |
+ throw new Exception("The End Date format is not specified correctly", ex); |
+ } |
+ } |
+ } |
+ } |
+ |
+ /* First line not consumed as a header: write the collected headers once, then require its leading timestamp to equal Start. */ |
+ if (!verifyFirstDate) { |
+ int len = "Description".length(); |
+ if (flagComment) { |
+ wrBuf.put((getComment() + "\n").getBytes()); |
+ } |
+ if (flagNumTs) { |
+ wrBuf.put((padString("Num TS", len) + " = " + getNumTS() + "\n").getBytes()); |
+ } |
+ if (flagTSID) { |
+ wrBuf.put((padString("TSID", len) + " = " + getTSID() + "\n").getBytes()); |
+ } |
+ if (flagDataType) { |
+ wrBuf.put((padString("Data Type", len) + " = " + getDataType() + "\n").getBytes()); |
+ } |
+ if (flagDesc) { |
+ wrBuf.put((padString("Description", len) + " = " + getDesc() + "\n").getBytes()); |
+ } |
+ if (flagUnits) { |
+ wrBuf.put((padString("Units", len) + " = " + getUnits() + "\n").getBytes()); |
+ } |
+ if (flagMissing) { |
+ wrBuf.put((padString("Missing", len) + " = " + getMissing() + "\n").getBytes()); |
+ } |
+ if (flagStart) { |
+ wrBuf.put((padString("Start", len) + " = " + getStartDate() + "\n").getBytes()); |
+ } |
+ if (flagEnd) { |
+ wrBuf.put((padString("End", len) + " = " + getEndDate() + "\n").getBytes()); |
+ } |
+ verifyFirstDate = true; |
+ Date verify = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SS").parse(line.substring(0, "yyyy-MM-dd HH:mm:ss:SS".length())); |
+ if (!verify.equals(start)) { |
+ rwChannel.close(); |
+ stream.close(); |
+ temp.close(); |
+ wrBuf.clear(); |
+ throw new Exception("Start Date not equal to the first entry in the data"); |
+ } |
+ } |
+ |
+ /* Data lines are copied through with a trailing newline, but only once Start, End and TSID have all been seen. */ |
+ if (flagStart && flagEnd && flagTSID) { |
+ line += "\n"; |
+ wrBuf.put(line.getBytes()); |
+ } else { |
+ rwChannel.close(); |
+ stream.close(); |
+ temp.close(); |
+ wrBuf.clear(); |
+ throw new Exception("The headers are not defined correctly. Make sure the headers are defined correctly"); |
+ } |
+ } |
+ |
+ rwChannel.close(); |
+ stream.close(); |
+ temp.close(); |
+ wrBuf.clear(); |
+ |
+ } catch (IOException e) { |
+ e.printStackTrace(); /* NOTE(review): I/O failures are only printed, so callers cannot detect them - confirm this is intended */ |
+ } |
+ |
+ } |
+ |
+ /** Returns the values list created by the constructor. */ |
+ protected List<Float> getValues() { |
+ return this.values; |
+ } |
+ |
+ /** Returns the date list created by the constructor. */ |
+ protected List<Date> getDateObjects() { |
+ return this.dateObjects; |
+ } |
+ |
+ /** Stores the parsed NumTS header value. */ |
+ private void setNumTS(int numTS) { |
+ this.numTS = numTS; |
+ } |
+ |
+ /** Returns the NumTS header value, or -1 if none has been parsed. */ |
+ public int getNumTS() { |
+ return numTS; |
+ } |
+ |
+ /** Stores the parsed TSID header value. */ |
+ private void setTSID(String TSID) { |
+ this.TSID = TSID; |
+ } |
+ |
+ /** Returns the TSID header value, or the empty string if none has been parsed. */ |
+ public String getTSID() { |
+ return this.TSID; |
+ } |
+ |
+ /** Stores the parsed DataType header value. */ |
+ private void setDataType(String dataType) { |
+ this.dataType = dataType; |
+ } |
+ |
+ /** Returns the DataType header value, or "default" if none has been parsed. */ |
+ public String getDataType() { |
+ return this.dataType; |
+ } |
+ |
+ /** Stores the parsed Description header value. */ |
+ private void setDesc(String desc) { |
+ this.desc = desc; |
+ } |
+ |
+ /** Returns the Description header value, or "default" if none has been parsed. */ |
+ public String getDesc() { |
+ return this.desc; |
+ } |
+ |
+ /** Stores the parsed Units header value. */ |
+ private void setUnits(String units) { |
+ this.units = units; |
+ } |
+ |
+ /** Returns the Units header value, or "default" if none has been parsed. */ |
+ public String getUnits() { |
+ return this.units; |
+ } |
+ |
+ /** Stores the parsed Missing header value. */ |
+ private void setMissing(String missing) { |
+ this.missing = missing; |
+ } |
+ |
+ /** Returns the Missing header value, or "default" if none has been parsed. */ |
+ public String getMissing() { |
+ return this.missing; |
+ } |
+ |
+ /** Formats start as yyyy-MM-dd HH:mm:ss:SS (month and 24-hour clock) for the output header; start must be non-null. */ |
+ private String getStartDate() { |
+ return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SS").format(this.start); |
+ } |
+ |
+ /** Formats end as yyyy-MM-dd HH:mm:ss:SS (month and 24-hour clock) for the output header; end must be non-null. */ |
+ private String getEndDate() { |
+ return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SS").format(this.end); |
+ } |
+ |
+ /** Returns the parsed Start date, or null if none has been parsed. */ |
+ public Date getStart() { |
+ return this.start; |
+ } |
+ |
+ /** Returns the parsed End date, or null if none has been parsed. */ |
+ public Date getEnd() { |
+ return this.end; |
+ } |
+ |
+ /** Returns the captured comment line, or "#" if none has been captured. */ |
+ public String getComment() { |
+ return this.comment; |
+ } |
+ |
+ /** Appends spaces to str until it is len + 1 characters long; a str already longer than len is returned unchanged. */ |
+ public String padString(String str, int len) { |
+ for (int i = str.length(); i <= len; i++) { |
+ str += " "; |
+ } |
+ return str; |
+ } |
+ |
} |
|
-public class ParseTimeSeries{ |
- |
- public static void main(String args[])throws IOException, Exception, ParseException { |
- Date startTime = new Date(); |
- ParseData parseData = new ParseData(); |
- parseData.parseFile("./data/TimeSeries/MCAN3.NHDES.MAP.1HOUR", "textFile.txt"); |
- Date endTime = new Date(); |
- long timetaken = endTime.getTime() - startTime.getTime(); |
- System.out.println("Time taken for reading and writing 1 file is " + timetaken + "ms" ); |
- System.out.println("Time taken for reading and writing 800,000 files is " + timetaken*80 + "seconds"); |
- System.out.println("Time taken for reading and writing 800,000 files is " + timetaken*80/60 + "minutes"); |
- System.out.println("Time taken for reading and writing 800,000 files is " + timetaken*80/3600 + "hours"); |
- |
- |
- } |
+public class ParseTimeSeries { |
+ /** Times a single parseFile run on a sample file and prints that time extrapolated linearly to 800,000 files. */ |
+ public static void main(String args[]) throws IOException, Exception, ParseException { |
+ Date startTime = new Date(); |
+ ParseData parseData = new ParseData(); |
+ parseData.parseFile("./data/TimeSeries/MCAN3.NHDES.MAP.1HOUR", "textFile.txt"); |
+ Date endTime = new Date(); |
+ long timetaken = endTime.getTime() - startTime.getTime(); |
+ System.out.println("Time taken for reading and writing 1 file is " + timetaken + "ms"); |
+ System.out.println("Time taken for reading and writing 800,000 files is " + timetaken * 800 + "seconds"); /* 800,000 files x timetaken ms / 1000 ms per second = timetaken x 800 seconds */ |
+ System.out.println("Time taken for reading and writing 800,000 files is " + timetaken * 800 / 60 + "minutes"); |
+ System.out.println("Time taken for reading and writing 800,000 files is " + timetaken * 800 / 3600 + "hours"); |
+ |
+ } |
} |