// ParseTimeSeries.java [src/java/org/rti/timeseries] Revision: Date:
package org.rti.timeseries;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
/**
 * Parses one time-series text file and writes a normalised copy of it.
 *
 * <p>The input is read line by line. A line is treated as
 * <ul>
 *   <li>the comment, if it is the first line containing {@code #};</li>
 *   <li>a header, if it has the form {@code key = value} for one of the keys NumTS, TSID,
 *       DataType, Description, Units, Missing, Start or End and that key has not been seen
 *       yet;</li>
 *   <li>a data line otherwise. Data lines must start with a
 *       {@code yyyy-MM-dd HH:mm:ss:SS} timestamp.</li>
 * </ul>
 * Parsed header values are stored on the instance and exposed through the getters.
 *
 * <p>The "already seen" flags are instance state and are never reset, so reusing an instance
 * for a second file will not pick up that file's headers.
 */
class ParseData {
    /** Layout of the timestamp that starts a data line and of the Start/End values in the output. */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss:SS";
    /** {@link #DATE_PATTERN} without the blank: header lines lose all whitespace before parsing. */
    private static final String COMPACT_DATE_PATTERN = "yyyy-MM-ddHH:mm:ss:SS";
    /** Header keys are padded relative to the longest key so the '=' signs line up in the output. */
    private static final int HEADER_KEY_WIDTH = "Description".length();

    String TSID, dataType, desc, units, missing, comment;
    int numTS;
    boolean flagNumTs, flagTSID, flagDataType, flagDesc, flagUnits, flagMissing, flagStart, flagEnd, verifyFirstDate, flagComment;
    Date start, end;
    List<Date> dateObjects;
    List<Float> values;
    AtomicInteger count;

    /** Creates a parser with placeholder header values and every "seen" flag cleared. */
    public ParseData() {
        numTS = -1;
        TSID = "";
        dataType = "default";
        desc = "default";
        units = "default";
        missing = "default";
        comment = "#";
        start = null;
        end = null;
        dateObjects = new ArrayList<Date>();
        values = new ArrayList<Float>();
        count = new AtomicInteger(0);
        flagNumTs = false;
        flagTSID = false;
        flagDataType = false;
        flagDesc = false;
        flagUnits = false;
        flagMissing = false;
        flagStart = false;
        flagEnd = false;
        flagComment = false;
        verifyFirstDate = false;
    }

    /**
     * Reads {@code fileName}, validates it and writes the normalised result to {@code targetFile}.
     *
     * <p>The output consists of the headers that were present in the input (comment, Num TS,
     * TSID, Data Type, Description, Units, Missing, Start, End - in that order, keys padded to a
     * common width) followed by the data lines unchanged, each terminated by {@code \n}.
     * The headers are emitted when the first data line is reached.
     *
     * <p>The whole output is assembled in memory and written only after the input has been
     * parsed successfully, so a failed parse leaves {@code targetFile} untouched and a
     * successful one replaces its previous content completely.
     *
     * @param fileName   path of the time-series file to read (decoded as UTF-8)
     * @param targetFile path of the file to create or overwrite (encoded as UTF-8)
     * @throws java.io.IOException   if the input cannot be opened or the target cannot be written
     * @throws NumberFormatException if the NumTS value is not an integer
     * @throws ParseException        if the first data line does not start with a valid timestamp
     * @throws Exception             if the Start or End value is malformed, if a data line is
     *                               reached before TSID, Start and End were all seen, or if the
     *                               first data timestamp differs from Start
     */
    public void parseFile(String fileName, String targetFile) throws Exception {
        // SimpleDateFormat is not thread-safe, so the formats are created per call.
        SimpleDateFormat headerDates = new SimpleDateFormat(COMPACT_DATE_PATTERN);
        SimpleDateFormat dataDates = new SimpleDateFormat(DATE_PATTERN);
        StringBuilder output = new StringBuilder();

        try (Stream<String> lines = Files.lines(Paths.get(fileName))) {
            Iterator<String> iterator = lines.iterator();
            while (iterator.hasNext()) {
                String line = iterator.next();
                if (consumeHeader(line, headerDates)) {
                    continue;
                }
                // Anything that is not a header is data, which needs the mandatory headers first.
                if (!(flagStart && flagEnd && flagTSID)) {
                    throw new Exception("The headers are not defined correctly. Make sure the headers are defined correctly");
                }
                if (!verifyFirstDate) {
                    verifyFirstDate = true;
                    appendHeaders(output);
                    verifyFirstEntry(line, dataDates);
                }
                output.append(line).append('\n');
            }
        }
        Files.write(Paths.get(targetFile), output.toString().getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Stores {@code line} as the comment or as a header value if it is one that has not been
     * seen yet.
     *
     * @return {@code true} if the line was consumed, {@code false} if it must be treated as data
     */
    private boolean consumeHeader(String line, SimpleDateFormat headerDates) throws Exception {
        if (!flagComment && line.contains("#")) {
            comment = line;
            flagComment = true;
            return true;
        }
        if (!flagNumTs) {
            String value = headerValue(line, "NumTS");
            if (value != null) {
                setNumTS(Integer.parseInt(value));
                flagNumTs = true;
                return true;
            }
        }
        if (!flagTSID) {
            String value = headerValue(line, "TSID");
            if (value != null) {
                setTSID(value);
                flagTSID = true;
                return true;
            }
        }
        if (!flagDataType) {
            String value = headerValue(line, "DataType");
            if (value != null) {
                setDataType(value);
                flagDataType = true;
                return true;
            }
        }
        if (!flagDesc) {
            String value = headerValue(line, "Description");
            if (value != null) {
                setDesc(value);
                flagDesc = true;
                return true;
            }
        }
        if (!flagUnits) {
            String value = headerValue(line, "Units");
            if (value != null) {
                setUnits(value);
                flagUnits = true;
                return true;
            }
        }
        if (!flagMissing) {
            String value = headerValue(line, "Missing");
            if (value != null) {
                setMissing(value);
                flagMissing = true;
                return true;
            }
        }
        if (!flagStart) {
            String value = headerValue(line, "Start");
            if (value != null) {
                start = parseHeaderDate(value, headerDates, "The Start Date format is not specified correctly");
                flagStart = true;
                return true;
            }
        }
        if (!flagEnd) {
            String value = headerValue(line, "End");
            if (value != null) {
                end = parseHeaderDate(value, headerDates, "The End Date format is not specified correctly");
                flagEnd = true;
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts the value of a {@code key = value} header line.
     *
     * <p>NOTE(review): all blanks and tabs are removed from the line, including those inside
     * the value (a multi-word Description is collapsed). This is the historical behaviour and
     * is kept so that stored values and output do not change.
     *
     * @return the whitespace-free value, or {@code null} if {@code line} is not a header for {@code key}
     */
    private static String headerValue(String line, String key) {
        if (!line.contains(key)) {
            return null;
        }
        String compact = line.replace(" ", "").replace("\t", "");
        String prefix = key + "=";
        return compact.startsWith(prefix) ? compact.substring(prefix.length()) : null;
    }

    /** Parses a Start/End header value, reporting failures with the given message and the cause attached. */
    private static Date parseHeaderDate(String value, SimpleDateFormat format, String errorMessage) throws Exception {
        try {
            return format.parse(value);
        } catch (ParseException ex) {
            throw new Exception(errorMessage, ex);
        }
    }

    /** Appends every header that was present in the input, in the fixed output order. */
    private void appendHeaders(StringBuilder out) {
        if (flagComment) {
            out.append(getComment()).append('\n');
        }
        if (flagNumTs) {
            appendHeader(out, "Num TS", String.valueOf(getNumTS()));
        }
        if (flagTSID) {
            appendHeader(out, "TSID", getTSID());
        }
        if (flagDataType) {
            appendHeader(out, "Data Type", getDataType());
        }
        if (flagDesc) {
            appendHeader(out, "Description", getDesc());
        }
        if (flagUnits) {
            appendHeader(out, "Units", getUnits());
        }
        if (flagMissing) {
            appendHeader(out, "Missing", getMissing());
        }
        if (flagStart) {
            appendHeader(out, "Start", getStartDate());
        }
        if (flagEnd) {
            appendHeader(out, "End", getEndDate());
        }
    }

    /** Appends one aligned {@code key = value} header line. */
    private void appendHeader(StringBuilder out, String key, String value) {
        out.append(padString(key, HEADER_KEY_WIDTH)).append(" = ").append(value).append('\n');
    }

    /** Checks that the first data line starts with a timestamp equal to the Start header. */
    private void verifyFirstEntry(String line, SimpleDateFormat dataDates) throws Exception {
        int stampLength = DATE_PATTERN.length();
        if (line.length() < stampLength) {
            throw new ParseException("First data line is too short to hold a timestamp: \"" + line + "\"", line.length());
        }
        Date first = dataDates.parse(line.substring(0, stampLength));
        if (!first.equals(start)) {
            throw new Exception("Start Date not equal to the first entry in the data");
        }
    }

    /** @return the list created in the constructor; {@link #parseFile} does not add to it */
    protected List<Float> getValues() {
        return this.values;
    }

    /** @return the list created in the constructor; {@link #parseFile} does not add to it */
    protected List<Date> getDateObjects() {
        return this.dateObjects;
    }

    private void setNumTS(int numTS) {
        this.numTS = numTS;
    }

    /** @return the NumTS header value, or {@code -1} if none was parsed */
    public int getNumTS() {
        return numTS;
    }

    private void setTSID(String TSID) {
        this.TSID = TSID;
    }

    /** @return the TSID header value, or the empty string if none was parsed */
    public String getTSID() {
        return this.TSID;
    }

    private void setDataType(String dataType) {
        this.dataType = dataType;
    }

    /** @return the DataType header value, or {@code "default"} if none was parsed */
    public String getDataType() {
        return this.dataType;
    }

    private void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the Description header value, or {@code "default"} if none was parsed */
    public String getDesc() {
        return this.desc;
    }

    private void setUnits(String units) {
        this.units = units;
    }

    /** @return the Units header value, or {@code "default"} if none was parsed */
    public String getUnits() {
        return this.units;
    }

    private void setMissing(String missing) {
        this.missing = missing;
    }

    /** @return the Missing header value, or {@code "default"} if none was parsed */
    public String getMissing() {
        return this.missing;
    }

    /** Formats the Start date for the output header; only valid once Start has been parsed. */
    private String getStartDate() {
        return new SimpleDateFormat(DATE_PATTERN).format(this.start);
    }

    /** Formats the End date for the output header; only valid once End has been parsed. */
    private String getEndDate() {
        return new SimpleDateFormat(DATE_PATTERN).format(this.end);
    }

    /** @return the parsed Start date, or {@code null} if none was parsed */
    public Date getStart() {
        return this.start;
    }

    /** @return the parsed End date, or {@code null} if none was parsed */
    public Date getEnd() {
        return this.end;
    }

    /** @return the comment line, or {@code "#"} if none was parsed */
    public String getComment() {
        return this.comment;
    }

    /**
     * Right-pads {@code str} with blanks.
     *
     * <p>The result is {@code len + 1} characters long (one more than {@code len}); that width
     * is part of the established output layout and is therefore kept. Strings already longer
     * than {@code len} are returned unchanged.
     *
     * @param str text to pad; must not be {@code null}
     * @param len padding width; the result has at least {@code len + 1} characters
     * @return the padded text
     */
    public String padString(String str, int len) {
        StringBuilder padded = new StringBuilder(str);
        while (padded.length() <= len) {
            padded.append(' ');
        }
        return padded.toString();
    }
}
/**
 * Command-line driver that converts one sample time-series file with {@link ParseData} and
 * extrapolates the measured wall-clock time to a large batch of files.
 */
public class ParseTimeSeries {
    /** Batch size used for the extrapolation; matches the figure quoted in the printed messages. */
    private static final long FILE_COUNT = 800_000L;

    /**
     * Parses {@code ./data/TimeSeries/MCAN3.NHDES.MAP.1HOUR} into {@code textFile.txt} and prints
     * the elapsed time plus the projected time for {@link #FILE_COUNT} files.
     *
     * @param args ignored
     * @throws Exception if {@link ParseData#parseFile(String, String)} fails
     */
    public static void main(String[] args) throws Exception {
        long startedAt = System.currentTimeMillis();
        ParseData parseData = new ParseData();
        parseData.parseFile("./data/TimeSeries/MCAN3.NHDES.MAP.1HOUR", "textFile.txt");
        long timetaken = System.currentTimeMillis() - startedAt;

        // Milliseconds per file times the number of files, converted to seconds.
        // (The previous factor of 80 under-reported the projection tenfold.)
        long projectedSeconds = timetaken * FILE_COUNT / 1000L;

        System.out.println("Time taken for reading and writing 1 file is " + timetaken + "ms");
        System.out.println("Time taken for reading and writing 800,000 files is " + projectedSeconds + "seconds");
        System.out.println("Time taken for reading and writing 800,000 files is " + projectedSeconds / 60 + "minutes");
        System.out.println("Time taken for reading and writing 800,000 files is " + projectedSeconds / 3600 + "hours");
    }
}