// ParseTimeSeries.java [src/java/org/rti/timeseries] Revision:   Date:
package org.rti.timeseries;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;

/**
 * Reads a time-series text file, captures its header fields and rewrites the
 * file with normalized headers followed by the data rows.
 *
 * <p>Recognized headers are {@code NumTS}, {@code TSID}, {@code DataType},
 * {@code Description}, {@code Units}, {@code Missing}, {@code Start} and
 * {@code End}, each written as {@code Key = value}. The first line containing
 * {@code #} is kept as the comment. Each header is accepted once; the parse
 * flags are never reset, so an instance is meant for a single
 * {@link #parseFile} call.
 *
 * <p>The {@code values}, {@code dateObjects} and {@code count} members are
 * initialized here but not populated by this class.
 */
class ParseData {

  /** Timestamp layout of data rows and of the Start/End headers that are written out. */
  private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss:SS";

  /** {@link #DATE_PATTERN} as it appears once all whitespace is stripped from a header line. */
  private static final String COMPACT_DATE_PATTERN = "yyyy-MM-ddHH:mm:ss:SS";

  /** Width handed to {@link #padString} so every header label lines up with the longest one. */
  private static final int LABEL_WIDTH = "Description".length();

  String TSID, dataType, desc, units, missing, comment;
  int numTS;
  boolean flagNumTs, flagTSID, flagDataType, flagDesc, flagUnits, flagMissing, flagStart, flagEnd, verifyFirstDate, flagComment;
  Date start, end;
  List<Date> dateObjects;
  List<Float> values;
  AtomicInteger count;


  /** Creates a parser with placeholder header values and every "seen" flag cleared. */
  public ParseData() {
    numTS = -1;
    TSID = "";
    dataType = "default";
    desc = "default";
    units = "default";
    missing = "default";
    comment = "#";
    start = null;
    end = null;
    dateObjects = new ArrayList<>();
    values = new ArrayList<>();
    count = new AtomicInteger(0);

    flagNumTs = false;
    flagTSID = false;
    flagDataType = false;
    flagDesc = false;
    flagUnits = false;
    flagMissing = false;
    flagStart = false;
    flagEnd = false;
    flagComment = false;
    verifyFirstDate = false;

  }


  /**
   * Parses {@code fileName} and writes the normalized result to {@code targetFile}.
   *
   * <p>Header lines are consumed until the first data row is reached. At that
   * point the headers seen so far are written, the row's timestamp is checked
   * against the {@code Start} header, and the row plus every following row is
   * copied through unchanged. The target file is created or truncated and is
   * written as UTF-8 with {@code \n} line endings. Both files are closed on
   * every exit path.
   *
   * @param fileName   path of the time-series file to read
   * @param targetFile path of the file to write
   * @throws Exception if a Start/End header cannot be parsed, the first data
   *     row does not begin with a timestamp equal to Start, TSID/Start/End are
   *     missing, NumTS is not an integer, or an I/O error occurs
   */
  public void parseFile(String fileName, String targetFile) throws Exception {
    try (Stream<String> lines = Files.lines(Paths.get(fileName));
         BufferedWriter out = Files.newBufferedWriter(Paths.get(targetFile), StandardCharsets.UTF_8)) {
      Iterator<String> iterator = lines.iterator();
      while (iterator.hasNext()) {
        String line = iterator.next();

        if (consumeHeader(line)) {
          continue;
        }

        // The first line that is not a header is treated as the first data row.
        if (!verifyFirstDate) {
          writeHeaders(out);
          verifyFirstDate = true;
          verifyFirstEntry(line);
        }

        if (!(flagStart && flagEnd && flagTSID)) {
          throw new Exception("The headers are not defined correctly. Make sure the headers are defined correctly");
        }
        out.write(line);
        out.write("\n");
      }
    }
  }


  /**
   * Tries to interpret {@code line} as the comment or as a not-yet-seen header.
   *
   * <p>Header matching is done on a whitespace-stripped copy, so the original
   * line is left intact for the data path. Header values are therefore stored
   * without any spaces or tabs.
   *
   * @return {@code true} if the line was consumed, {@code false} if it should
   *     be handled as a data row
   */
  private boolean consumeHeader(String line) throws Exception {
    if (!flagComment && line.indexOf('#') != -1) {
      this.comment = line;
      flagComment = true;
      return true;
    }

    // Every header has the form "Key = value"; lines without '=' cannot match.
    if (line.indexOf('=') == -1) {
      return false;
    }
    String compact = line.replace(" ", "").replace("\t", "");

    if (!flagNumTs && compact.startsWith("NumTS=")) {
      setNumTS(Integer.parseInt(compact.substring("NumTS=".length())));
      flagNumTs = true;
      return true;
    }
    if (!flagTSID && compact.startsWith("TSID=")) {
      setTSID(compact.substring("TSID=".length()));
      flagTSID = true;
      return true;
    }
    if (!flagDataType && compact.startsWith("DataType=")) {
      setDataType(compact.substring("DataType=".length()));
      flagDataType = true;
      return true;
    }
    if (!flagDesc && compact.startsWith("Description=")) {
      setDesc(compact.substring("Description=".length()));
      flagDesc = true;
      return true;
    }
    if (!flagUnits && compact.startsWith("Units=")) {
      setUnits(compact.substring("Units=".length()));
      flagUnits = true;
      return true;
    }
    if (!flagMissing && compact.startsWith("Missing=")) {
      setMissing(compact.substring("Missing=".length()));
      flagMissing = true;
      return true;
    }
    if (!flagStart && compact.startsWith("Start=")) {
      start = parseHeaderDate(compact.substring("Start=".length()),
          "The Start Date format is not specified correctly");
      flagStart = true;
      return true;
    }
    if (!flagEnd && compact.startsWith("End=")) {
      end = parseHeaderDate(compact.substring("End=".length()),
          "The End Date format is not specified correctly");
      flagEnd = true;
      return true;
    }
    return false;
  }


  /** Parses a whitespace-stripped Start/End value, reporting failures with {@code errorMessage}. */
  private static Date parseHeaderDate(String compactValue, String errorMessage) throws Exception {
    try {
      return new SimpleDateFormat(COMPACT_DATE_PATTERN).parse(compactValue);
    } catch (ParseException ex) {
      throw new Exception(errorMessage, ex);
    }
  }


  /** Writes the comment and every header that was seen, in a fixed order with aligned labels. */
  private void writeHeaders(BufferedWriter out) throws IOException {
    if (flagComment) {
      out.write(getComment() + "\n");
    }
    if (flagNumTs) {
      writeHeader(out, "Num TS", String.valueOf(getNumTS()));
    }
    if (flagTSID) {
      writeHeader(out, "TSID", getTSID());
    }
    if (flagDataType) {
      writeHeader(out, "Data Type", getDataType());
    }
    if (flagDesc) {
      writeHeader(out, "Description", getDesc());
    }
    if (flagUnits) {
      writeHeader(out, "Units", getUnits());
    }
    if (flagMissing) {
      writeHeader(out, "Missing", getMissing());
    }
    if (flagStart) {
      writeHeader(out, "Start", getStartDate());
    }
    if (flagEnd) {
      writeHeader(out, "End", getEndDate());
    }
  }


  /** Writes one {@code label = value} header line. */
  private void writeHeader(BufferedWriter out, String label, String value) throws IOException {
    out.write(padString(label, LABEL_WIDTH) + " = " + value + "\n");
  }


  /** Checks that the first data row begins with a timestamp equal to the Start header. */
  private void verifyFirstEntry(String line) throws Exception {
    // The timestamp text has the same number of characters as the pattern itself.
    int timestampLength = DATE_PATTERN.length();
    if (line.length() < timestampLength) {
      throw new ParseException("Data line is too short to hold a timestamp: \"" + line + "\"", 0);
    }
    Date first = new SimpleDateFormat(DATE_PATTERN).parse(line.substring(0, timestampLength));
    if (!first.equals(start)) {
      throw new Exception("Start Date not equal to the first entry in the data");
    }
  }


  /** @return the values list; it is created empty and not filled by this class */
  protected List<Float> getValues() {
    return this.values;
  }


  /** @return the date list; it is created empty and not filled by this class */
  protected List<Date> getDateObjects() {
    return this.dateObjects;
  }


  private void setNumTS(int numTS) {
    this.numTS = numTS;
  }


  /** @return the NumTS header value, or -1 if none was parsed */
  public int getNumTS() {
    return numTS;
  }


  private void setTSID(String TSID) {
    this.TSID = TSID;
  }


  /** @return the TSID header value, or an empty string if none was parsed */
  public String getTSID() {
    return this.TSID;
  }


  private void setDataType(String dataType) {
    this.dataType = dataType;
  }


  /** @return the DataType header value, or "default" if none was parsed */
  public String getDataType() {
    return this.dataType;
  }


  private void setDesc(String desc) {
    this.desc = desc;
  }


  /** @return the Description header value, or "default" if none was parsed */
  public String getDesc() {
    return this.desc;
  }


  private void setUnits(String units) {
    this.units = units;
  }


  /** @return the Units header value, or "default" if none was parsed */
  public String getUnits() {
    return this.units;
  }


  private void setMissing(String missing) {
    this.missing = missing;
  }


  /** @return the Missing header value, or "default" if none was parsed */
  public String getMissing() {
    return this.missing;
  }


  /** Formats the Start date for output; only valid once a Start header has been parsed. */
  private String getStartDate() {
    return new SimpleDateFormat(DATE_PATTERN).format(this.start);
  }


  /** Formats the End date for output; only valid once an End header has been parsed. */
  private String getEndDate() {
    return new SimpleDateFormat(DATE_PATTERN).format(this.end);
  }


  /** @return the parsed Start date, or {@code null} if none was parsed */
  public Date getStart() {
    return this.start;
  }


  /** @return the parsed End date, or {@code null} if none was parsed */
  public Date getEnd() {
    return this.end;
  }


  /** @return the comment line, or "#" if none was parsed */
  public String getComment() {
    return this.comment;
  }


  /**
   * Right-pads {@code str} with spaces to {@code len + 1} characters.
   *
   * <p>The result is one character wider than {@code len}; strings already
   * longer than {@code len} are returned unchanged.
   *
   * @param str text to pad
   * @param len nominal column width
   * @return the padded text
   */
  public String padString(String str, int len) {
    StringBuilder padded = new StringBuilder(str);
    for (int i = str.length(); i <= len; i++) {
      padded.append(' ');
    }
    return padded.toString();
  }

}

/**
 * Command-line driver that converts one sample time-series file and prints how
 * long it took, plus a linear projection of that time onto a large batch.
 */
public class ParseTimeSeries {

  /** Number of files the projection lines refer to ("800,000" in the messages). */
  private static final long PROJECTED_FILE_COUNT = 800_000L;

  /**
   * Parses {@code ./data/TimeSeries/MCAN3.NHDES.MAP.1HOUR} into
   * {@code textFile.txt} and reports the elapsed and projected times.
   *
   * @param args unused
   * @throws Exception if {@code ParseData.parseFile} fails
   */
  public static void main(String[] args) throws Exception {
    // nanoTime is monotonic, so the measurement is unaffected by wall-clock adjustments.
    long startNanos = System.nanoTime();
    ParseData parseData = new ParseData();
    parseData.parseFile("./data/TimeSeries/MCAN3.NHDES.MAP.1HOUR", "textFile.txt");
    long timetaken = (System.nanoTime() - startNanos) / 1_000_000L;

    // timetaken is milliseconds per file: total ms = timetaken * count, seconds = that / 1000.
    long projectedSeconds = timetaken * PROJECTED_FILE_COUNT / 1000L;

    System.out.println("Time taken for reading and writing 1 file is " + timetaken + "ms");
    System.out.println("Time taken for reading and writing 800,000 files is " + projectedSeconds + "seconds");
    System.out.println("Time taken for reading and writing 800,000 files is " + projectedSeconds / 60 + "minutes");
    System.out.println("Time taken for reading and writing 800,000 files is " + projectedSeconds / 3600 + "hours");

  }
}