TextParser.java [src/csip/utils] Revision: default  Date:
/*
 * $Id$
 *
 * This file is part of the Cloud Services Integration Platform (CSIP),
 * a Model-as-a-Service framework, API and application suite.
 *
 * 2012-2022, Olaf David and others, OMSLab, Colorado State University.
 *
 * OMSLab licenses this file to you under the MIT license.
 * See the LICENSE file in the project root for more information.
 */
package csip.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;

/**
 * Extract tokens from an ASCII file (usually some kind of model output).
 *
 * @author od
 */
public class TextParser implements AutoCloseable {

  /** Regex separating tokens by one or more whitespace characters. */
  public static final String WS_SEP = "\\s+";
  /** Regex separating tokens by a comma with optional surrounding whitespace. */
  public static final String COMMA_SEP = "\\s*,\\s*";

  /** The underlying line reader. */
  BufferedReader r;
  /** The current line; {@code null} once the end of the input was reached. */
  String line = "";
  /** Name of the input, used in error messages. */
  String name;
  /** Number of lines successfully read so far. */
  int lineno;
  /** If true, terminating operations close the stream. */
  boolean autoclose = true;
  /** True once the stream was closed (explicitly, by a terminator, or at EOF). */
  boolean isClosed = false;


  /**
   * Create a parser reading from a file.
   *
   * @param file the file to parse
   * @throws FileNotFoundException if the file cannot be opened for reading
   */
  public TextParser(File file) throws FileNotFoundException {
    r = new BufferedReader(new FileReader(file));
    name = file.toString();
  }


  /**
   * Create a parser reading from a file using a custom read buffer size.
   *
   * @param file the file to parse
   * @param buffsz the buffer size, must be &gt; 0
   * @throws FileNotFoundException if the file cannot be opened for reading
   * @throws IllegalArgumentException if {@code buffsz <= 0}
   */
  public TextParser(File file, int buffsz) throws FileNotFoundException {
    // Validate before opening the file: BufferedReader would reject the size
    // only after the FileReader was opened, leaking the file handle.
    if (buffsz <= 0)
      throw new IllegalArgumentException("Buffer size <= 0");
    r = new BufferedReader(new FileReader(file), buffsz);
    name = file.toString();
  }


  /**
   * Create a parser reading from an in-memory string.
   *
   * @param content the text to parse
   * @param name a name for the content, used in error messages
   */
  public TextParser(String content, String name) {
    r = new BufferedReader(new StringReader(content));
    this.name = name;
  }


  /**
   * Advance to the next line.
   *
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser nextLine() throws IOException {
    return nextLine(1);
  }


  /**
   * Advance to the next line that is not blank. The current line becomes
   * {@code null} if the end of the input is reached first.
   *
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser nextLineSkipEmpty() throws IOException {
    do {
      nextLine(1);
    } while (line != null && line.trim().isEmpty());
    return this;
  }


  /**
   * Advance by a number of lines.
   *
   * @param lines the number of lines to advance, must be &gt;= 1
   * @return this instance
   * @throws IllegalArgumentException if {@code lines < 1}
   * @throws IOException if reading fails or the stream is closed (for example
   * because the end of the input was reached before all lines were read)
   */
  public TextParser nextLine(int lines) throws IOException {
    if (lines < 1)
      throw new IllegalArgumentException("error: lines < 1");

    String err = "Cannot skip " + lines + " line(s) in " + name;
    for (int i = 0; i < lines; i++) {
      line = readLine(err);
    }
    return this;
  }


  /**
   * Advance to the next line containing the given text. The search starts
   * with the line following the current one. If there is no such line, the
   * current line becomes {@code null} and the stream is closed.
   *
   * @param text the text to look for
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser toLineContaining(String text) throws IOException {
    String err = "Not found in " + name + ": " + text;
    do {
      line = readLine(err);
    } while (line != null && !line.contains(text));
    return this;
  }


  /**
   * Advance to the next line starting with the given text. The search starts
   * with the line following the current one. If there is no such line, the
   * current line becomes {@code null} and the stream is closed.
   *
   * @param text the prefix to look for
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser toLineStartingWith(String text) throws IOException {
    String err = "Not found in " + name + ": " + text;
    do {
      line = readLine(err);
    } while (line != null && !line.startsWith(text));
    return this;
  }


  /**
   * Misspelled legacy name, kept for compatibility with existing callers.
   * Prefer {@link #toLineStartingWith(String)}.
   *
   * @param text the prefix to look for
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser toLineStaringWith(String text) throws IOException {
    return toLineStartingWith(text);
  }


  /**
   * Advance to the next line that entirely matches the regular expression.
   * The search starts with the line following the current one. If there is
   * no such line, the current line becomes {@code null} and the stream is
   * closed.
   *
   * @param regex the regular expression the whole line has to match
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser toLineMatching(String regex) throws IOException {
    String err = "No match found in " + name + ": " + regex;
    Pattern p = Pattern.compile(regex);
    do {
      line = readLine(err);
    } while (line != null && !p.matcher(line).matches());
    return this;
  }


  /**
   * Advance to the next line that does not contain the given text. The
   * search starts with the line following the current one. If all remaining
   * lines contain the text, the current line becomes {@code null} and the
   * stream is closed.
   *
   * @param text the text identifying lines to skip
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser skipLinesContaining(String text) throws IOException {
    String err = "Skipping lines for " + name + ": " + text;
    do {
      line = readLine(err);
      // null check: stop at EOF instead of failing with a NullPointerException
    } while (line != null && line.contains(text));
    return this;
  }


  /**
   * Advance to the next line that does not start with the given text. The
   * search starts with the line following the current one. If all remaining
   * lines start with the text, the current line becomes {@code null} and the
   * stream is closed.
   *
   * @param text the prefix identifying lines to skip
   * @return this instance
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser skipLinesStartingWith(String text) throws IOException {
    String err = "Skipping lines starting for " + name + ": " + text;
    do {
      line = readLine(err);
      // null check: stop at EOF instead of failing with a NullPointerException
    } while (line != null && line.startsWith(text));
    return this;
  }


  /**
   * Set this to false if terminators should not close the stream
   * @param autoclose if the terminating operation closes the stream
   * @return this instance
   */
  public TextParser autoClose(boolean autoclose) {
    this.autoclose = autoclose;
    return this;
  }


  /**
   * Reduce the current line to the part right of the first occurrence of the
   * text argument.
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the line does not contain the text
   */
  public TextParser rightOfFirst(String text) {
    int st = line.indexOf(text);
    if (st == -1)
      throw new IllegalArgumentException("text not found in " + line + ": " + text);
    line = line.substring(st + text.length());
    return this;
  }


  /**
   * Reduce the current line to the part right of the last occurrence of the
   * text argument.
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the line does not contain the text
   */
  public TextParser rightOfLast(String text) {
    int st = line.lastIndexOf(text);
    if (st == -1)
      throw new IllegalArgumentException("text not found in " + line + ": " + text);
    line = line.substring(st + text.length());
    return this;
  }


  /**
   * Reduce the current line to the part left of the first occurrence of the
   * text argument.
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the line does not contain the text
   */
  public TextParser leftOfFirst(String text) {
    int st = line.indexOf(text);
    if (st == -1)
      throw new IllegalArgumentException("text not found in " + line + ": " + text);
    line = line.substring(0, st);
    return this;
  }


  /**
   * Reduce the current line to the part left of the last occurrence of the
   * text argument.
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the line does not contain the text
   */
  public TextParser leftOfLast(String text) {
    int st = line.lastIndexOf(text);
    if (st == -1)
      throw new IllegalArgumentException("text not found in " + line + ": " + text);
    line = line.substring(0, st);
    return this;
  }


  /**
   * Create a new (!) TextParser with all content from the current line
   * until the first following line containing the text. Both the current
   * line and the line containing 'text' are included.
   * @param text the text to look for
   * @return a new instance, or null if the text was not found before the end
   * of the input.
   * @throws IOException if reading fails or the stream is already closed
   */
  public TextParser allUntil(String text) throws IOException {
    String err = "Not found in " + name + ": " + text;
    StringBuilder subContent = new StringBuilder().append(line).append('\n');
    do {
      line = readLine(err);
      if (line == null)
        return null;
      subContent.append(line).append('\n');
    } while (!line.contains(text));
    return new TextParser(subContent.toString(), "until");
  }


  // Terminating operations. They close the stream unless autoClose(false)
  // was called.
  //
  /**
   * Split the current line at whitespace.
   *
   * @return the tokens of the current line
   */
  public Tokens tokens() {
    return tokens(WS_SEP);
  }


  /**
   * Split the current line using a separator regex.
   *
   * @param regex the separator, e.g. {@link #WS_SEP} or {@link #COMMA_SEP}
   * @return the tokens of the current line
   */
  public Tokens tokens(String regex) {
    autoclose();
    return new Tokens(line, regex);
  }


  /**
   * Get the current line.
   *
   * @return the current line, null if the end of the input was reached
   */
  public String asString() {
    autoclose();
    return line;
  }


  /**
   * Parse the current line as a double.
   *
   * @return the double value of the current line
   * @throws NumberFormatException if the line is not a parsable double
   */
  public double asDouble() {
    autoclose();
    return Double.parseDouble(line);
  }


  /**
   * Parse the current line, with surrounding whitespace removed, as an int.
   *
   * @return the int value of the current line
   * @throws NumberFormatException if the line is not a parsable int
   */
  public int asInteger() {
    autoclose();
    return Integer.parseInt(line.trim());
  }


  /**
   * Get the number of lines read so far. Note that this is a terminating
   * operation too: it closes the stream unless autoClose(false) was called.
   *
   * @return the number of lines read
   */
  public int getLineNo() {
    autoclose();
    return lineno;
  }


  /**
   * Get a single whitespace separated token of the current line.
   *
   * @param col the token column, starts with 0
   * @return the token at column 'col', or null if there is no such token
   * @throws IllegalArgumentException if {@code col < 0}
   */
  public String getWsTokenAt(int col) {
    autoclose();
    return extractToken(line, col);
  }


  /** Close the stream if auto-closing is enabled. */
  private void autoclose() {
    if (autoclose)
      close();
  }


  /**
   * Close the underlying reader. Calling this more than once has no further
   * effect. Failures while closing are ignored.
   */
  @Override
  public synchronized void close() {
    if (isClosed || r == null)
      return;
    try {
      r.close();
    } catch (IOException ignored) {
      // best effort: there is nothing useful a caller could do about it
    }
    isClosed = true;
  }


  /**
   * @return true as long as the stream was not closed
   */
  public boolean notEOF() {
    return !isClosed;
  }


  /**
   * @return true if the stream was closed, either because the end of the
   * input was reached, a terminating operation was called, or close() was
   * called
   */
  public boolean isEOF() {
    return isClosed;
  }


  /**
   * Read the next line, closing the stream when the end is reached.
   *
   * @param err context appended to the message if the stream is closed
   * @return the line read, or null at the end of the input
   * @throws IOException if the stream is already closed or reading fails
   */
  private String readLine(String err) throws IOException {
    if (isClosed)
      throw new IOException(name + ": already closed. Use autoClose(false) to allow for successive reads. Do "
          + "not forget to close the stream at the end.\n" + err);
    String l = r.readLine();
    if (l == null)
      close();
    else
      lineno++;       // count only lines that actually exist
    return l;
  }


  @Override
  public String toString() {
    return lineno + ":  '" + line + "'";
  }


  /**
   * Fast token extract. tokens are separated by any number of white spaces.
   * Tokens cannot contain any white spaces, as in csv.
   *
   * @param line the line to extract the token from, may be null
   * @param col the column, starts with 0.
   * @return the token at column 'col', or null if the line is null or has
   * fewer than 'col' + 1 tokens
   * @throws IllegalArgumentException if {@code col < 0}
   */
  static String extractToken(String line, int col) {
    if (col < 0)
      throw new IllegalArgumentException("col argument < 0.");
    if (line == null)
      return null;

    int end = line.length();
    int idx = 0;
    int token = 0;
    while (idx < end) {
      // skip the separator run in front of the next token
      while (idx < end && Character.isWhitespace(line.charAt(idx)))
        idx++;
      if (idx == end)
        break;          // only trailing whitespace was left
      int start = idx;
      while (idx < end && !Character.isWhitespace(line.charAt(idx)))
        idx++;
      if (token++ == col)
        return line.substring(start, idx);
    }
    return null;
  }

//////////////
  /**
   * The tokens of a single line. The index operations modify this instance
   * and return it to allow for call chaining.
   */
  public static class Tokens {

    String[] tok;
    static final String[] EMPTY = new String[]{};


    /**
     * Split a line into tokens. A null line has no tokens. Note that a blank
     * line results in a single, empty token (see String.split).
     *
     * @param line the line to split, surrounding whitespace gets removed
     * @param regex the separator regex
     */
    private Tokens(String line, String regex) {
      tok = (line == null) ? EMPTY : line.trim().split(regex);
    }


    /**
     * @return the number of tokens
     */
    public int count() {
      return tok.length;
    }


    /**
     * Find the index of val. in the token array.
     *
     * @param val the string to match (equals)
     * @return the index of val, or -1 if not existent.
     */
    public int indexOf(String val) {
      for (int i = 0; i < tok.length; i++) {
        if (tok[i].equals(val))
          return i;
      }
      return -1;
    }


    /**
     * Keep the tokens starting at index b, drop the ones before.
     *
     * @param b the index of the first token to keep, 0 &lt;= b &lt; count()
     * @return this instance
     * @throws IllegalArgumentException if b is out of range
     */
    public Tokens fromIndex(int b) {
      if (b < 0 || b > tok.length - 1)
        throw new IllegalArgumentException("invalid begin index:" + b);
      if (b == 0)
        return this;
      tok = Arrays.copyOfRange(tok, b, tok.length);
      return this;
    }


    /**
     * Drop tokens at the end: -1 drops the last token, -2 the last two, etc.
     * At least one token has to remain.
     *
     * @param e must be zero or negative (counts from the end of the tokens)
     * @return this instance
     * @throws IllegalArgumentException if e is positive or would drop all
     * tokens
     */
    public Tokens toIndex(int e) {
      if (e > 0 || -e > tok.length - 1)
        throw new IllegalArgumentException("invalid end index:" + e);
      if (e == 0)
        return this;
      tok = Arrays.copyOfRange(tok, 0, tok.length + e);
      return this;
    }


    /** Stream of all tokens parsed as double. */
    private DoubleStream dstream() {
      return Arrays.stream(tok).mapToDouble(Double::parseDouble);
    }


    /** Stream of all tokens parsed as int. */
    private IntStream istream() {
      return Arrays.stream(tok).mapToInt(Integer::parseInt);
    }


    /**
     * @return the sum of all tokens
     * @throws NumberFormatException if a token is not a parsable double
     */
    public double sum() {
      return dstream().sum();
    }


    /**
     * @return the average of all tokens
     * @throws NumberFormatException if a token is not a parsable double
     * @throws java.util.NoSuchElementException if there are no tokens
     */
    public double average() {
      return dstream().average().getAsDouble();
    }


    /**
     * @return the smallest token value
     * @throws NumberFormatException if a token is not a parsable double
     * @throws java.util.NoSuchElementException if there are no tokens
     */
    public double min() {
      return dstream().min().getAsDouble();
    }


    /**
     * @return the largest token value
     * @throws NumberFormatException if a token is not a parsable double
     * @throws java.util.NoSuchElementException if there are no tokens
     */
    public double max() {
      return dstream().max().getAsDouble();
    }


    /**
     * @return all tokens parsed as double
     * @throws NumberFormatException if a token is not a parsable double
     */
    public double[] asDoubleArray() {
      return dstream().toArray();
    }


    /**
     * @return all tokens parsed as int
     * @throws NumberFormatException if a token is not a parsable int
     */
    public int[] asIntArray() {
      return istream().toArray();
    }


    /**
     * @return the tokens; this is the internal array, not a copy
     */
    public String[] asStringArray() {
      return tok;
    }
  }

}