// TextParser.java [src/csip/utils] Revision: default Date:
/*
* $Id$
*
* This file is part of the Cloud Services Integration Platform (CSIP),
* a Model-as-a-Service framework, API and application suite.
*
* 2012-2022, Olaf David and others, OMSLab, Colorado State University.
*
* OMSLab licenses this file to you under the MIT license.
* See the LICENSE file in the project root for more information.
*/
package csip.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
/**
 * Extract tokens from an ASCII file (usually some kind of model output).
 *
 * <p>The parser keeps one "current line". Positioning methods
 * ({@code nextLine}, {@code toLine...}, {@code skipLines...}) read forward and
 * replace the current line; slicing methods ({@code leftOf...},
 * {@code rightOf...}) narrow it; terminating methods ({@code tokens},
 * {@code asString}, {@code asDouble}, ...) return a value and, unless
 * {@link #autoClose(boolean) autoClose(false)} was called, close the
 * underlying reader.
 *
 * <p>When the end of the input is reached the current line becomes
 * {@code null} and the reader is closed. Any further read attempt throws an
 * {@link IOException}.
 *
 * <pre>
 * try (TextParser p = new TextParser(file).autoClose(false)) {
 *   double v = p.toLineContaining("Total").rightOfFirst(":").asDouble();
 * }
 * </pre>
 *
 * @author od
 */
public class TextParser implements AutoCloseable {

  /** Token separator regex: one or more whitespace characters. */
  public static final String WS_SEP = "\\s+";
  /** Token separator regex: a comma with optional surrounding whitespace. */
  public static final String COMMA_SEP = "\\s*,\\s*";

  /** Source of the lines. */
  BufferedReader r;
  /** The current line; empty before the first read, null once the end of input was hit. */
  String line = "";
  /** Name of the source, used in error messages only. */
  String name;
  /** Number of read attempts so far (the read that hits the end of input is counted too). */
  int lineno;
  /** If true, terminating operations close the reader. */
  boolean autoclose = true;
  /** Set once the reader was closed, either explicitly, by a terminator, or at end of input. */
  boolean isClosed = false;

  /**
   * Create a parser reading from a file.
   *
   * @param file the file to read
   * @throws FileNotFoundException if the file cannot be opened for reading
   */
  public TextParser(File file) throws FileNotFoundException {
    r = new BufferedReader(new FileReader(file));
    name = file.toString();
  }

  /**
   * Create a parser reading from a file using a custom buffer size.
   *
   * @param file the file to read
   * @param buffsz the size of the read buffer
   * @throws FileNotFoundException if the file cannot be opened for reading
   */
  public TextParser(File file, int buffsz) throws FileNotFoundException {
    r = new BufferedReader(new FileReader(file), buffsz);
    name = file.toString();
  }

  /**
   * Create a parser reading from a string.
   *
   * @param content the text to parse
   * @param name a name for the content, used in error messages
   */
  public TextParser(String content, String name) {
    r = new BufferedReader(new StringReader(content));
    this.name = name;
  }

  /**
   * Advance to the next line.
   *
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser nextLine() throws IOException {
    return nextLine(1);
  }

  /**
   * Advance to the next line that is not empty after trimming. Stops with a
   * {@code null} current line if the end of input is reached first.
   *
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser nextLineSkipEmpty() throws IOException {
    do {
      nextLine(1);
    } while (line != null && line.trim().isEmpty());
    return this;
  }

  /**
   * Advance by a number of lines.
   *
   * @param lines the number of lines to advance, must be at least 1
   * @return this instance
   * @throws IllegalArgumentException if {@code lines < 1}
   * @throws IOException if reading fails or the input ends before all lines
   * were read
   */
  public TextParser nextLine(int lines) throws IOException {
    if (lines < 1) {
      throw new IllegalArgumentException("error: lines < 1");
    }
    String err = "Cannot skip " + lines + " in " + name;
    for (int i = 0; i < lines; i++) {
      line = readLine(err);
    }
    return this;
  }

  /**
   * Advance to the next line containing the text. The current line becomes
   * {@code null} if no such line exists.
   *
   * @param text the text to look for
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser toLineContaining(String text) throws IOException {
    String err = "Not found in " + name + ": " + text;
    do {
      line = readLine(err);
    } while (line != null && !line.contains(text));
    return this;
  }

  /**
   * Advance to the next line starting with the text. The current line becomes
   * {@code null} if no such line exists.
   *
   * @param text the prefix to look for
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser toLineStartingWith(String text) throws IOException {
    String err = "Not found in " + name + ": " + text;
    do {
      line = readLine(err);
    } while (line != null && !line.startsWith(text));
    return this;
  }

  /**
   * Misspelled name of {@link #toLineStartingWith(String)}, kept so existing
   * callers continue to compile.
   *
   * @param text the prefix to look for
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser toLineStaringWith(String text) throws IOException {
    return toLineStartingWith(text);
  }

  /**
   * Advance to the next line that matches the regular expression as a whole.
   * The current line becomes {@code null} if no such line exists.
   *
   * @param regex the regular expression the entire line must match
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser toLineMatching(String regex) throws IOException {
    String err = "No match found in " + name + ": " + regex;
    Pattern p = Pattern.compile(regex);
    do {
      line = readLine(err);
    } while (line != null && !p.matcher(line).matches());
    return this;
  }

  /**
   * Advance at least one line, then keep advancing while the line contains
   * the text. The current line becomes {@code null} if the input ends while
   * skipping.
   *
   * @param text the text marking lines to skip
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser skipLinesContaining(String text) throws IOException {
    String err = "Skipping lines for " + name + ": " + text;
    do {
      line = readLine(err);
      // the null check stops at end of input instead of dereferencing null
    } while (line != null && line.contains(text));
    return this;
  }

  /**
   * Advance at least one line, then keep advancing while the line starts
   * with the text. The current line becomes {@code null} if the input ends
   * while skipping.
   *
   * @param text the prefix marking lines to skip
   * @return this instance
   * @throws IOException if reading fails or the parser is already closed
   */
  public TextParser skipLinesStartingWith(String text) throws IOException {
    String err = "Skipping lines starting for " + name + ": " + text;
    do {
      line = readLine(err);
      // the null check stops at end of input instead of dereferencing null
    } while (line != null && line.startsWith(text));
    return this;
  }

  /**
   * Set this to false if terminators should not close the stream
   *
   * @param autoclose if the terminating operation closes the stream
   * @return this instance
   */
  public TextParser autoClose(boolean autoclose) {
    this.autoclose = autoclose;
    return this;
  }

  /**
   * Reduce the current line to the part right of the first occurrence of the
   * text argument.
   *
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the text is not in the current line
   */
  public TextParser rightOfFirst(String text) {
    int st = requireFound(line.indexOf(text), text);
    line = line.substring(st + text.length());
    return this;
  }

  /**
   * Reduce the current line to the part right of the last occurrence of the
   * text argument.
   *
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the text is not in the current line
   */
  public TextParser rightOfLast(String text) {
    int st = requireFound(line.lastIndexOf(text), text);
    line = line.substring(st + text.length());
    return this;
  }

  /**
   * Reduce the current line to the part left of the first occurrence of the
   * text argument.
   *
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the text is not in the current line
   */
  public TextParser leftOfFirst(String text) {
    int st = requireFound(line.indexOf(text), text);
    line = line.substring(0, st);
    return this;
  }

  /**
   * Reduce the current line to the part left of the last occurrence of the
   * text argument.
   *
   * @param text the text to look for
   * @return this instance
   * @throws IllegalArgumentException if the text is not in the current line
   */
  public TextParser leftOfLast(String text) {
    int st = requireFound(line.lastIndexOf(text), text);
    line = line.substring(0, st);
    return this;
  }

  /** Pass a search result through, failing if it signals "not found". */
  private int requireFound(int idx, String text) {
    if (idx == -1) {
      throw new IllegalArgumentException("text not found in " + line + ": " + text);
    }
    return idx;
  }

  /**
   * Create a new (!) TextParser with all the content from the current line
   * up to the next line containing the text. That line is included.
   *
   * @param text the text to look for
   * @return a new instance, or {@code null} if the text was not found before
   * the end of input.
   * @throws IOException if reading fails
   */
  public TextParser allUntil(String text) throws IOException {
    String err = "Not found in " + name + ": " + text;
    StringBuilder subContent = new StringBuilder().append(line).append('\n');
    do {
      line = readLine(err);
      if (line != null) {
        subContent.append(line).append('\n');
      }
    } while (line != null && !line.contains(text));
    if (line == null) {
      return null;
    }
    return new TextParser(subContent.toString(), "until");
  }

  // Terminating operations. They close the stream unless autoClose(false)
  // was requested.
  //

  /**
   * Split the current line at whitespace.
   *
   * @return the tokens of the current line
   */
  public Tokens tokens() {
    return tokens(WS_SEP);
  }

  /**
   * Split the current line using a separator regex.
   *
   * @param regex the separator, e.g. {@link #WS_SEP} or {@link #COMMA_SEP}
   * @return the tokens of the current line
   */
  public Tokens tokens(String regex) {
    autoclose();
    return new Tokens(line, regex);
  }

  /**
   * @return the current line, {@code null} if the end of input was reached
   */
  public String asString() {
    autoclose();
    return line;
  }

  /**
   * @return the current line parsed as double
   * @throws NumberFormatException if the line is not a valid double
   */
  public double asDouble() {
    autoclose();
    return Double.parseDouble(line);
  }

  /**
   * @return the trimmed current line parsed as int
   * @throws NumberFormatException if the line is not a valid int
   */
  public int asInteger() {
    autoclose();
    return Integer.parseInt(line.trim());
  }

  /**
   * Note that this is a terminating operation too.
   *
   * @return the number of read attempts made so far
   */
  public int getLineNo() {
    autoclose();
    return lineno;
  }

  /**
   * Get a single whitespace separated token of the current line.
   *
   * @param col the token index, starts with 0
   * @return the token, or {@code null} if the line has no such token
   * @throws IllegalArgumentException if {@code col < 0}
   */
  public String getWsTokenAt(int col) {
    autoclose();
    return extractToken(line, col);
  }

  /** Close the reader if terminators are set up to do so. */
  private void autoclose() {
    if (autoclose) {
      close();
    }
  }

  /**
   * Close the underlying reader. A failure while closing is ignored.
   */
  @Override
  public synchronized void close() {
    if (r != null) {
      try {
        r.close();
      } catch (IOException ignored) {
        // best effort: there is nothing useful a caller could do about it
      }
      isClosed = true;
    }
  }

  /**
   * @return true as long as the reader is open. Be aware that this also
   * turns false after a terminating operation closed the reader.
   */
  public boolean notEOF() {
    return !isClosed;
  }

  /**
   * @return true once the reader is closed, either because the end of input
   * was reached or because a terminating operation or {@link #close()}
   * closed it.
   */
  public boolean isEOF() {
    return isClosed;
  }

  /**
   * Read one line, closing the reader when the end of input is hit.
   *
   * @param err context appended to the message if the parser is closed
   * @return the line, or null at the end of input
   * @throws IOException if the parser is already closed or reading fails
   */
  private String readLine(String err) throws IOException {
    if (isClosed) {
      throw new IOException(name + ": already closed. Use autoClose(false) to allow for successive reads. Do "
          + "not forget to close the stream at the end.\n" + err);
    }
    String l = r.readLine();
    if (l == null) {
      close();
    }
    lineno++;
    return l;
  }

  @Override
  public String toString() {
    return lineno + ": '" + line + "'";
  }

  /**
   * Fast token extract. tokens are separated by any number of white spaces.
   * Tokens cannot contain any white spaces, as in csv. Leading and trailing
   * white space of the line is ignored.
   *
   * @param line the line to scan, may be null
   * @param col the column, starts with 0.
   * @return the token at column 'col', or null if the line is null or has
   * fewer than {@code col + 1} tokens
   * @throws IllegalArgumentException if {@code col < 0}
   */
  static String extractToken(String line, int col) {
    if (col < 0) {
      throw new IllegalArgumentException("col argument < 0.");
    }
    if (line == null) {
      return null;
    }
    int len = line.length();
    int idx = 0;
    for (int token = 0;; token++) {
      // skip the separator run in front of the token
      while (idx < len && Character.isWhitespace(line.charAt(idx))) {
        idx++;
      }
      if (idx == len) {
        // ran out of input before reaching column 'col'
        return null;
      }
      int start = idx;
      while (idx < len && !Character.isWhitespace(line.charAt(idx))) {
        idx++;
      }
      if (token == col) {
        return line.substring(start, idx);
      }
    }
  }

  //////////////
  /**
   * The tokens of one line, with range selection and numeric conversions.
   * {@link #fromIndex(int)} and {@link #toIndex(int)} modify this instance.
   */
  public static class Tokens {

    /** The current tokens. */
    String[] tok;
    static final String[] EMPTY = new String[]{};

    /**
     * Split a line into tokens. A null or blank line has no tokens.
     *
     * @param line the line to split, gets trimmed first
     * @param regex the separator regex
     */
    private Tokens(String line, String regex) {
      String trimmed = (line == null) ? "" : line.trim();
      // "".split(..) would give one empty token, which no conversion can handle
      tok = trimmed.isEmpty() ? EMPTY : trimmed.split(regex);
    }

    /**
     * @return the number of tokens
     */
    public int count() {
      return tok.length;
    }

    /**
     * Find the index of val. in the token array.
     *
     * @param val the string to match (equals)
     * @return the index of val, or -1 if not existent.
     */
    public int indexOf(String val) {
      for (int i = 0; i < tok.length; i++) {
        if (tok[i].equals(val)) {
          return i;
        }
      }
      return -1;
    }

    /**
     * Drop the tokens in front of an index.
     *
     * @param b index of the first token to keep; 0 keeps everything
     * @return this instance
     * @throws IllegalArgumentException if b is negative or not a valid token
     * index
     */
    public Tokens fromIndex(int b) {
      if (b == 0) {
        // nothing to drop, valid even if there are no tokens at all
        return this;
      }
      if (b < 0 || b > tok.length - 1) {
        throw new IllegalArgumentException("invalid begin index:" + b);
      }
      tok = Arrays.copyOfRange(tok, b, tok.length);
      return this;
    }

    /**
     * Drop tokens at the end.
     *
     * @param e must be zero or negative (counts from the end): -1 drops the
     * last token, -2 the last two, 0 keeps everything. At least one token
     * must remain.
     * @return this instance
     * @throws IllegalArgumentException if e is positive or would drop all
     * tokens
     */
    public Tokens toIndex(int e) {
      if (e == 0) {
        // nothing to drop, valid even if there are no tokens at all
        return this;
      }
      // compared without negating e, so Integer.MIN_VALUE cannot overflow
      if (e > 0 || e < 1 - tok.length) {
        throw new IllegalArgumentException("invalid end index:" + e);
      }
      tok = Arrays.copyOfRange(tok, 0, tok.length + e);
      return this;
    }

    /** The tokens parsed as doubles. */
    private DoubleStream dstream() {
      return Arrays.stream(tok).mapToDouble(Double::parseDouble);
    }

    /** The tokens parsed as ints. */
    private IntStream istream() {
      return Arrays.stream(tok).mapToInt(Integer::parseInt);
    }

    /**
     * @return the sum of all tokens, 0 if there are none
     * @throws NumberFormatException if a token is not a valid double
     */
    public double sum() {
      return dstream().sum();
    }

    /**
     * @return the average of all tokens
     * @throws NumberFormatException if a token is not a valid double
     * @throws java.util.NoSuchElementException if there are no tokens
     */
    public double average() {
      return dstream().average().getAsDouble();
    }

    /**
     * @return the smallest token value
     * @throws NumberFormatException if a token is not a valid double
     * @throws java.util.NoSuchElementException if there are no tokens
     */
    public double min() {
      return dstream().min().getAsDouble();
    }

    /**
     * @return the largest token value
     * @throws NumberFormatException if a token is not a valid double
     * @throws java.util.NoSuchElementException if there are no tokens
     */
    public double max() {
      return dstream().max().getAsDouble();
    }

    /**
     * @return all tokens parsed as doubles
     * @throws NumberFormatException if a token is not a valid double
     */
    public double[] asDoubleArray() {
      return dstream().toArray();
    }

    /**
     * @return all tokens parsed as ints
     * @throws NumberFormatException if a token is not a valid int
     */
    public int[] asIntArray() {
      return istream().toArray();
    }

    /**
     * @return the tokens; this is the internal array, not a copy
     */
    public String[] asStringArray() {
      return tok;
    }
  }
}