<meta name="decorator" content="popup"/>
<meta name="useModuleDefaults" content="true"/>
<meta name="module" content="sources"/>
<meta name="pageTitle" content="MongoAccess.java [src/java/utils], Revision: default"/>

<FONT CLASS="titlenormal">MongoAccess.java [src/java/utils] Revision: default&nbsp;&nbsp;Date: </FONT>
<HR NOSHADE>
<PRE><PRE CLASS="editor">/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package utils;

import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.AggregateIterable;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.MongoIterable;
import com.mongodb.client.gridfs.GridFSBucket;
import com.mongodb.client.gridfs.GridFSBuckets;
import com.mongodb.client.gridfs.GridFSDownloadStream;
import com.mongodb.client.gridfs.model.GridFSUploadOptions;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Updates;
import csip.Config;
import csip.ServiceException;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import m.ann.training.scale.DataSetIndices;
import org.apache.commons.lang.SerializationUtils;

import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.types.ObjectId;
import org.encog.neural.neat.NEATNetwork;

import utils.MongoUtils.Sorting;
import utils.MongoUtils.ServiceFunction;

/*
Examples for mongo shell, or robomongo

find one:
  db.raw.findOne({&quot;name&quot;: &quot;erosion&quot;})

size of a bson:
  Object.bsonsize(db.raw.findOne({&quot;name&quot;: &quot;erosion&quot;}))

get the erosion values as JSON Array:
  db.raw.findOne({ name:&quot;erosion&quot; }).values

get the erosion values as String Array: 
  db.raw.findOne({name:&quot;erosion&quot;}).values.toString()

get the erosion values as JSON, one line:
  tojsononeline(db.raw.findOne({name:&quot;erosion&quot;}).values)

 */
/**
 * Static helpers for storing and retrieving ANN data in MongoDB; each ANN
 * uses its own database.
 *
 * @author od
 */
public class MongoAccess {

  // Key names used in the stored documents. Not every key is referenced in
  // this class.
  public static final String VALUES_COUNT = &quot;count&quot;;
  public static final String NAME = &quot;name&quot;;
  public static final String TYPE = &quot;type&quot;;
  public static final String MIN = &quot;min&quot;;
  public static final String MAX = &quot;max&quot;;
  public static final String MIN_INDEX = &quot;min_index&quot;;
  public static final String MAX_INDEX = &quot;max_index&quot;;
  public static final String NORM = &quot;norm&quot;;
  public static final String NORM_MIN = &quot;norm_min&quot;;
  public static final String NORM_MAX = &quot;norm_max&quot;;
  public static final String VAL_ID = &quot;values_id&quot;;
  // IN and OUT are also matched as substrings of the parameter descriptions
  // when data is collected.
  public static final String IN = &quot;in&quot;;
  public static final String OUT = &quot;out&quot;;
  public static final String VALUES = &quot;values&quot;;
  public static final String METADATA = &quot;metadata&quot;;
  public static final String TIMESTAMP = &quot;timestamp&quot;;
  public static final String FILES = &quot;files&quot;;
  public static final String SUID = &quot;suid&quot;;

  // ANN properties
  public static final String ANN_ID = &quot;_id&quot;;
  public static final String POPULATION = &quot;population&quot;;
  public static final String EPOCHS = &quot;epochs&quot;;
  public static final String RECOVERY_EPOCHS = &quot;recovery_epochs&quot;;
  public static final String MAX_EPOCHS = &quot;max_epochs&quot;;
  public static final String SCORE = &quot;score&quot;;
  public static final String SCORES = &quot;scores&quot;;
  public static final String SCALE_MECHANISM = &quot;scale_mechanism&quot;;
  public static final String EXIT_STRATEGY = &quot;exit_strategy&quot;;
  public static final String CONNECTION_DENSITY = &quot;connection_density&quot;;
  public static final String VARIABLES = &quot;variables&quot;;
  public static final String TRAINING_PERC = &quot;training_perc&quot;;
  public static final String TRAINING_ERROR = &quot;training_error&quot;;
  public static final String BEST_NET_STRUCTURE = &quot;best_net_structure&quot;;
  public static final String STRUCTURE_ID = &quot;network_structure_id&quot;;
  public static final String LINK_NUM = &quot;link_number&quot;;
  public static final String IN_NODE_NUM = &quot;input_nodes&quot;;
  public static final String OUT_NODE_NUM = &quot;output_nodes&quot;;
  public static final String HIDE_NODE_NUM = &quot;hidden_nodes&quot;;
  public static final String STRUCTURE = &quot;structure&quot;;

  // DB documents
  //public static final String MODEL_STAT = &quot;model_stat&quot;;
  public static final String PERFORMANCE = &quot;performance&quot;;
  public static final String HISTORY = &quot;history&quot;;
  public static final String HYPERPARAMS = &quot;hyper_params&quot;;

  //Collections
  public static final String RAW = &quot;raw&quot;;
  public static final String VALIDATION = &quot;validation&quot;;
  public static final String VALIDNORMALIZED = &quot;validNormalized&quot;;
  public static final String NORMALIZED = &quot;normalized&quot;;
  public static final String TRAINED = &quot;trained&quot;;
  public static final String SELECTED = &quot;selected&quot;;

  // Composite names built by MongoUtils.nestedDocuments; they are used below as
  // query field paths and, for TRAINED__FILES, as a collection name.
  // NOTE(review): presumably dot-joined (e.g. metadata.name) - confirm in MongoUtils.
  static final String METADATA__VAL_ID = MongoUtils.nestedDocuments(METADATA, VAL_ID);
  static final String METADATA__NAME = MongoUtils.nestedDocuments(METADATA, NAME);
  static final String TRAINED__FILES = MongoUtils.nestedDocuments(TRAINED, FILES);
  static final String METADATA__SERVICE = MongoUtils.nestedDocuments(METADATA, &quot;service&quot;);

  // Shared client; created on demand by getMongo() and reset by closeMongo().
  private static MongoClient mongo;


  /**
   * Returns the shared client, creating it on first use from the
   * {@code ann.db.uri} configuration entry (default
   * {@code mongodb://localhost:27017}).
   *
   * @return the shared client
   * @throws IllegalArgumentException if the configured URI names a database
   */
  synchronized static MongoClient getMongo() {
    if (mongo != null) {
      return mongo;
    }
    // general URI form: mongodb://user:pass@host:port/db - the db part must be absent here
    MongoClientURI uri = new MongoClientURI(
        Config.getString(&quot;ann.db.uri&quot;, &quot;mongodb://localhost:27017&quot;));
    if (uri.getDatabase() != null) {
      throw new IllegalArgumentException(&quot;remove database in config!&quot;);
    }
    mongo = new MongoClient(uri);
    return mongo;
  }


  /**
   * Closes the shared client, if one is open, and forgets it so that the next
   * {@code getMongo()} call creates a new one.
   */
  public synchronized static void closeMongo() {
    if (mongo == null) {
      return;
    }
    mongo.close();
    mongo = null;
  }


  /**
   * Runs the {@code dbHash} command against the ANN database and returns the
   * hash reported for the given collection.
   *
   * @param annName the name of the ann-dedicated database
   * @param collection the collection to hash
   * @return the value stored under the collection name in the command reply
   * @throws IllegalArgumentException if no such ANN database exists
   */
  public static String getCollectionHash(String annName, String collection) {
    MongoDatabase database = getDatabase(annName);
    Document command = new Document(&quot;dbHash&quot;, 1).append(&quot;collections&quot;, collection);
    Document reply = database.runCommand(command);
    Document hashes = (Document) reply.get(&quot;collections&quot;);
    return hashes.getString(collection);
  }


  /**
   * Returns the database of an existing ANN.
   *
   * @param annName the name of the ann-dedicated database
   * @return the database handle
   * @throws IllegalArgumentException if no such ANN database exists
   */
  private static MongoDatabase getDatabase(String annName) {
    checkAnnExists(annName);
    MongoClient client = getMongo();
    return client.getDatabase(annName);
  }


  /**
   * Returns a collection of an existing ANN database.
   *
   * @param annName the name of the ann-dedicated database
   * @param collection the collection name
   * @return the collection handle, never {@code null}
   * @throws NullPointerException if the driver hands back no collection
   */
  private static MongoCollection&lt;Document&gt; getCollection(String annName, String collection) {
    MongoCollection&lt;Document&gt; found = getDatabase(annName).getCollection(collection);
    if (found != null) {
      return found;
    }
    throw new NullPointerException(&quot;Null collection &quot; + collection + &quot; from database &quot; + annName);
  }


  /**
   * Verifies that a database with the given name is listed by the server.
   *
   * @param annName the name of the ann-dedicated database
   * @throws IllegalArgumentException if the name is not among the database names
   */
  private static void checkAnnExists(String annName) {
    List&lt;String&gt; known = getMongo().listDatabaseNames().into(new ArrayList&lt;String&gt;());
    if (known.contains(annName)) {
      return;
    }
    throw new IllegalArgumentException(&quot;No such ann: &quot; + annName);
  }


  /**
   * Lists the ANNs, i.e. every database that contains a {@code raw} collection.
   *
   * @return The list of ANNs
   */
  public static List&lt;String&gt; getAnns() {
    List&lt;String&gt; anns = new ArrayList&lt;&gt;();
    for (String candidate : getMongo().listDatabaseNames()) {
      List&lt;String&gt; collections = getMongo().getDatabase(candidate)
          .listCollectionNames().into(new ArrayList&lt;String&gt;());
      if (collections.contains(RAW)) {
        anns.add(candidate);
      }
    }
    return anns;
  }


  /**
   * Finds all documents of a collection.
   *
   * @param annName the name of the ann-dedicated database
   * @param collection the collection name
   * @return the unfiltered query
   */
  private static FindIterable&lt;Document&gt; findDocumentsInCollection(String annName, String collection) {
    Bson noFilter = null;
    return findDocumentsInCollection(annName, collection, noFilter);
  }


  /**
   * Finds the documents of a collection that match a filter.
   *
   * @param annName the name of the ann-dedicated database
   * @param collection the collection name
   * @param filter the query filter, or {@code null} to match every document
   * @return the query, never {@code null}
   * @throws NullPointerException if the driver hands back no iterable
   */
  private static FindIterable&lt;Document&gt; findDocumentsInCollection(String annName, String collection, Bson filter) {
    MongoCollection&lt;Document&gt; source = getCollection(annName, collection);
    FindIterable&lt;Document&gt; found;
    if (filter != null) {
      found = source.find(filter);
    } else {
      found = source.find();
    }
    if (found != null) {
      return found;
    }
    throw new NullPointerException(
        &quot;Null iterable document for collection &quot; + collection + &quot; from database &quot; + annName);
  }


  /**
   * Returns the first document of a collection.
   *
   * @param annName the name of the ann-dedicated database
   * @param collection the collection name
   * @return the first document, never {@code null}
   * @throws NullPointerException if the collection yields no document
   */
  private static Document findFirstDocumentInCollection(String annName, String collection) {
    return findFirstDocumentInCollection(annName, collection, null);
  }


  /**
   * Returns the first document of a collection that matches a filter.
   *
   * @param annName the name of the ann-dedicated database
   * @param collection the collection name
   * @param filter the query filter, or {@code null} to match every document
   * @return the first matching document, never {@code null}
   * @throws NullPointerException if no document matches
   */
  private static Document findFirstDocumentInCollection(String annName, String collection, Bson filter) {
    Document d = findDocumentsInCollection(annName, collection, filter).first();
    if (d == null) {
      // report the collection that was actually queried
      String msg = &quot;Null document for collection &quot; + collection + &quot; from database &quot; + annName;
      throw new NullPointerException(msg);
    }
    return d;
  }


  /**
   * Returns, as JSON, the first trained-files document whose metadata service
   * entry equals the given id.
   *
   * @param ann the name of the ann-dedicated database
   * @param suid the service id to look for
   * @return the JSON form of the matching document
   * @throws NullPointerException if no document matches
   */
  public static String getTrainingStats(String ann, String suid) {
    Document stats = findFirstDocumentInCollection(ann, TRAINED__FILES,
        Filters.eq(METADATA__SERVICE, suid));
    return stats.toJson();
  }


  /**
   * get the values.
   *
   * @param ann
   * @param phase
   * @param var
   * @return
   */
  static List&lt;Number&gt; getValues(String ann, String phase, String var) throws ServiceException {
    Bson filter = Filters.eq(NAME, var);
    Document d = findFirstDocumentInCollection(ann, phase, filter);
    if (d == null) {
      throw new ServiceException(&quot;No such variable in &quot; + ann + &quot;/&quot; + phase + &quot;: &quot; + var);
    }
    if (!d.containsKey(VALUES)) {
      throw new ServiceException(&quot;No values in &quot; + ann + &quot;/&quot; + phase + &quot;: &quot; + var);
    }
    List&lt;Number&gt; values = d.get(VALUES, List.class);
    return values;
  }


  public static Iterable&lt;Document&gt; getSortedNormalizedData(String ann, String normCollection,
      String field, Sorting sort) throws ServiceException {
    return ((FindIterable&lt; Document&gt;) getNormalizedData(ann, normCollection))
        .sort(new Document(MongoUtils.nestedDocuments(METADATA, field), sort.getOrder()));
  }


  /**
   * Get the normalized data for the ANN.
   *
   * @param ann the name of the ann-dedicated database
   * @param normCollection the collection to read
   * @return all documents of the collection
   * @throws ServiceException declared for callers; not thrown by this body
   */
  static Iterable&lt;Document&gt; getNormalizedData(String ann, String normCollection) throws ServiceException {
    FindIterable&lt;Document&gt; all = findDocumentsInCollection(ann, normCollection);
    return all;
  }


  /**
   * Returns the values id stored in the metadata of the first raw document.
   *
   * @param ann the name of the ann-dedicated database
   * @return the values id of the raw data
   * @throws NullPointerException if the raw collection has no document
   */
  public static String getValidValuesId(String ann) {
    Document firstRaw = findFirstDocumentInCollection(ann, RAW);
    Document rawMetadata = firstRaw.get(METADATA, Document.class);
    return rawMetadata.getString(VAL_ID);
  }


  /**
   * Collects the metadata sub-document of every given document.
   *
   * @param d the documents to read
   * @return the metadata documents, in iteration order
   * @throws NullPointerException if a document has no metadata
   */
  public static List&lt;Bson&gt; extractMetadata(Iterable&lt;Document&gt; d) {
    List&lt;Bson&gt; metadata = new ArrayList&lt;&gt;();
    for (Document doc : d) {
      Bson m = doc.get(METADATA, Document.class);
      if (m == null) {
        // name the offending document, not the whole iterable
        throw new NullPointerException(&quot;Metadata not available for object &quot; + doc.toString());
      }
      metadata.add(m);
    }
    return metadata;
  }


  public static Iterable&lt;Document&gt; retrieveSortedANNsDocuments(String annName, String field, Sorting sort) {
//    Bson filter = Filters.ne(MongoUtils.nestedDocuments(METADATA, &quot;exit_strategy&quot;), &quot;rec&quot;);
    Bson sorting = new Document(MongoUtils.nestedDocuments(METADATA, field), sort.getOrder());
//    Iterable&lt;Document&gt; iterable = findDocumentsInCollection(annName, TRAINED__FILES, filter).sort(sorting);
    Iterable&lt;Document&gt; iterable = findDocumentsInCollection(annName, TRAINED__FILES).sort(sorting);
    if (iterable == null) {
      throw new NullPointerException(&quot;No documents returned from files collection of &quot; + annName);
    }
    return iterable;
  }


  /**
   * Returns a performance value for every trained network whose exit strategy
   * is not {@code rec}.
   *
   * @param annName the name of the ann-dedicated database
   * @param varName the variable whose performance entry is read
   * @param field the performance field to read
   * @return one value per matching document
   */
  public static double[] retrieveANNsNonRec(String annName, String varName, String field) {
    String exitStrategyKey = MongoUtils.nestedDocuments(METADATA, &quot;exit_strategy&quot;);
    Bson notRec = Filters.ne(exitStrategyKey, &quot;rec&quot;);
    return getErrors(findDocumentsInCollection(annName, TRAINED__FILES, notRec), varName, field);
  }


  public static double[] retrieveANNsErrors(String annName, String varName, String field) {
    Iterable&lt;Document&gt; iterable = findDocumentsInCollection(annName, TRAINED__FILES);
    return getErrors(iterable, varName, field);
  }


  /**
   * Reads {@code metadata / performance / varName / field} from every document.
   *
   * @param iterable the documents to read
   * @param varName the variable whose performance entry is read
   * @param field the performance field to read
   * @return the values in iteration order
   * @throws NullPointerException if a document lacks one of the nested entries
   */
  private static double[] getErrors(Iterable&lt;Document&gt; iterable, String varName, String field) {
    List&lt;Double&gt; collected = new ArrayList&lt;&gt;();
    for (Document entry : iterable) {
      Document performance = entry.get(METADATA, Document.class).get(PERFORMANCE, Document.class);
      Document perVariable = performance.get(varName, Document.class);
      collected.add(perVariable.getDouble(field));
    }
    double[] result = new double[collected.size()];
    for (int i = 0; i &lt; result.length; i++) {
      result[i] = collected.get(i);
    }
    return result;
  }


  /**
   * Reads one metadata entry from every document.
   *
   * @param iterable the documents to read
   * @param metadata the key inside the metadata sub-document
   * @return the entries in iteration order; an element is {@code null} where
   *     the key is absent
   */
  private static List&lt;Object&gt; getMetadata(Iterable&lt;Document&gt; iterable, String metadata) {
    List&lt;Object&gt; entries = new ArrayList&lt;&gt;();
    for (Document current : iterable) {
      Document meta = current.get(METADATA, Document.class);
      entries.add(meta.get(metadata));
    }
    return entries;
  }


  public static List&lt;Object&gt; retrieveANNsNonRecInfo(String annName, String metadata) {
    Bson filter = Filters.ne(MongoUtils.nestedDocuments(METADATA, &quot;exit_strategy&quot;), &quot;rec&quot;);
    Iterable&lt;Document&gt; iterable = findDocumentsInCollection(annName, TRAINED__FILES, filter);
    return getMetadata(iterable, metadata);
  }


  /**
   * Returns all trained-files documents of an ANN.
   *
   * @param annName the name of the ann-dedicated database
   * @return the unfiltered, unsorted documents
   */
  public static Iterable&lt;Document&gt; retrieveANNsDocuments(String annName) {
    FindIterable&lt;Document&gt; all = findDocumentsInCollection(annName, TRAINED__FILES);
    return all;
  }


  /**
   * Returns the min-max percentage stored with the selection that comes first
   * when the selected collection is sorted by timestamp.
   *
   * @param annName the name of the ann-dedicated database
   * @return the stored percentage
   * @throws NullPointerException if no selection is stored or the value is missing
   */
  public static double retrieveMinMaxPerc(String annName) {
    Bson sorting = new Document(TIMESTAMP, Sorting.DESCENDING.getOrder());
    // why select the first? - the sort is requested as DESCENDING on the
    // timestamp, so the first document should be the most recent selection
    Document d = findDocumentsInCollection(annName, SELECTED).sort(sorting).first();
    if (d == null) {
      throw new NullPointerException(&quot;No selection stored in ann &apos;&quot; + annName + &quot;&apos;&quot;);
    }
    Double perc = d.getDouble(&quot;percentage btw min-max&quot;);
    if (perc == null) {
      // fail with a message instead of a bare unboxing error
      throw new NullPointerException(&quot;No min-max percentage in latest selection of ann &apos;&quot; + annName + &quot;&apos;&quot;);
    }
    return perc;
  }


  /**
   * Returns the quartiles percentage stored with the selection that comes
   * first when the selected collection is sorted by timestamp.
   *
   * @param annName the name of the ann-dedicated database
   * @return the stored percentage
   * @throws NullPointerException if no selection is stored or the value is missing
   */
  public static double retrieveQuartPerc(String annName) {
    Bson sorting = new Document(TIMESTAMP, Sorting.DESCENDING.getOrder());
    Document d = findDocumentsInCollection(annName, SELECTED).sort(sorting).first();
    if (d == null) {
      throw new NullPointerException(&quot;No selection stored in ann &apos;&quot; + annName + &quot;&apos;&quot;);
    }
    Double perc = d.getDouble(&quot;percentage btw quartiles&quot;);
    if (perc == null) {
      // fail with a message instead of a bare unboxing error
      throw new NullPointerException(&quot;No quartiles percentage in latest selection of ann &apos;&quot; + annName + &quot;&apos;&quot;);
    }
    return perc;
  }


  public static List&lt;NEATNetwork&gt; retrieveANNs(String annName) {
    Bson sorting = new Document(TIMESTAMP, Sorting.DESCENDING.getOrder());
    FindIterable&lt;Document&gt; iterable = findDocumentsInCollection(annName, SELECTED).sort(sorting);
    Document d = iterable.first();
    List&lt;ObjectId&gt; ids = d.get(&quot;selected_id&quot;, List.class);
    List&lt;NEATNetwork&gt; nn = new ArrayList&lt;&gt;();
    ids.forEach((id) -&gt; {
      nn.add(retrieveANN(annName, id));
    });
    return nn;
  }


  public static NEATNetwork retrieveANN(String annName, ObjectId id) {
    MongoDatabase db = getDatabase(annName);
    GridFSBucket gridFSBucket = GridFSBuckets.create(db, TRAINED);
    byte[] file;
    try (GridFSDownloadStream stream = gridFSBucket.openDownloadStream(id)) {
      long fileLength = stream.getGridFSFile().getLength();
      file = new byte[(int) fileLength];
      stream.read(file);
    }
    NEATNetwork nn = (NEATNetwork) SerializationUtils.deserialize(file);
    if (nn == null) {
      throw new NullPointerException(&quot;Network not found in ann '&quot; + annName + &quot;': &quot; + id);
    }
    return nn;
  }


  /**
   * Store ANN as file with metadata
   *
   *
   * @param ann_in
   * @param ann_out
   * @param network
   * @param meta
   * @throws IOException
   */
  // get the metadata: mongo r2 --quiet --eval  &quot;db.trained.files.findOne({'filename':'r2-hello.ann'}).metadata&quot; get the
  // get the file:     mongofiles -d r2 --prefix=trained get r2-hello.ann
  public static void storeANN(String ann_in, byte[] network, String ann_out,
      Document meta, String nn_id) throws IOException {

    GridFSBucket gridFSBucket = GridFSBuckets.create(getDatabase(ann_out), TRAINED);
    Iterable&lt;Document&gt; d = findDocumentsInCollection(ann_out, TRAINED__FILES);
    for (Document dd : d) {
      if (dd.get(MongoAccess.METADATA, Document.class).getString(&quot;nn_id&quot;).equals(nn_id)) {
        ObjectId objid = dd.getObjectId(MongoAccess.ANN_ID);
        gridFSBucket.delete(objid);
      }
    }

    // 1.check nn with id is stored already
    // 2.if is stored, get ObjectId and delete it gridFSBucket.delete(objectId)
    // 3.save it
    GridFSUploadOptions options = new GridFSUploadOptions().metadata(meta);
    InputStream is = new ByteArrayInputStream(network);
    gridFSBucket.uploadFromStream(ann_in + &quot;-&quot; + ann_out, is, options);
    is.close();
  }


  public static boolean collectionExist(String annName, String collection) {
    MongoIterable&lt;String&gt; col = getDatabase(annName).listCollectionNames();
    for (String coll : col) {
      if (coll.toLowerCase().equals(collection.toLowerCase())) {
        return true;
      }
    }
    return false;
  }


  /**
   * Normalize the raw data db-side
   *
   * @param annName
   */
  public static void normalize(String annName) {
    checkAnnExists(annName);
    // if a normalized database exists
    MongoCollection&lt;Document&gt; col = getCollection(annName, RAW);
    List&lt;Bson&gt; list = MongoAggregations.normalize(MongoAccess.NORMALIZED);
    AggregateIterable&lt;Document&gt; output = col.aggregate(list);
    output.toCollection();
  }


  /**
   * Inserts a selection document holding the current time, the two
   * percentages of the error estimate and the selected network ids.
   *
   * @param ann the name of the ann-dedicated database
   * @param id the ids of the selected networks
   * @param ee the error estimate providing the percentages
   */
  public static void pushSelectedANNs(String ann, List&lt;ObjectId&gt; id, ErrorEstimate ee) {
    MongoCollection&lt;Document&gt; target = getCollection(ann, SELECTED);
    Document selection = new Document(&quot;timestamp&quot;, new Date());
    selection.append(&quot;percentage btw quartiles&quot;, ee.getQuartilesPerc());
    selection.append(&quot;percentage btw min-max&quot;, ee.getMinMaxPerc());
    selection.append(&quot;selected_id&quot;, id);
    target.insertOne(selection);
  }


  public static void validatePipeline(String ann, String currentService) {
    String validValuesID = getValidValuesId(ann);
    String currentID = findFirstDocumentInCollection(ann, currentService).get(METADATA, Document.class).getString(VAL_ID);

    // maybe create a FeNS Exception?
    if (!currentID.equals(validValuesID)) {
      String msg = &quot;BROKEN PIPELINE: valid ID &quot; + validValuesID
          + &quot; differ from current ID &quot; + currentID;
      throw new RuntimeException(msg);
    }
  }


  /**
   * Collect raw data: appends one value per described parameter to the raw
   * collection.
   *
   * @param ann The name of the ann-dedicated database
   * @param paramNames List of parameters names
   * @param getVal Provides the value of a parameter
   * @param getDescr Provides the metadata of a parameter
   * @throws ServiceException if a value cannot be obtained
   * @throws IllegalArgumentException if a normalization range has equal bounds
   */
  public static void collectRawData(String ann,
      Set&lt;String&gt; paramNames,
      ServiceFunction&lt;String, Number&gt; getVal,
      ServiceFunction&lt;String, String&gt; getDescr)
      throws ServiceException {

    MongoCollection&lt;Document&gt; rawCol = getMongo().getDatabase(ann).getCollection(RAW);
    String valuesId = ObjectId.get().toString();
    Date now = new Date();
    for (String name : paramNames) {
      String descr;
      try {
        descr = getDescr.apply(name);
      } catch (ServiceException ignored) {
        // a parameter whose description cannot be obtained is skipped
        continue;
      }
      // only parameters described as input or output are stored
      boolean relevant = descr != null &amp;&amp; (descr.contains(IN) || descr.contains(OUT));
      if (!relevant) {
        continue;
      }

      Boolean normalize = descr.contains(NORM) ? Boolean.TRUE : Boolean.FALSE;

      // normalization default range
      double lower = 0;
      double upper = 1;
      if (MongoUtils.checkForNormRange(descr)) {
        // the range is the comma separated pair between the square brackets
        String range = descr.substring(descr.indexOf(&quot;[&quot;) + 1, descr.indexOf(&quot;]&quot;))
            .replaceAll(&quot;\\s+&quot;, &quot;&quot;);
        String[] bounds = range.split(&quot;,&quot;);
        lower = Double.parseDouble(bounds[0]);
        upper = Double.parseDouble(bounds[1]);
        // make sure min, max are different
        if (lower == upper) {
          throw new IllegalArgumentException(&quot;Min max for norm range are identical&quot;);
        }
        // make sure min, max are not reversed
        if (lower &gt; upper) {
          double swap = upper;
          upper = lower;
          lower = swap;
        }
      }
      String direction = descr.contains(IN) ? IN : OUT;
      collect(rawCol, valuesId, name, getVal.apply(name), normalize, lower, upper, direction, now);
    }
  }


  /**
   * Collect an entire csv of data column by column and push it to database.
   * The first line holds the column names. Any existing database named
   * {@code annName} is dropped before the data is loaded, and the min/max
   * aggregation is run on the raw collection afterwards.
   *
   * @param annName The name of the ann-dedicated database
   * @param paramNames the column names to consider
   * @param getDescr Metadata of each parameter
   * @param file csv file
   * @param blockSize number of rows gathered per column before they are pushed
   *
   * @throws IOException if the file cannot be read or is empty
   * @throws ServiceException if a parameter description cannot be obtained
   * @throws IllegalArgumentException if a normalization range has equal bounds
   */
  public static void collectFromFile(String annName, Set&lt;String&gt; paramNames,
      ServiceFunction&lt;String, String&gt; getDescr, File file,
      int blockSize) throws IOException, ServiceException {

    // 32 K buffer
    try (BufferedReader r = new BufferedReader(new FileReader(file), 8192 * 4)) {
      Pattern p = Pattern.compile(&quot;\\s*,\\s*&quot;);

      String header = r.readLine();
      if (header == null) {
        // fail before the existing database is dropped
        throw new IOException(&quot;Empty file: &quot; + file);
      }
      String[] names = p.split(header);
      String[] types = new String[names.length];
      Boolean[] norm = new Boolean[names.length];
      double[] norm_min = new double[names.length];
      double[] norm_max = new double[names.length];
      // normalization default range is 0..1
      for (int i = 0; i &lt; names.length; i++) {
        norm_max[i] = 1.0;
      }
      // columns that are stored
      List&lt;Integer&gt; indices = new ArrayList&lt;&gt;();

      for (int i = 0; i &lt; names.length; i++) {
        if (paramNames.contains(names[i])) {
          types[i] = getDescr.apply(names[i]);
          if (types[i] != null &amp;&amp; (types[i].contains(IN) || types[i].contains(OUT))) {
            indices.add(i);
            norm[i] = Boolean.FALSE;
            if (types[i].contains(NORM)) {
              norm[i] = Boolean.TRUE;
            }
            if (MongoUtils.checkForNormRange(types[i])) {
              String norm_range = types[i].substring(types[i].indexOf(&quot;[&quot;) + 1, types[i].indexOf(&quot;]&quot;));
              norm_range = norm_range.replaceAll(&quot;\\s+&quot;, &quot;&quot;);
              String[] minmax = norm_range.split(&quot;,&quot;);
              norm_min[i] = Double.parseDouble(minmax[0]);
              norm_max[i] = Double.parseDouble(minmax[1]);

              // make sure min, max are different (compare the values of this
              // column, not the array references)
              if (norm_min[i] == norm_max[i]) {
                throw new IllegalArgumentException(&quot;Min max for norm range are identical&quot;);
              }
              // make sure min, max are not reversed
              if (norm_min[i] &gt; norm_max[i]) {
                double tmp_min = norm_max[i];
                norm_max[i] = norm_min[i];
                norm_min[i] = tmp_min;
              }
            }
            types[i] = types[i].contains(IN) ? IN : OUT;
          }
        }
      }

      getMongo().dropDatabase(annName);
      MongoCollection&lt;Document&gt; col = getMongo().getDatabase(annName).getCollection(RAW);

      String val_id = ObjectId.get().toString();
      Date date = new Date();

      // one buffer per column; only the stored columns are filled
      @SuppressWarnings(&quot;unchecked&quot;)
      List&lt;Object&gt;[] block = new ArrayList[names.length];
      for (int i = 0; i &lt; block.length; i++) {
        block[i] = new ArrayList&lt;&gt;();
      }

      int rows = 0;
      String line = r.readLine();
      while (line != null) {
        String[] row = p.split(line);
        for (int i : indices) {
          block[i].add(Double.valueOf(row[i]));
        }
        line = r.readLine();
        // flush when a block is full or the input is exhausted
        if ((++rows % blockSize == 0) || line == null) {
          for (int i : indices) {
            collectInBlocks(col, val_id, names[i], block[i], norm[i], norm_min[i], norm_max[i], types[i], date);
            block[i].clear();
          }
        }
      }
      List&lt;Bson&gt; list = MongoAggregations.computeMinMax(RAW);
      AggregateIterable&lt;Document&gt; output = col.aggregate(list);
      output.toCollection();
    }
  }


  /**
   * Read an entire csv of data into a {@code Data} object; nothing is written
   * to the database. The first line holds the column names, every following
   * line one row of numbers.
   *
   * @param paramNames the column names to describe
   * @param getDescr Metadata of each parameter
   * @param file csv file
   * @return the column descriptions and all rows of the file
   *
   * @throws IOException if the file cannot be read or is empty
   * @throws ServiceException if a parameter description cannot be obtained
   * @throws IllegalArgumentException if a normalization range has equal bounds
   */
  public static Data collectFromFile(Set&lt;String&gt; paramNames,
      ServiceFunction&lt;String, String&gt; getDescr, File file) throws IOException, ServiceException {

    try (BufferedReader r = new BufferedReader(new FileReader(file))) {
      Pattern p = Pattern.compile(&quot;\\s*,\\s*&quot;);

      String header = r.readLine();
      if (header == null) {
        throw new IOException(&quot;Empty file: &quot; + file);
      }
      String[] names = p.split(header);
      String[] types = new String[names.length];
      Boolean[] norm = new Boolean[names.length];
      double[] norm_min = new double[names.length];
      double[] norm_max = new double[names.length];
      // normalization default range is 0..1
      for (int i = 0; i &lt; names.length; i++) {
        norm_max[i] = 1.0;
      }

      for (int i = 0; i &lt; names.length; i++) {
        if (paramNames.contains(names[i])) {
          types[i] = getDescr.apply(names[i]);
          if (types[i] != null &amp;&amp; (types[i].contains(IN) || types[i].contains(OUT))) {
            norm[i] = Boolean.FALSE;
            if (types[i].contains(NORM)) {
              norm[i] = Boolean.TRUE;
            }
            if (MongoUtils.checkForNormRange(types[i])) {
              String norm_range = types[i].substring(types[i].indexOf(&quot;[&quot;) + 1, types[i].indexOf(&quot;]&quot;));
              norm_range = norm_range.replaceAll(&quot;\\s+&quot;, &quot;&quot;);
              String[] minmax = norm_range.split(&quot;,&quot;);
              norm_min[i] = Double.parseDouble(minmax[0]);
              norm_max[i] = Double.parseDouble(minmax[1]);

              // make sure min, max are different (compare the values of this
              // column, not the array references)
              if (norm_min[i] == norm_max[i]) {
                throw new IllegalArgumentException(&quot;Min max for norm range are identical&quot;);
              }
              // make sure min, max are not reversed
              if (norm_min[i] &gt; norm_max[i]) {
                double tmp_min = norm_max[i];
                norm_max[i] = norm_min[i];
                norm_min[i] = tmp_min;
              }
            }
            types[i] = types[i].contains(IN) ? IN : OUT;
          }
        }
      }
      Data data = new Data(names, norm, norm_min, norm_max, types);
      String line = null;
      int index = 0;
      while ((line = r.readLine()) != null) {
        String[] row = p.split(line);
        double[] tmpData = Arrays.stream(row).mapToDouble(Double::valueOf).toArray();
        data.put(index, tmpData);
        index++;
      }
      return data;
    }
  }


  /**
   * Pushes the training rows to the raw collection and the validation rows to
   * the validation collection, all under one new values id and timestamp.
   *
   * @param annName the name of the ann-dedicated database
   * @param data the data to push
   * @param dataIndices the row indices of the training and validation sets
   */
  public static void pushToDB(String annName, Data data, DataSetIndices dataIndices) {
    MongoDatabase db = getMongo().getDatabase(annName);
    MongoCollection&lt;Document&gt; training = db.getCollection(RAW);
    MongoCollection&lt;Document&gt; validation = db.getCollection(VALIDATION);
    String valuesId = ObjectId.get().toString();
    Date now = new Date();

    for (int row : dataIndices.getTraining()) {
      dataPush(data, data.getDataPerRow(row), valuesId, now, training);
    }
    for (int row : dataIndices.getValidation()) {
      dataPush(data, data.getDataPerRow(row), valuesId, now, validation);
    }
  }


  /**
   * Pushes every value of one row to the document of its column.
   *
   * @param data provides the per-column descriptions
   * @param rowData the values of the row, one per column
   * @param val_id the values id to store
   * @param date the timestamp to store
   * @param col the target collection
   */
  private static void dataPush(Data data, double[] rowData, String val_id, Date date, MongoCollection&lt;Document&gt; col) {
    int column = 0;
    while (column &lt; rowData.length) {
      String columnName = data.getName(column);
      boolean normalize = data.getNorm(column);
      double lower = data.getNormMin(column);
      double upper = data.getNormMax(column);
      double smallest = data.getMin(column);
      double largest = data.getMax(column);
      String direction = data.getType(column);
      collect(col, val_id, columnName, rowData[column], normalize, lower, upper,
          smallest, largest, direction, date);
      column++;
    }
  }


  /**
   * Appends one value to the document of a parameter, creating the document
   * with its metadata (including min and max) when none exists yet.
   *
   * @param col the target collection
   * @param val_id the values id to set
   * @param name the parameter name
   * @param val the value to append
   * @param norm the normalization flag stored in the metadata
   * @param norm_min lower bound of the normalization range
   * @param norm_max upper bound of the normalization range
   * @param min the minimum stored in the metadata
   * @param max the maximum stored in the metadata
   * @param type the parameter type stored in the metadata
   * @param tstamp the timestamp to set
   */
  private static void collect(MongoCollection&lt;Document&gt; col, String val_id, String name, Object val,
      Boolean norm, Double norm_min, Double norm_max, Double min, Double max, String type, Date tstamp) {
    Bson byName = Filters.eq(METADATA__NAME, name);
    boolean missing = col.find(byName).first() == null;
    if (missing) {
      Document metadata = new Document(NAME, name)
          .append(TYPE, type)
          .append(NORM, norm)
          .append(NORM_MIN, norm_min)
          .append(NORM_MAX, norm_max)
          .append(MIN, min)
          .append(MAX, max);
      col.insertOne(new Document(TIMESTAMP, new Date()).append(METADATA, metadata));
    }
    Bson update = Updates.combine(
        Updates.push(VALUES, val),
        Updates.set(METADATA__VAL_ID, val_id),
        Updates.set(TIMESTAMP, tstamp));
    col.updateOne(byName, update);
  }


  /**
   * Appends a block of values to the document of a parameter, creating the
   * document with its metadata when none exists yet.
   *
   * @param col the target collection
   * @param val_id the values id to set
   * @param name the parameter name
   * @param val the values to append
   * @param norm the normalization flag stored in the metadata
   * @param norm_min lower bound of the normalization range
   * @param norm_max upper bound of the normalization range
   * @param type the parameter type stored in the metadata
   * @param tstamp the timestamp to set
   */
  private static void collectInBlocks(MongoCollection&lt;Document&gt; col,
      String val_id, String name, List&lt;Object&gt; val,
      Boolean norm, Double norm_min, Double norm_max, String type, Date tstamp) {
    Bson byName = Filters.eq(METADATA__NAME, name);
    boolean missing = col.find(byName).first() == null;
    if (missing) {
      Document metadata = new Document(NAME, name)
          .append(TYPE, type)
          .append(NORM, norm)
          .append(NORM_MIN, norm_min)
          .append(NORM_MAX, norm_max);
      col.insertOne(new Document(TIMESTAMP, new Date()).append(METADATA, metadata));
    }
    Bson update = Updates.combine(
        Updates.pushEach(VALUES, val),
        Updates.set(METADATA__VAL_ID, val_id),
        Updates.set(TIMESTAMP, tstamp));
    col.updateOne(byName, update);
  }


  /**
   * Appends one value to the document of a parameter, creating the document
   * with its metadata when none exists yet.
   *
   * @param col the target collection
   * @param val_id the values id to set
   * @param name the parameter name
   * @param val the value to append
   * @param norm the normalization flag stored in the metadata
   * @param norm_min lower bound of the normalization range
   * @param norm_max upper bound of the normalization range
   * @param type the parameter type stored in the metadata
   * @param tstamp the timestamp to set
   */
  private static void collect(MongoCollection&lt;Document&gt; col, String val_id,
      String name, Object val, Boolean norm, Double norm_min, Double norm_max,
      String type, Date tstamp) {
    Bson byName = Filters.eq(METADATA__NAME, name);
    boolean missing = col.find(byName).first() == null;
    if (missing) {
      Document metadata = new Document(NAME, name)
          .append(TYPE, type)
          .append(NORM, norm)
          .append(NORM_MIN, norm_min)
          .append(NORM_MAX, norm_max);
      col.insertOne(new Document(TIMESTAMP, new Date()).append(METADATA, metadata));
    }
    Bson update = Updates.combine(
        Updates.push(VALUES, val),
        Updates.set(METADATA__VAL_ID, val_id),
        Updates.set(TIMESTAMP, tstamp));
    col.updateOne(byName, update);
  }

}
</PRE>
</PRE>