package water.parser;

import jsr166y.CountedCompleter;
import jsr166y.ForkJoinTask;
import jsr166y.RecursiveAction;
import water.*;
import water.H2O.H2OCountedCompleter;
import water.exceptions.H2OIllegalArgumentException;
import water.exceptions.H2OIllegalValueException;
import water.fvec.*;
import water.fvec.Vec.VectorGroup;
import water.nbhm.NonBlockingHashMap;
import water.nbhm.NonBlockingSetInt;
import water.util.*;

import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import static water.parser.DefaultParserProviders.*;

public final class ParseDataset {
  public Job<Frame> _job;
  private MultiFileParseTask _mfpt; // Access to partially built vectors for cleanup after parser crash

  // Keys are limited to ByteVec Keys and Frames-of-1-ByteVec Keys
  public static Frame parse(Key okey, Key... keys) { return parse(okey,keys,true, false, ParseSetup.GUESS_HEADER); }

  // Guess setup from inspecting the first Key only, then parse.
  // Suitable for e.g. testing setups, where the data is known to be sane.
  // NOT suitable for random user input!
  public static Frame parse(Key okey, Key[] keys, boolean deleteOnDone, boolean singleQuote, int checkHeader) {
    return parse(okey,keys,deleteOnDone,ParseSetup.guessSetup(keys, singleQuote, checkHeader));
  }
  public static Frame parse(Key okey, Key[] keys, boolean deleteOnDone, ParseSetup globalSetup) {
    return parse(okey,keys,deleteOnDone,globalSetup,true)._job.get();
  }
  public static ParseDataset parse(Key okey, Key[] keys, boolean deleteOnDone, ParseSetup globalSetup, boolean blocking) {
    ParseDataset pds = forkParseDataset(okey, keys, globalSetup, deleteOnDone);
    if( blocking )
      pds._job.get();
    return pds;
  }
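  // Illustrative sketch (not part of the original source): how the overloads
  // above compose for a simple blocking parse.  The key names are hypothetical;
  // the calls mirror the ones made by parse(Key, Key...) above -- guess a
  // ParseSetup from the raw bytes, then run the parse and block on the Job.
  private static Frame exampleBlockingParse(Key<Frame> dest, Key rawKey) {
    ParseSetup guessed = ParseSetup.guessSetup(new Key[]{rawKey}, false /*singleQuote*/, ParseSetup.GUESS_HEADER);
    return parse(dest, new Key[]{rawKey}, true /*deleteOnDone*/, guessed);
  }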
  // Allow both ByteVec keys and Frame-of-1-ByteVec
  static ByteVec getByteVec(Key key) {
    Iced ice = DKV.getGet(key);
    if(ice == null)
      throw new H2OIllegalArgumentException("Missing data","Did not find any data under key " + key);
    return (ByteVec)(ice instanceof ByteVec ? ice : ((Frame)ice).vecs()[0]);
  }

  static String [] getColumnNames(int ncols, String[] colNames) {
    if(colNames == null) { // no names, generate
      colNames = new String[ncols];
      for(int i=0; i < ncols; i++ )
        colNames[i] = "C" + Integer.toString(i+1);
    } else { // some or all names exist, fill in blanks
      HashSet<String> nameSet = new HashSet<>(Arrays.asList(colNames));
      colNames = Arrays.copyOf(colNames, ncols);
      for(int i=0; i < ncols; i++ ) {
        if (colNames[i] == null || colNames[i].equals("")) {
          String tmp = "C" + Integer.toString(i+1);
          while (nameSet.contains(tmp)) // keep building name until unique
            tmp = tmp + tmp;
          colNames[i] = tmp;
        }
      }
    }
    return colNames;
  }
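  // Illustrative example (not in the original source) of the blank-name rule above:
  //   getColumnNames(3, new String[]{"A", "", "C2"})  ==>  {"A", "C2C2", "C2"}
  // The blank at index 1 first tries "C2", which collides with the existing
  // name "C2", so it is doubled ("C2C2") until it no longer clashes.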
  public static Job forkParseSVMLight(final Key<Frame> dest, final Key [] keys, final ParseSetup setup) {
    int nchunks = 0;
    Vec v = null;
    // set the parse chunk size for files
    for( int i = 0; i < keys.length; ++i ) {
      Iced ice = DKV.getGet(keys[i]);
      if(ice instanceof FileVec) {
        if(i == 0) v = ((FileVec) ice);
        ((FileVec) ice).setChunkSize(setup._chunk_size);
        nchunks += ((FileVec) ice).nChunks();
        Log.info("Parse chunk size " + setup._chunk_size);
      } else if(ice instanceof Frame && ((Frame)ice).vec(0) instanceof FileVec) {
        if(i == 0) v = ((Frame)ice).vec(0);
        ((FileVec) ((Frame) ice).vec(0)).setChunkSize((Frame) ice, setup._chunk_size);
        nchunks += (((Frame) ice).vec(0)).nChunks();
        Log.info("Parse chunk size " + setup._chunk_size);
      }
    }
    final VectorGroup vg = v.group();
    final ParseDataset pds = new ParseDataset(dest);
    new Frame(pds._job._result,new String[0],new Vec[0]).delete_and_lock(pds._job); // Write-Lock BEFORE returning
    return pds._job.start(new H2OCountedCompleter() {
      @Override public void compute2() {
        ParseDataset.parseAllKeys(pds,keys,setup,true);
        tryComplete();
      }
    },nchunks);
  }

  /**
   * The entry-point for data set parsing.
   *
   * @param dest name for destination key
   * @param keys input keys
   * @param parseSetup a generic parser setup
   * @param deleteOnDone delete input data when finished
   * @return a new parse job
   */
  public static ParseDataset forkParseDataset(final Key<Frame> dest, final Key[] keys, final ParseSetup parseSetup, boolean deleteOnDone) {
    // Get a parser specific setup
    // FIXME: ParseSetup should be separated into two classes - one for use via the REST API as user setup
    //        and another as an internal parser setup to drive parsing.
    final ParseSetup setup = parseSetup.getFinalSetup(keys, parseSetup);

    HashSet<String> conflictingNames = setup.checkDupColumnNames();
    for( String x : conflictingNames )
      if ( x != null && !x.equals(""))
        throw new IllegalArgumentException("Found duplicate column name "+x);

    // Some quick sanity checks: no overwriting your input key, and a resource check.
    long totalParseSize=0;
    for( int i=0; i<keys.length; i++ ) {
      Key k = keys[i];
      if( dest.equals(k) )
        throw new IllegalArgumentException("Destination key "+dest+" must be different from all sources");
      if( deleteOnDone )
        for( int j=i+1; j<keys.length; j++ )
          if( k==keys[j] )
            throw new IllegalArgumentException("Source key "+k+" appears twice, deleteOnDone must be false");
      // estimate total size in bytes
      totalParseSize += getByteVec(k).length();
    }
    Log.info("Total file size: "+ PrettyPrint.bytes(totalParseSize));

    // no need to set this for ORC, it is already done:
    if (!setup.getParseType().name().contains("ORC")) {
      for( int i = 0; i < keys.length; ++i ) {
        Iced ice = DKV.getGet(keys[i]);
        // set the parse chunk size for files
        if (ice instanceof FileVec) {
          ((FileVec) ice).setChunkSize(setup._chunk_size);
          Log.info("Parse chunk size " + setup._chunk_size);
        } else if (ice instanceof Frame && ((Frame) ice).vec(0) instanceof FileVec) {
          ((FileVec) ((Frame) ice).vec(0)).setChunkSize((Frame) ice, setup._chunk_size);
          Log.info("Parse chunk size " + setup._chunk_size);
        }
      }
    } else Log.info("Orc Parse chunk sizes may be different across files");

    long memsz = H2O.CLOUD.free_mem();
    if( totalParseSize > memsz*4 )
      throw new IllegalArgumentException("Total input file size of "+PrettyPrint.bytes(totalParseSize)
          +" is much larger than total cluster memory of "+PrettyPrint.bytes(memsz)
          +", please use either a larger cluster or smaller data.");
    if (H2O.GA != null)
      GAUtils.logParse(totalParseSize, keys.length, setup._number_columns);

    // Fire off the parse
    ParseDataset pds = new ParseDataset(dest);
    new Frame(pds._job._result,new String[0],new Vec[0]).delete_and_lock(pds._job); // Write-Lock BEFORE returning
    for( Key k : keys ) Lockable.read_lock(k,pds._job); // Read-Lock BEFORE returning
    ParserFJTask fjt = new ParserFJTask(pds, keys, setup, deleteOnDone); // Fire off background parse
    pds._job.start(fjt, totalParseSize);
    return pds;
  }

  // Setup a private background parse job
  private ParseDataset(Key<Frame> dest) {
    _job = new Job(dest,Frame.class.getName(), "Parse");
  }

  // -------------------------------
  // Simple internal class doing background parsing, with trackable Job status
  public static class ParserFJTask extends water.H2O.H2OCountedCompleter {
    final ParseDataset _pds;
    final Key[] _keys;
    final ParseSetup _setup;
    final boolean _deleteOnDone;

    public ParserFJTask( ParseDataset pds, Key[] keys, ParseSetup setup, boolean deleteOnDone) {
      _pds = pds;
      _keys = keys;
      _setup = setup;
      _deleteOnDone = deleteOnDone;
    }
    @Override public void compute2() {
      parseAllKeys(_pds, _keys, _setup, _deleteOnDone);
      tryComplete();
    }

    // Took a crash/NPE somewhere in the parser.  Attempt cleanup.
    @Override public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller){
      parseCleanup();           // Can get called many times
      return true;
    }

    @Override public void onCompletion(CountedCompleter caller) {
      if( _pds._job.stop_requested() ) parseCleanup();
      _pds._mfpt = null;
    }

    private void parseCleanup() {
      assert !_pds._job.isStopped(); // Job still running till job.onExCompletion returns
      Futures fs = new Futures();
      // Find & remove all partially-built output vecs & chunks.
      // Since this is racily called, perhaps multiple times, read _mfpt only exactly once.
      MultiFileParseTask mfpt = _pds._mfpt;
      _pds._mfpt = null;        // Read once, test for null once.
      if (mfpt != null) mfpt.onExceptionCleanup(fs);
      // Assume the input is corrupt - or already partially deleted after
      // parsing.  Nuke it all - no partial Vecs lying around.
      for (Key k : _keys) Keyed.remove(k, fs);
      Keyed.remove(_pds._job._result,fs);
      fs.blockForPending();
    }
  }

  private static class CategoricalUpdateMap extends Iced {
    final int [][] map;
    public CategoricalUpdateMap(int[][] map){this.map = map;}
  }

  // --------------------------------------------------------------------------
  // Top-level parser driver
  private static ParseDataset parseAllKeys(ParseDataset pds, Key[] fkeys, ParseSetup setup, boolean deleteOnDone) {
    final Job<Frame> job = pds._job;
    assert setup._number_columns > 0;
    if( setup._column_names != null &&
        ( (setup._column_names.length == 0) ||
          (setup._column_names.length == 1 && setup._column_names[0].isEmpty())) )
      setup._column_names = null; // FIXME: annoyingly the front end sends column names as String[] {""} even if setup returned null
    if(setup._na_strings != null && setup._na_strings.length != setup._number_columns) setup._na_strings = null;
    if( fkeys.length == 0) { job.stop(); return pds; }

    job.update(0, "Ingesting files.");
    VectorGroup vg = getByteVec(fkeys[0]).group();
    MultiFileParseTask mfpt = pds._mfpt = new MultiFileParseTask(vg,setup,job._key,fkeys,deleteOnDone);
    mfpt.doAll(fkeys);
    Log.trace("Done ingesting files.");
    if( job.stop_requested() ) return pds;

    final AppendableVec [] avs = mfpt.vecs();
    setup._column_names = getColumnNames(avs.length, setup._column_names);
    Frame fr = null;
    // Calculate categorical domain
    // Filter down to columns with some categoricals
    int n = 0;
    int[] ecols2 = new int[avs.length];
    for( int i = 0; i < avs.length; ++i )
      if( avs[i].get_type()==Vec.T_CAT ) // Intended type is categorical (even though no domain has been set)?
        ecols2[n++] = i;
    final int[] ecols = Arrays.copyOf(ecols2, n);
    // If we have any, go gather unified categorical domains
    if( n > 0 ) {
      if (!setup.getParseType().isDomainProvided) { // Domains are not provided via setup, we need to collect them
        job.update(0, "Collecting categorical domains across nodes.");
        {
          GatherCategoricalDomainsTask gcdt = new GatherCategoricalDomainsTask(mfpt._cKey, ecols).doAllNodes();
          // Test domains for excessive length.
          List<String> offendingColNames = new ArrayList<>();
          for (int i = 0; i < ecols.length; i++) {
            if (gcdt.getDomainLength(i) < Categorical.MAX_CATEGORICAL_COUNT) {
              if( gcdt.getDomainLength(i)==0 )
                avs[ecols[i]].setBad(); // The all-NA column
              else avs[ecols[i]].setDomain(gcdt.getDomain(i));
            } else
              offendingColNames.add(setup._column_names[ecols[i]]);
          }
          if (offendingColNames.size() > 0)
            throw new H2OParseException("Exceeded categorical limit on columns "+ offendingColNames
                +". Consider reparsing these columns as a string.");
        }
        Log.trace("Done collecting categorical domains across nodes.");
      } else {
        // Ignore offending domains
        for (int i = 0; i < ecols.length; i++) {
          avs[ecols[i]].setDomain(setup._domains[ecols[i]]);
        }
      }

      job.update(0, "Compressing data.");
      fr = new Frame(job._result, setup._column_names, AppendableVec.closeAll(avs));
      fr.update(job);
      Log.trace("Done compressing data.");

      if (!setup.getParseType().isDomainProvided) {
        // Update categoricals to the globally agreed numbering
        Vec[] evecs = new Vec[ecols.length];
        for( int i = 0; i < evecs.length; ++i ) evecs[i] = fr.vecs()[ecols[i]];
        job.update(0, "Unifying categorical domains across nodes.");
        {
          // new CreateParse2GlobalCategoricalMaps(mfpt._cKey).doAll(evecs);
          // Using Dtask since it starts and returns faster than an MRTask
          CreateParse2GlobalCategoricalMaps[] fcdt = new CreateParse2GlobalCategoricalMaps[H2O.CLOUD.size()];
          RPC[] rpcs = new RPC[H2O.CLOUD.size()];
          for (int i = 0; i < fcdt.length; i++){
            H2ONode[] nodes = H2O.CLOUD.members();
            fcdt[i] = new CreateParse2GlobalCategoricalMaps(mfpt._cKey, fr._key, ecols);
            rpcs[i] = new RPC<>(nodes[i], fcdt[i]).call();
          }
          for (RPC rpc : rpcs) rpc.get();

          new UpdateCategoricalChunksTask(mfpt._cKey, mfpt._chunk2ParseNodeMap).doAll(evecs);
          MultiFileParseTask._categoricals.remove(mfpt._cKey);
        }
        Log.trace("Done unifying categoricals across nodes.");
      }
    } else {
      // No categoricals case
      job.update(0,"Compressing data.");
      fr = new Frame(job._result, setup._column_names,AppendableVec.closeAll(avs));
      Log.trace("Done closing all Vecs.");
    }
    // Check for job cancellation
    if ( job.stop_requested() ) return pds;

    // SVMLight is sparse format, there may be missing chunks with all 0s, fill them in
    if (setup._parse_type.equals(SVMLight_INFO))
      new SVFTask(fr).doAllNodes();

    // Check for job cancellation
    if ( job.stop_requested() ) return pds;

    ParseWriter.ParseErr [] errs = ArrayUtils.append(setup._errs,mfpt._errors);
    if(errs.length > 0) {
      // compute global line numbers for warnings/errs
      HashMap<String, Integer> fileChunkOffsets = new HashMap<>();
      for (int i = 0; i < mfpt._fileChunkOffsets.length; ++i)
        fileChunkOffsets.put(fkeys[i].toString(), mfpt._fileChunkOffsets[i]);
      long[] espc = fr.anyVec().espc();
      for (int i = 0; i < errs.length; ++i) {
        if(fileChunkOffsets.containsKey(errs[i]._file)) {
          int espcOff = fileChunkOffsets.get(errs[i]._file);
          errs[i]._gLineNum = espc[espcOff + errs[i]._cidx] + errs[i]._lineNum;
          errs[i]._lineNum = errs[i]._gLineNum - espc[espcOff];
        }
      }
      SortedSet<ParseWriter.ParseErr> s = new TreeSet<>(new Comparator<ParseWriter.ParseErr>() {
        @Override public int compare(ParseWriter.ParseErr o1, ParseWriter.ParseErr o2) {
          long res = o1._gLineNum - o2._gLineNum;
          if (res == 0) res = o1._byteOffset - o2._byteOffset;
          if (res == 0) return o1._err.compareTo(o2._err);
          return (int) res < 0 ? -1 : 1;
        }
      });
      Collections.addAll(s, errs);
      String[] warns = new String[s.size()];
      int i = 0;
      for (ParseWriter.ParseErr err : s)
        Log.warn(warns[i++] = err.toString());
      job.setWarnings(warns);
    }
    job.update(0,"Calculating data summary.");
    logParseResults(fr);
    // Release the frame for overwriting
    fr.update(job);
    Frame fr2 = DKV.getGet(fr._key);
    assert fr2._names.length == fr2.numCols();
    fr.unlock(job);
    // Remove CSV files from H2O memory
    if( deleteOnDone )
      for( Key k : fkeys ) {
        DKV.remove(k);
        assert DKV.get(k) == null : "Input key " + k + " not deleted during parse";
      }
    return pds;
  }

  private static class CreateParse2GlobalCategoricalMaps extends DTask<CreateParse2GlobalCategoricalMaps> {
    private final Key _parseCatMapsKey;
    private final Key _frKey;
    private final int[] _ecol;

    private CreateParse2GlobalCategoricalMaps(Key parseCatMapsKey, Key key, int[] ecol) {
      _parseCatMapsKey = parseCatMapsKey;
      _frKey = key;
      _ecol = ecol;
    }

    @Override public void compute2() {
      Frame _fr = DKV.getGet(_frKey);
      // get the node local category->ordinal maps for each column from initial parse pass
      if( !MultiFileParseTask._categoricals.containsKey(_parseCatMapsKey) ) {
        tryComplete();
        return;
      }
      final Categorical[] parseCatMaps = MultiFileParseTask._categoricals.get(_parseCatMapsKey);
      int[][] _nodeOrdMaps = new int[_ecol.length][];

      // create old_ordinal->new_ordinal map for each cat column
      for (int eColIdx = 0; eColIdx < _ecol.length; eColIdx++) {
        int colIdx = _ecol[eColIdx];
        if (parseCatMaps[colIdx].size() != 0) {
          _nodeOrdMaps[eColIdx] = MemoryManager.malloc4(parseCatMaps[colIdx].maxId() + 1);
          Arrays.fill(_nodeOrdMaps[eColIdx], -1);
          // Bulk String->BufferedString conversion is slightly faster, but consumes memory
          final BufferedString[] unifiedDomain = BufferedString.toBufferedString(_fr.vec(colIdx).domain());
          //final String[] unifiedDomain = _fr.vec(colIdx).domain();
          for (int i = 0; i < unifiedDomain.length; i++) {
            //final BufferedString cat = new BufferedString(unifiedDomain[i]);
            if (parseCatMaps[colIdx].containsKey(unifiedDomain[i])) {
              _nodeOrdMaps[eColIdx][parseCatMaps[colIdx].getTokenId(unifiedDomain[i])] = i;
            }
          }
        } else {
          Log.debug("Column " + colIdx + " was marked as categorical but categorical map is empty!");
        }
      }
      // Store the local->global ordinal maps in DKV by node parse categorical key and node index
      DKV.put(Key.make(_parseCatMapsKey.toString() + "parseCatMapNode" + H2O.SELF.index()),
              new CategoricalUpdateMap(_nodeOrdMaps));
      tryComplete();
    }
  }
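  // Illustrative example (not in the original source) of the old_ordinal->new_ordinal
  // map built above.  Suppose a node parsed a column and assigned local ids
  // {"dog"->0, "cat"->1}, while the unified, sorted global domain for that column is
  // {"cat","dog","mouse"}.  The node's map then becomes [1, 0]: local id 0 ("dog")
  // maps to global id 1, local id 1 ("cat") maps to global id 0, and "mouse" simply
  // never appears in this node's chunks.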
  // --------------------------------------------------------------------------
  /** Task to update categorical values to match the global numbering scheme.
   *  Performs the update in place so that values originally numbered using
   *  node-local unordered numbering will be numbered using the global numbering.
   *  @author tomasnykodym
   */
  private static class UpdateCategoricalChunksTask extends MRTask<UpdateCategoricalChunksTask> {
    private final Key _parseCatMapsKey;
    private final int [] _chunk2ParseNodeMap;

    private UpdateCategoricalChunksTask(Key parseCatMapsKey, int[] chunk2ParseNodeMap) {
      _parseCatMapsKey = parseCatMapsKey;
      _chunk2ParseNodeMap = chunk2ParseNodeMap;
    }

    @Override public void map(Chunk [] chks){
      CategoricalUpdateMap temp = DKV.getGet(Key.make(_parseCatMapsKey.toString() + "parseCatMapNode" + _chunk2ParseNodeMap[chks[0].cidx()]));
      if ( temp == null || temp.map == null)
        throw new H2OIllegalValueException("Missing categorical update map",this);
      int[][] _parse2GlobalCatMaps = temp.map;

      // update the chunk with the new map
      final int cidx = chks[0].cidx();
      for(int i = 0; i < chks.length; ++i) {
        Chunk chk = chks[i];
        if (!(chk instanceof CStrChunk)) {
          for( int j = 0; j < chk._len; ++j){
            if( chk.isNA(j) ) continue;
            final int old = (int) chk.at8(j);
            if (old < 0 || (_parse2GlobalCatMaps[i] != null && old >= _parse2GlobalCatMaps[i].length))
              chk.reportBrokenCategorical(i, j, old, _parse2GlobalCatMaps[i], _fr.vec(i).domain().length);
            if(_parse2GlobalCatMaps[i] != null && _parse2GlobalCatMaps[i][old] < 0)
              throw new H2OParseException("Error in unifying categorical values. This is typically "
                  +"caused by unrecognized characters in the data.\n The problem categorical value "
                  +"occurred in the " + PrettyPrint.withOrdinalIndicator(i+1)+ " categorical col, "
                  +PrettyPrint.withOrdinalIndicator(chk.start() + j) +" row.");
            if (_parse2GlobalCatMaps[i] != null)
              chk.set(j, _parse2GlobalCatMaps[i][old]);
          }
          Log.trace("Updated domains for "+PrettyPrint.withOrdinalIndicator(i+1)+ " categorical column.");
        }
        chk.close(cidx, _fs);
      }
    }

    @Override public void postGlobal() {
      for (int i=0; i < H2O.CLOUD.size(); i++)
        DKV.remove(Key.make(_parseCatMapsKey.toString() + "parseCatMapNode" + i));
    }
  }

  private static class GatherCategoricalDomainsTask extends MRTask<GatherCategoricalDomainsTask> {
    private final Key _k;
    private final int[] _catColIdxs;
    private byte[][] _packedDomains;

    private GatherCategoricalDomainsTask(Key k, int[] ccols) {
      _k = k;
      _catColIdxs = ccols;
    }

    @Override public void setupLocal() {
      if (!MultiFileParseTask._categoricals.containsKey(_k)) return;
      _packedDomains = new byte[_catColIdxs.length][];
      final BufferedString[][] _perColDomains = new BufferedString[_catColIdxs.length][];
      final Categorical[] _colCats = MultiFileParseTask._categoricals.get(_k);
      int i = 0;
      for (int col : _catColIdxs) {
        _colCats[col].convertToUTF8(col + 1);
        _perColDomains[i] = _colCats[col].getColumnDomain();
        Arrays.sort(_perColDomains[i]);
        _packedDomains[i] = PackedDomains.pack(_perColDomains[i]);
        i++;
      }
      Log.trace("Done locally collecting domains on each node.");
    }

    @Override public void reduce(final GatherCategoricalDomainsTask other) {
      if (_packedDomains == null) {
        _packedDomains = other._packedDomains;
      } else if (other._packedDomains != null) { // merge two packed domains
        H2OCountedCompleter[] domtasks = new H2OCountedCompleter[_catColIdxs.length];
        for (int i = 0; i < _catColIdxs.length; i++) {
          final int fi = i;
          domtasks[i] = new H2OCountedCompleter(currThrPriority()) {
            @Override public void compute2() {
              _packedDomains[fi] = PackedDomains.merge(_packedDomains[fi], other._packedDomains[fi]);
              tryComplete();
            }
          };
        }
        ForkJoinTask.invokeAll(domtasks);
      }
      Log.trace("Done merging domains.");
    }
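    // Illustrative example (not in the original source) of how per-node domains
    // combine in reduce() above.  Assuming PackedDomains.merge produces the union
    // of two sorted, packed domains, a column whose values were {"cat","dog"} on
    // one node and {"dog","mouse"} on another ends up with the global domain
    // {"cat","dog","mouse"}; positions in that merged, sorted domain become the
    // global ordinals later wired in by CreateParse2GlobalCategoricalMaps.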
    public int getDomainLength(int colIdx) {
      return _packedDomains == null ? 0 : PackedDomains.sizeOf(_packedDomains[colIdx]);
    }
    public String[] getDomain(int colIdx) {
      return _packedDomains == null ? null : PackedDomains.unpackToStrings(_packedDomains[colIdx]);
    }
  }

  // --------------------------------------------------------------------------
  // Run once on all nodes; fill in missing zero chunks
  private static class SVFTask extends MRTask<SVFTask> {
    private final Frame _f;
    private SVFTask( Frame f ) { _f = f; }

    @Override public void setupLocal() {
      if( _f.numCols() == 0 ) return;
      Vec v0 = _f.anyVec();
      ArrayList<RecursiveAction> rs = new ArrayList<RecursiveAction>();
      for( int i = 0; i < v0.nChunks(); ++i ) {
        if( !v0.chunkKey(i).home() ) continue;
        final int fi = i;
        rs.add(new RecursiveAction() {
          @Override protected void compute() {
            // First find the nrows as the # rows of non-missing chunks; done on
            // locally-homed chunks only - to keep the data distribution.
            int nlines = 0;
            for( Vec vec : _f.vecs() ) {
              Value val = Value.STORE_get(vec.chunkKey(fi)); // Local-get only
              if( val != null ) {
                nlines = ((Chunk)val.get())._len;
                break;
              }
            }
            final int fnlines = nlines;
            // Now fill in appropriate-sized zero chunks
            for(int j = 0; j < _f.numCols(); ++j) {
              Vec vec = _f.vec(j);
              Key k = vec.chunkKey(fi);
              Value val = Value.STORE_get(k); // Local-get only
              if( val == null ) // Missing?  Fill in w/zero chunk
                H2O.putIfMatch(k, new Value(k, new C0LChunk(0, fnlines)), null);
            }
          }
        });
      }
      ForkJoinTask.invokeAll(rs);
    }
    @Override public void reduce( SVFTask drt ) {}
  }

  // --------------------------------------------------------------------------
  // We want to do a standard MRTask with a collection of file-keys (so the
  // files are parsed in parallel across the cluster), but we want to throttle
  // the parallelism on each node.
  private static class MultiFileParseTask extends MRTask<MultiFileParseTask> {
    // TOO_MANY_KEYS_COUNT specifies when to disable parallel parse.  We want to cover a scenario when
    // we are working with too many keys made of small files - in this case the distributed parse
    // doesn't work well because of the way chunks are distributed to nodes.  We should switch to a local
    // parse to make sure the work is uniformly distributed across the whole cluster.
    private static final int TOO_MANY_KEYS_COUNT = 128;
    // A file is considered to be small if it can fit into <SMALL_FILE_NCHUNKS> number of chunks.
    private static final int SMALL_FILE_NCHUNKS = 10;

    private final ParseSetup _parseSetup;     // The expected column layout
    private final VectorGroup _vg;            // vector group of the target dataset
    private final int _vecIdStart;            // Start of available vector keys
    // Shared against all concurrent unrelated parses, a map to the node-local
    // categorical lists for each concurrent parse.
    private static NonBlockingHashMap<Key, Categorical[]> _categoricals = new NonBlockingHashMap<>();
    // The Key used to sort out *this* parse's Categorical[]
    private final Key _cKey = Key.make();
    // Eagerly delete Big Data
    private final boolean _deleteOnDone;
    // Mapping from Chunk# to node index holding the initial category mappings.
    // It is either self for all the non-parallel parses, or the Chunk-home for parallel parses.
    private int[] _chunk2ParseNodeMap;
    // Job Key, to unlock & remove raw parsed data; to report progress
    private final Key<Job> _jobKey;
    // A mapping of Key+ByteVec to rolling total Chunk counts.
    private final int[] _fileChunkOffsets;

    // OUTPUT fields:
    FVecParseWriter[] _dout;
    int _reservedKeys;
    private ParseWriter.ParseErr[] _errors = new ParseWriter.ParseErr[0];

    MultiFileParseTask(VectorGroup vg, ParseSetup setup, Key<Job> jobKey, Key[] fkeys, boolean deleteOnDone ) {
      _vg = vg;
      _parseSetup = setup;
      _vecIdStart = _vg.reserveKeys(_reservedKeys = _parseSetup._parse_type.equals(SVMLight_INFO) ? 100000000 : setup._number_columns);
      _deleteOnDone = deleteOnDone;
      _jobKey = jobKey;
      // A mapping of Key+ByteVec to rolling total Chunk counts.
      _fileChunkOffsets = new int[fkeys.length];
      int len = 0;
      for( int i = 0; i < fkeys.length; ++i ) {
        _fileChunkOffsets[i] = len;
        len += getByteVec(fkeys[i]).nChunks();
      }
      // Mapping from Chunk# to cluster-node-number
      _chunk2ParseNodeMap = MemoryManager.malloc4(len);
      Arrays.fill(_chunk2ParseNodeMap, -1);
    }

    private AppendableVec [] _vecs;

    @Override public void postGlobal(){
      Log.trace("Begin file parse cleanup.");
      // Compress nulls out of _dout array
      int n=0;
      for( int i=0; i<_dout.length; i++ )
        if( _dout[i] != null ) _dout[n++] = _dout[i];
      if( n < _dout.length ) _dout = Arrays.copyOf(_dout,n);
      // Fast path: only one Vec result, so it never needs to have its Chunks renumbered
      if(_dout.length == 1) {
        _vecs = _dout[0]._vecs;
        return;
      }
      int nchunks = 0;          // Count chunks across all Vecs
      int nCols = 0;            // SVMLight special: find max columns
      for( FVecParseWriter dout : _dout ) {
        nchunks += dout._vecs[0]._tmp_espc.length;
        nCols = Math.max(dout._vecs.length,nCols);
      }
      // One Big Happy Shared ESPC
      long[] espc = MemoryManager.malloc8(nchunks);
      // AppendableVecs that are sized across the sum of all files.
      // Preallocated a bunch of Keys, but if we didn't get enough (for very
      // wide SVMLight) we need to get more here.
      if( nCols > _reservedKeys ) throw H2O.unimpl();
      AppendableVec[] res = new AppendableVec[nCols];
      if(_parseSetup._parse_type.equals(SVMLight_INFO)) {
        _parseSetup._number_columns = res.length;
        _parseSetup._column_types = new byte[res.length];
        Arrays.fill(_parseSetup._column_types,Vec.T_NUM);
      }
      for(int i = 0; i < res.length; ++i)
        res[i] = new AppendableVec(_vg.vecKey(_vecIdStart + i), espc, _parseSetup._column_types[i], 0);
      // Load the global ESPC from the file-local ESPCs
      for( FVecParseWriter fvpw : _dout ) {
        AppendableVec[] avs = fvpw._vecs;
        long[] file_local_espc = avs[0]._tmp_espc;
        // Quick assert that all partial AVs in each DOUT are sharing a common chunkOff, and common Vec Keys
        for( int j = 0; j < avs.length; ++j ) {
          assert res[j]._key.equals(avs[j]._key);
          assert avs[0]._chunkOff == avs[j]._chunkOff;
          assert file_local_espc == avs[j]._tmp_espc || Arrays.equals(file_local_espc,avs[j]._tmp_espc);
        }
        System.arraycopy(file_local_espc, 0, espc, avs[0]._chunkOff, file_local_espc.length);
      }
      _vecs = res;
      Log.trace("Finished file parse cleanup.");
    }

    private AppendableVec[] vecs(){ return _vecs; }

    @Override public void setupLocal() {
      _dout = new FVecParseWriter[_keys.length];
    }

    // Fetch out the node-local Categorical[] using _cKey and _categoricals hashtable
    private static Categorical[] categoricals(Key cKey, int ncols) {
      Categorical[] categoricals = _categoricals.get(cKey);
      if( categoricals != null ) return categoricals;
      categoricals = new Categorical[ncols];
      for( int i = 0; i < categoricals.length; ++i ) categoricals[i] = new Categorical();
      _categoricals.putIfAbsent(cKey, categoricals);
      return _categoricals.get(cKey); // Re-get in case we lost the insertion race
    }
    // Flag all chunk categoricals as being on local (self)
    private void chunksAreLocal( Vec vec, int chunkStartIdx, Key key ) {
      for(int i = 0; i < vec.nChunks(); ++i)
        _chunk2ParseNodeMap[chunkStartIdx + i] = H2O.SELF.index();
      // For Big Data, must delete data as eagerly as possible.
      Iced ice = DKV.get(key).get();
      if( ice==vec ) {
        if(_deleteOnDone) vec.remove();
      } else {
        Frame fr = (Frame)ice;
        if(_deleteOnDone) fr.delete(_jobKey,new Futures()).blockForPending();
        else if( fr._key != null ) fr.unlock(_jobKey);
      }
    }

    private FVecParseWriter makeDout(ParseSetup localSetup, int chunkOff, int nchunks) {
      AppendableVec [] avs = new AppendableVec[localSetup._number_columns];
      final long [] espc = MemoryManager.malloc8(nchunks);
      final byte[] ctypes = localSetup._column_types; // SVMLight only uses numeric types, sparsely represented as a null
      for(int i = 0; i < avs.length; ++i)
        avs[i] = new AppendableVec(_vg.vecKey(i + _vecIdStart), espc, ctypes==null ? /*SVMLight*/Vec.T_NUM : ctypes[i], chunkOff);
      return localSetup._parse_type.equals(SVMLight_INFO)
          ? new SVMLightFVecParseWriter(_vg, _vecIdStart,chunkOff, _parseSetup._chunk_size, avs)
          : new FVecParseWriter(_vg, chunkOff, categoricals(_cKey, localSetup._number_columns), localSetup._column_types, _parseSetup._chunk_size, avs);
    }

    // Called once per file
    @Override public void map( Key key ) {
      if( _jobKey.get().stop_requested() ) return;
      // FIXME: refactor parser setup to be configurable via parser object
      ParseSetup localSetup = (ParseSetup) _parseSetup.clone();
      ByteVec vec = getByteVec(key);
      final int chunkStartIdx = _fileChunkOffsets[_lo];
      Log.trace("Begin a map stage of a file parse with start index " + chunkStartIdx + ".");

      byte[] zips = vec.getFirstBytes();
      ZipUtil.Compression cpr = ZipUtil.guessCompressionMethod(zips);

      if (localSetup._check_header == ParseSetup.HAS_HEADER) // check for header on local file
        localSetup._check_header = localSetup.parser(_jobKey).fileHasHeader(ZipUtil.unzipBytes(zips,cpr, localSetup._chunk_size), localSetup);

      // Parse the file
      try {
        switch( cpr ) {
        case NONE:
          boolean disableParallelParse = (_keys.length > TOO_MANY_KEYS_COUNT)
              && (vec.nChunks() <= SMALL_FILE_NCHUNKS)
              && _parseSetup._parse_type.isStreamParseSupported();
          if( _parseSetup._parse_type.isParallelParseSupported() && (! disableParallelParse)) {
            new DistributedParse(_vg, localSetup, _vecIdStart, chunkStartIdx, this, key, vec.nChunks()).dfork(vec).getResult(false);
            for( int i = 0; i < vec.nChunks(); ++i )
              _chunk2ParseNodeMap[chunkStartIdx + i] = vec.chunkKey(i).home_node().index();
          } else {
            InputStream bvs = vec.openStream(_jobKey);
            _dout[_lo] = streamParse(bvs, localSetup, makeDout(localSetup,chunkStartIdx,vec.nChunks()), bvs);
            _errors = _dout[_lo].removeErrors();
            chunksAreLocal(vec,chunkStartIdx,key);
          }
          break;
        case ZIP: {
          // Zipped file; no parallel decompression;
          InputStream bvs = vec.openStream(_jobKey);
          ZipInputStream zis = new ZipInputStream(bvs);
          if (ZipUtil.isZipDirectory(key)) { // the file is a zip of multiple files
            zis.getNextEntry();              // first ZipEntry describes the directory
          }
          ZipEntry ze = zis.getNextEntry();  // Get the *FIRST* entry
          // There is at least one entry in the zip file and it is not a directory.
          if( ze != null && !ze.isDirectory() )
            _dout[_lo] = streamParse(zis,localSetup, makeDout(localSetup,chunkStartIdx,vec.nChunks()), bvs);
          _errors = _dout[_lo].removeErrors();
          zis.close();       // Confused: which zipped file to decompress
          chunksAreLocal(vec,chunkStartIdx,key);
          break;
        }
        case GZIP: {
          InputStream bvs = vec.openStream(_jobKey);
          // Zipped file; no parallel decompression;
          _dout[_lo] = streamParse(new GZIPInputStream(bvs), localSetup, makeDout(localSetup,chunkStartIdx,vec.nChunks()),bvs);
          _errors = _dout[_lo].removeErrors();
          // set this node as the one which processed all the chunks
          chunksAreLocal(vec,chunkStartIdx,key);
          break;
        }
        }
        Log.trace("Finished a map stage of a file parse with start index "+chunkStartIdx+".");
      } catch( IOException ioe ) {
        throw new RuntimeException(ioe);
      } catch (H2OParseException pe0) {
        // Rebuild identical exception and stack trace, but add key to msg
        throw pe0.resetMsg(pe0.getMessage()+" for "+key);
      }
    }

    // Reduce: combine errors from across files.
    // Roll-up other meta data
    @Override public void reduce( MultiFileParseTask mfpt ) {
      assert this != mfpt;
      Log.trace("Begin a reduce stage of a file parse.");
      // Collect & combine columns across files
      if( _dout == null ) _dout = mfpt._dout;
      else if(_dout != mfpt._dout) _dout = ArrayUtils.append(_dout,mfpt._dout);
      if( _chunk2ParseNodeMap == null ) _chunk2ParseNodeMap = mfpt._chunk2ParseNodeMap;
      else if(_chunk2ParseNodeMap != mfpt._chunk2ParseNodeMap) { // we're sharing the global array!
        for( int i = 0; i < _chunk2ParseNodeMap.length; ++i ) {
          if( _chunk2ParseNodeMap[i] == -1 ) _chunk2ParseNodeMap[i] = mfpt._chunk2ParseNodeMap[i];
          else assert mfpt._chunk2ParseNodeMap[i] == -1 : Arrays.toString(_chunk2ParseNodeMap) + " :: " + Arrays.toString(mfpt._chunk2ParseNodeMap);
        }
      }
      if(_errors == null) _errors = mfpt._errors;
      else if(_errors.length < 20) {
        _errors = ArrayUtils.append(_errors, mfpt._errors);
        if(_errors.length > 20) _errors = Arrays.copyOf(_errors,20);
      }
      Log.trace("Finished a reduce stage of a file parse.");
    }

    // ------------------------------------------------------------------------
    // Zipped file; no parallel decompression; decompress into local chunks,
    // parse local chunks; distribute chunks later.
    private FVecParseWriter streamParse(final InputStream is, final ParseSetup localSetup, FVecParseWriter dout, InputStream bvs) throws IOException {
      // All output into a fresh pile of NewChunks, one per column
      Parser p = localSetup.parser(_jobKey);
      // assume 2x inflation rate
      if(localSetup._parse_type.isParallelParseSupported()) p.streamParseZip(is, dout, bvs);
      else p.streamParse(is,dout);
      // Parse all internal "chunks", until we drain the zip-stream dry.  Not
      // real chunks, just flipping between 32K buffers.  Fills up the single
      // very large NewChunk.
      dout.close(_fs);
      return dout;
    }

    // ------------------------------------------------------------------------
    private static class DistributedParse extends MRTask<DistributedParse> {
      private ParseSetup _setup;
      private final int _vecIdStart;
      private final int _startChunkIdx; // for multifile parse, offset of the first chunk in the final dataset
      private final VectorGroup _vg;
      private FVecParseWriter _dout;
      private final Key _cKey;          // Parse-local-categoricals key
      private final Key<Job> _jobKey;
      private transient final MultiFileParseTask _outerMFPT;
      private transient final Key _srckey; // Source/text file to delete on done
      private transient NonBlockingSetInt _visited;
      private transient long [] _espc;
      final int _nchunks;

      DistributedParse(VectorGroup vg, ParseSetup setup, int vecIdstart, int startChunkIdx, MultiFileParseTask mfpt, Key srckey, int nchunks) {
        super(null);
        _vg = vg;
        _setup = setup;
        _vecIdStart = vecIdstart;
        _startChunkIdx = startChunkIdx;
        _outerMFPT = mfpt;
        _cKey = mfpt._cKey;
        _jobKey = mfpt._jobKey;
        _srckey = srckey;
        _nchunks = nchunks;
      }

      @Override public void setupLocal(){
        super.setupLocal();
        _visited = new NonBlockingSetInt();
        _espc = MemoryManager.malloc8(_nchunks);
        _setup = ParserService.INSTANCE.getByInfo(_setup._parse_type).setupLocal(_fr.anyVec(),_setup);
      }

      @Override public void map( Chunk in ) {
        if( _jobKey.get().stop_requested() ) throw new Job.JobCancelledException();
        AppendableVec [] avs = new AppendableVec[_setup._number_columns];
        for(int i = 0; i < avs.length; ++i)
          if (_setup._column_types == null) // SVMLight
            avs[i] = new AppendableVec(_vg.vecKey(_vecIdStart + i), _espc, Vec.T_NUM, _startChunkIdx);
          else
            avs[i] = new AppendableVec(_vg.vecKey(_vecIdStart + i), _espc, _setup._column_types[i], _startChunkIdx);
        // Break out the input & output vectors before the parse loop
        FVecParseReader din = new FVecParseReader(in);
        FVecParseWriter dout;
        // Get a parser
        Parser p = _setup.parser(_jobKey);
        switch(_setup._parse_type.name()) {
        case "ARFF":
        case "CSV":
        case "PARQUET":
          Categorical [] categoricals = categoricals(_cKey, _setup._number_columns);
          dout = new FVecParseWriter(_vg,_startChunkIdx + in.cidx(), categoricals, _setup._column_types, _setup._chunk_size, avs); //TODO: use _setup._domains instead of categoricals
          break;
        case "SVMLight":
          dout = new SVMLightFVecParseWriter(_vg, _vecIdStart, in.cidx() + _startChunkIdx, _setup._chunk_size, avs);
          break;
        case "ORC":  // setup special case for ORC
          Categorical [] orc_categoricals = categoricals(_cKey, _setup._number_columns);
          dout = new FVecParseWriter(_vg, in.cidx() + _startChunkIdx, orc_categoricals, _setup._column_types, _setup._chunk_size, avs);
          break;
        default: // FIXME: should not be default and creation strategy should be forwarded to ParserProvider
          dout = new FVecParseWriter(_vg, in.cidx() + _startChunkIdx, null, _setup._column_types, _setup._chunk_size, avs);
          break;
        }
        p.parseChunk(in.cidx(), din, dout);
        (_dout = dout).close(_fs);
        Job.update(in._len, _jobKey); // Record bytes parsed
        // remove parsed data right away
        freeMem(in);
      }
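      // Illustrative trace (not in the original source) for freeMem below: when the
      // task for chunk 4 finishes it marks chunks 4 and 5; when the task for chunk 5
      // finishes it marks chunks 5 and 6.  Chunk 5 has then been marked twice -- both
      // the task that parsed it and the task that read its leading bytes to finish the
      // previous row are done -- so its in-memory copy can be dropped (provided it is
      // backed by a persisted Value).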
      /**
       * This marks parsed byteVec chunks as ready to be freed.  If this is the second
       * time a chunk has been marked, it is freed.  The reason two marks are required
       * is that each chunk parse typically needs to read the remaining bytes of the
       * current row from the next chunk.  Thus each task typically touches two chunks.
       *
       * @param in - chunk to be marked and possibly freed
       */
      private void freeMem(Chunk in) {
        int cidx = in.cidx();
        for(int i=0; i < 2; i++) { // iterate over this chunk and the next one
          cidx += i;
          if (!_visited.add(cidx)) { // Second visit
            Value v = Value.STORE_get(in.vec().chunkKey(cidx));
            if (v == null || !v.isPersisted()) return; // Not found, or not on disk somewhere
            v.freePOJO();           // Eagerly toss from memory
            v.freeMem();
          }
        }
      }

      @Override public void reduce(DistributedParse dp) { _dout.reduce(dp._dout); }

      @Override public void postGlobal() {
        super.postGlobal();
        _outerMFPT._dout[_outerMFPT._lo] = _dout;
        if(_dout.hasErrors()) {
          ParseWriter.ParseErr [] errs = _dout.removeErrors();
          for(ParseWriter.ParseErr err:errs) err._file = FileVec.getPathForKey(_srckey).toString();
          Arrays.sort(errs, new Comparator<ParseWriter.ParseErr>() {
            @Override public int compare(ParseWriter.ParseErr o1, ParseWriter.ParseErr o2) {
              return (int)(o1._byteOffset - o2._byteOffset);
            }
          });
          _outerMFPT._errors = errs;
        }
        _dout = null;           // Reclaim GC eagerly
        // For Big Data, must delete data as eagerly as possible.
        Value val = DKV.get(_srckey);
        if( val == null ) return;
        Iced ice = val.get();
        if( ice instanceof ByteVec ) {
          if( _outerMFPT._deleteOnDone) ((ByteVec)ice).remove();
        } else {
          Frame fr = (Frame)ice;
          if( _outerMFPT._deleteOnDone) fr.delete(_outerMFPT._jobKey,new Futures()).blockForPending();
          else if( fr._key != null ) fr.unlock(_outerMFPT._jobKey);
        }
      }
    }

    // Find & remove all partially built output chunks & vecs
    Futures onExceptionCleanup(Futures fs) {
      int nchunks = _chunk2ParseNodeMap.length;
      int ncols = _parseSetup._number_columns;
      for( int i = 0; i < ncols; ++i ) {
        Key vkey = _vg.vecKey(_vecIdStart + i);
        Keyed.remove(vkey,fs);
        for( int c = 0; c < nchunks; ++c )
          DKV.remove(Vec.chunkKey(vkey,c),fs);
      }
      cancel(true);
      return fs;
    }
  }

  // ------------------------------------------------------------------------
  // Log information about the dataset we just parsed.
  public static void logParseResults(Frame fr) {
    long numRows = fr.anyVec().length();
    Log.info("Parse result for " + fr._key + " (" + Long.toString(numRows) + " rows):");
    // get all rollups started in parallel, otherwise this takes ages!
    Futures fs = new Futures();
    Vec[] vecArr = fr.vecs();
    for(Vec v:vecArr) v.startRollupStats(fs);
    fs.blockForPending();

    int namelen = 0;
    for (String s : fr.names()) namelen = Math.max(namelen, s.length());
    String format = " %"+namelen+"s %7s %12.12s %12.12s %12.12s %12.12s %11s %8s %6s";
    Log.info(String.format(format, "ColV2", "type", "min", "max", "mean", "sigma", "NAs", "constant", "cardinality"));
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    for( int i = 0; i < vecArr.length; i++ ) {
      Vec v = vecArr[i];
      boolean isCategorical = v.isCategorical();
      boolean isConstant = v.isConst();
      String CStr = String.format("%"+namelen+"s:", fr.names()[i]);
      String typeStr;
      String minStr;
      String maxStr;
      String meanStr="";
      String sigmaStr="";

      switch( v.get_type() ) {
      case Vec.T_BAD : typeStr = "all_NA" ; minStr = ""; maxStr = ""; break;
      case Vec.T_UUID: typeStr = "UUID"   ; minStr = ""; maxStr = ""; break;
      case Vec.T_STR : typeStr = "string" ; minStr = ""; maxStr = ""; break;
      case Vec.T_NUM : typeStr = "numeric";
        minStr = String.format("%g", v.min());
        maxStr = String.format("%g", v.max());
        meanStr = String.format("%g", v.mean());
        sigmaStr = String.format("%g", v.sigma());
        break;
      case Vec.T_CAT : typeStr = "factor" ; minStr = v.factor(0); maxStr = v.factor(v.cardinality()-1); break;
      case Vec.T_TIME: typeStr = "time"   ; minStr = sdf.format(v.min()); maxStr = sdf.format(v.max()); break;
      default: throw H2O.unimpl();
      }

      long numNAs = v.naCnt();
      String naStr = (numNAs > 0) ? String.format("%d", numNAs) : "";
      String isConstantStr = isConstant ? "constant" : "";
      String numLevelsStr = isCategorical ? String.format("%d", v.domain().length) : "";

      boolean launchedWithHadoopJar = H2O.ARGS.launchedWithHadoopJar();
      boolean printLogSeparatorToStdout = false;
      boolean printColumnToStdout;
      {
        // Print information to stdout for this many leading columns.
        final int MAX_HEAD_TO_PRINT_ON_STDOUT = 10;
        // Print information to stdout for this many trailing columns.
        final int MAX_TAIL_TO_PRINT_ON_STDOUT = 10;

        if (launchedWithHadoopJar) {
          printColumnToStdout = true;
        } else if (vecArr.length <= (MAX_HEAD_TO_PRINT_ON_STDOUT + MAX_TAIL_TO_PRINT_ON_STDOUT)) {
          // For small numbers of columns, print them all.
          printColumnToStdout = true;
        } else if (i < MAX_HEAD_TO_PRINT_ON_STDOUT) {
          printColumnToStdout = true;
        } else if (i == MAX_HEAD_TO_PRINT_ON_STDOUT) {
          printLogSeparatorToStdout = true;
          printColumnToStdout = false;
        } else if ((i + MAX_TAIL_TO_PRINT_ON_STDOUT) < vecArr.length) {
          printColumnToStdout = false;
        } else {
          printColumnToStdout = true;
        }
      }
      if (printLogSeparatorToStdout)
        Log.info("Additional column information only sent to log file...");

      String s = String.format(format, CStr, typeStr, minStr, maxStr, meanStr, sigmaStr, naStr, isConstantStr, numLevelsStr);
      Log.info(s,printColumnToStdout);
    }
    Log.info(FrameUtils.chunkSummary(fr).toString());
  }

  public static class H2OParseException extends RuntimeException {
    public H2OParseException(String msg){super(msg);}
    public H2OParseException(String msg, Throwable cause){super(msg,cause);}
    public H2OParseException(Throwable cause){super(cause);}

    public H2OParseException resetMsg(String msg) {
      H2OParseException pe1 = new H2OParseException(msg,getCause());
      pe1.setStackTrace(getStackTrace());
      return pe1;
    }
  }
}