package water.parser;
import org.apache.commons.math3.analysis.function.Abs;
import java.util.List;
import water.Job;
import water.Key;
import water.fvec.ByteVec;
import water.util.Log;
/**
* Default parsers provided by H2O.
*
* The parsers are registered via the service provider interface (SPI) into
* <code>{@link ParserService}</code>.
*/
public final class DefaultParserProviders {
/**
 * Default parser handles.
 *
 * Each handle is built from a name, an integer and a boolean flag.
 * NOTE(review): the integer appears to be the parser priority (compare
 * MAX_CORE_PRIO below and the priority-sorted provider list used when guessing
 * the file type) - confirm against ParserInfo. The meaning of the boolean flag
 * is not visible in this file - TODO confirm.
 */
public static final ParserInfo ARFF_INFO = new ParserInfo("ARFF", 0, true);
public static final ParserInfo XLS_INFO = new ParserInfo("XLS", 100, false);
// NOTE(review): no provider class in this file returns XLSX_INFO - presumably it is used elsewhere; verify.
public static final ParserInfo XLSX_INFO = new ParserInfo("XLSX", 102, false);
public static final ParserInfo SVMLight_INFO = new ParserInfo("SVMLight", 1000, true);
// Uses Integer.MAX_VALUE, i.e. a value above MAX_CORE_PRIO.
public static final ParserInfo CSV_INFO = new ParserInfo("CSV", Integer.MAX_VALUE, true);
// The only handle with a negative value.
public static final ParserInfo GUESS_INFO = new ParserInfo("GUESS", -10000, false);
/** Priority of non-core parsers should begin here. */
public static final int MAX_CORE_PRIO = 10000;
/**
 * Provider for the ARFF format: identifies itself with {@link #ARFF_INFO},
 * builds {@link ARFFParser} instances and delegates setup guessing to
 * {@link ARFFParser}.
 */
public static final class ArffParserProvider extends AbstractParserProvide {

  /** @return the shared {@link #ARFF_INFO} handle */
  @Override
  public ParserInfo info() {
    return ARFF_INFO;
  }

  /**
   * Guesses the parse setup by delegating to {@code ARFFParser.guessSetup}.
   * Only {@code bits}, {@code sep}, {@code singleQuotes}, {@code columnNames}
   * and {@code naStrings} are forwarded; the remaining arguments are not used
   * by this provider.
   *
   * @return whatever {@code ARFFParser.guessSetup} returns
   */
  @Override
  public ParseSetup guessSetup(
      ByteVec bv,
      byte[] bits,
      byte sep,
      int ncols,
      boolean singleQuotes,
      int checkHeader,
      String[] columnNames,
      byte[] columnTypes,
      String[][] domains,
      String[][] naStrings) {
    return ARFFParser.guessSetup(bits, sep, singleQuotes, columnNames, naStrings);
  }

  /**
   * Creates a new {@link ARFFParser} for the given setup and job key.
   *
   * @param setup  parse setup handed to the parser constructor
   * @param jobKey job key handed to the parser constructor
   * @return a freshly constructed {@link ARFFParser}
   */
  @Override
  public Parser createParser(ParseSetup setup, Key<Job> jobKey) {
    return new ARFFParser(setup, jobKey);
  }
}
/**
 * Provider for the XLS format: identifies itself with {@link #XLS_INFO},
 * builds {@link XlsParser} instances and delegates setup guessing to
 * {@link XlsParser}.
 */
public static final class XlsParserProvider extends AbstractParserProvide {

  /** @return the shared {@link #XLS_INFO} handle */
  @Override
  public ParserInfo info() {
    return XLS_INFO;
  }

  /**
   * Guesses the parse setup by delegating to {@code XlsParser.guessSetup}.
   * Only the raw {@code bits} are forwarded; every other argument is not used
   * by this provider.
   *
   * @return whatever {@code XlsParser.guessSetup} returns
   */
  @Override
  public ParseSetup guessSetup(
      ByteVec bv,
      byte[] bits,
      byte sep,
      int ncols,
      boolean singleQuotes,
      int checkHeader,
      String[] columnNames,
      byte[] columnTypes,
      String[][] domains,
      String[][] naStrings) {
    return XlsParser.guessSetup(bits);
  }

  /**
   * Creates a new {@link XlsParser} for the given setup and job key.
   *
   * @param setup  parse setup handed to the parser constructor
   * @param jobKey job key handed to the parser constructor
   * @return a freshly constructed {@link XlsParser}
   */
  @Override
  public Parser createParser(ParseSetup setup, Key<Job> jobKey) {
    return new XlsParser(setup, jobKey);
  }
}
/**
 * Provider for the SVMLight format: identifies itself with
 * {@link #SVMLight_INFO}, builds {@link SVMLightParser} instances and delegates
 * setup guessing to {@link SVMLightParser}.
 */
public static final class SVMLightParserProvider extends AbstractParserProvide {

  /** @return the shared {@link #SVMLight_INFO} handle */
  @Override
  public ParserInfo info() {
    return SVMLight_INFO;
  }

  /**
   * Guesses the parse setup by delegating to {@code SVMLightParser.guessSetup}.
   * Only the raw {@code bits} are forwarded; every other argument is not used
   * by this provider.
   *
   * @return whatever {@code SVMLightParser.guessSetup} returns
   */
  @Override
  public ParseSetup guessSetup(
      ByteVec bv,
      byte[] bits,
      byte sep,
      int ncols,
      boolean singleQuotes,
      int checkHeader,
      String[] columnNames,
      byte[] columnTypes,
      String[][] domains,
      String[][] naStrings) {
    return SVMLightParser.guessSetup(bits);
  }

  /**
   * Creates a new {@link SVMLightParser} for the given setup and job key.
   *
   * @param setup  parse setup handed to the parser constructor
   * @param jobKey job key handed to the parser constructor
   * @return a freshly constructed {@link SVMLightParser}
   */
  @Override
  public Parser createParser(ParseSetup setup, Key<Job> jobKey) {
    return new SVMLightParser(setup, jobKey);
  }
}
/**
 * Provider for the CSV format: identifies itself with {@link #CSV_INFO},
 * builds {@link CsvParser} instances and delegates setup guessing to
 * {@link CsvParser}.
 */
public static final class CsvParserProvider extends AbstractParserProvide {

  /** @return the shared {@link #CSV_INFO} handle */
  @Override
  public ParserInfo info() {
    return CSV_INFO;
  }

  /**
   * Guesses the parse setup by delegating to {@code CsvParser.guessSetup}.
   * All arguments except {@code bv} and {@code domains} are forwarded.
   *
   * @return whatever {@code CsvParser.guessSetup} returns
   */
  @Override
  public ParseSetup guessSetup(
      ByteVec bv,
      byte[] bits,
      byte sep,
      int ncols,
      boolean singleQuotes,
      int checkHeader,
      String[] columnNames,
      byte[] columnTypes,
      String[][] domains,
      String[][] naStrings) {
    return CsvParser.guessSetup(
        bits, sep, ncols, singleQuotes, checkHeader, columnNames, columnTypes, naStrings);
  }

  /**
   * Creates a new {@link CsvParser} for the given setup and job key.
   *
   * @param setup  parse setup handed to the parser constructor
   * @param jobKey job key handed to the parser constructor
   * @return a freshly constructed {@link CsvParser}
   */
  @Override
  public Parser createParser(ParseSetup setup, Key<Job> jobKey) {
    return new CsvParser(setup, jobKey);
  }
}
/**
 * Pseudo-provider that does not parse anything itself. It identifies itself
 * with {@link #GUESS_INFO} and, when asked to guess a setup, tries every other
 * registered provider in turn and returns the first non-null result.
 */
public static final class GuessParserProvider extends AbstractParserProvide {

  /** @return the shared {@link #GUESS_INFO} handle */
  @Override
  public ParserInfo info() {
    return GUESS_INFO;
  }

  /**
   * Always fails: this provider has no parser of its own.
   *
   * @throws UnsupportedOperationException on every call
   */
  @Override
  public Parser createParser(ParseSetup setup, Key<Job> jobKey) {
    throw new UnsupportedOperationException("Guess parser provider does not know how to create a new parser! Use a specific parser!");
  }

  /**
   * Asks each provider returned by {@code ParserService.INSTANCE.getAllProviders(true)}
   * to guess the setup, forwarding all arguments unchanged, and returns the
   * first non-null answer.
   *
   * <p>This provider itself, and any provider whose info equals
   * {@link #GUESS_INFO}, is skipped so the call cannot recurse. A provider
   * that throws is treated as "could not guess": the failure is trace-logged
   * and the next provider is tried.
   *
   * @return the first non-null setup produced by another provider
   * @throws ParseDataset.H2OParseException if no provider produced a setup
   */
  @Override
  public ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, int ncols, boolean singleQuotes,
                               int checkHeader, String[] columnNames, byte[] columnTypes,
                               String[][] domains, String[][] naStrings) {
    // The 'true' flag asks for the providers sorted by priority.
    List<ParserProvider> providers = ParserService.INSTANCE.getAllProviders(true);
    for (ParserProvider candidate : providers) {
      // Never delegate back to a guess provider - that would recurse.
      if (candidate == this || candidate.info().equals(GUESS_INFO)) {
        continue;
      }
      try {
        ParseSetup guessed = candidate.guessSetup(bv, bits, sep, ncols, singleQuotes, checkHeader,
            columnNames, columnTypes, domains, naStrings);
        if (guessed != null) {
          return guessed;
        }
      } catch (Throwable ignored) {
        // Deliberately best-effort: a failing guesser only means "not this format".
        Log.trace("Guesser failed for parser type", candidate.info(), ignored);
      }
    }
    throw new ParseDataset.H2OParseException("Cannot determine file type.");
  }
}
/**
 * Common base for the default providers in this file. It extends
 * {@link ParserProvider} and supplies a pass-through
 * {@code createParserSetup}.
 */
abstract static class AbstractParserProvide extends ParserProvider {

  /**
   * Returns the required setup unchanged; {@code inputs} is not consulted.
   *
   * @param inputs        input keys (unused here)
   * @param requiredSetup setup requested by the caller
   * @return {@code requiredSetup}, the very same reference
   */
  @Override
  public ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup) {
    return requiredSetup;
  }
}
}