/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.types.parser; import java.util.HashMap; import java.util.Map; import eu.stratosphere.types.ByteValue; import eu.stratosphere.types.DoubleValue; import eu.stratosphere.types.FloatValue; import eu.stratosphere.types.IntValue; import eu.stratosphere.types.LongValue; import eu.stratosphere.types.ShortValue; import eu.stratosphere.types.StringValue; /** * A FieldParser is used parse a field from a sequence of bytes. Fields occur in a byte sequence and are terminated * by the end of the byte sequence or a delimiter. * <p> * The parsers do not throw exceptions in general, but set an error state. That way, they can be used in functions * that ignore invalid lines, rather than failing on them. * * @param <T> The type that is parsed. */ public abstract class FieldParser<T> { /** * An enumeration of different types of errors that may occur. */ public static enum ParseErrorState { /** No error occurred. */ NONE, /** The domain of the numeric type is not large enough to hold the parsed value. */ NUMERIC_VALUE_OVERFLOW_UNDERFLOW, /** A stand-alone sign was encountered while parsing a numeric type. */ NUMERIC_VALUE_ORPHAN_SIGN, /** An illegal character was encountered while parsing a numeric type. */ NUMERIC_VALUE_ILLEGAL_CHARACTER, /** The field was not in a correct format for the numeric type. */ NUMERIC_VALUE_FORMAT_ERROR, /** A quoted string was not terminated until the line end. */ UNTERMINATED_QUOTED_STRING, /** The parser found characters between the end of the quoted string and the delimiter. */ UNQUOTED_CHARS_AFTER_QUOTED_STRING } private ParseErrorState errorState = ParseErrorState.NONE; /** * Parses the value of a field from the byte array. * The start position within the byte array and the array's valid length is given. * The content of the value is delimited by a field delimiter. * * @param bytes The byte array that holds the value. * @param startPos The index where the field starts * @param limit The limit unto which the byte contents is valid for the parser. The limit is the * position one after the last valid byte. * @param delim Field delimiter character * @param reuse The an optional reusable field to hold the value * * @return The index of the next delimiter, if the field was parsed correctly. A value less than 0 otherwise. */ public abstract int parseField(byte[] bytes, int startPos, int limit, char delim, T reuse); /** * Gets the parsed field. This method returns the value parsed by the last successful invocation of * {@link #parseField(byte[], int, int, char, Object)}. It objects are mutable and reused, it will return * the object instance that was passed the the parse function. * * @return The latest parsed field. */ public abstract T getLastResult(); /** * Returns an instance of the parsed value type. * * @return An instance of the parsed value type. */ public abstract T createValue(); /** * Sets the error state of the parser. Called by subclasses of the parser to set the type of error * when failing a parse. * * @param error The error state to set. */ protected void setErrorState(ParseErrorState error) { this.errorState = error; } /** * Gets the error state of the parser, as a value of the enumeration {@link ParseErrorState}. * If no error occurred, the error state will be {@link ParseErrorState#NONE}. * * @return The current error state of the parser. */ public ParseErrorState getErrorState() { return this.errorState; } // -------------------------------------------------------------------------------------------- // Mapping from types to parsers // -------------------------------------------------------------------------------------------- /** * Gets the parser for the type specified by the given class. Returns null, if no parser for that class * is known. * * @param type The class of the type to get the parser for. * @return The parser for the given type, or null, if no such parser exists. */ public static <T> Class<FieldParser<T>> getParserForType(Class<T> type) { Class<? extends FieldParser<?>> parser = PARSERS.get(type); if (parser == null) { return null; } else { @SuppressWarnings("unchecked") Class<FieldParser<T>> typedParser = (Class<FieldParser<T>>) parser; return typedParser; } } private static final Map<Class<?>, Class<? extends FieldParser<?>>> PARSERS = new HashMap<Class<?>, Class<? extends FieldParser<?>>>(); static { // basic types PARSERS.put(Byte.class, ByteParser.class); PARSERS.put(Short.class, ShortParser.class); PARSERS.put(Integer.class, IntParser.class); PARSERS.put(Long.class, LongParser.class); PARSERS.put(String.class, AsciiStringParser.class); PARSERS.put(Float.class, FloatParser.class); PARSERS.put(Double.class, DoubleParser.class); // value types PARSERS.put(ByteValue.class, DecimalTextByteParser.class); PARSERS.put(ShortValue.class, DecimalTextShortParser.class); PARSERS.put(IntValue.class, DecimalTextIntParser.class); PARSERS.put(LongValue.class, DecimalTextLongParser.class); PARSERS.put(StringValue.class, VarLengthStringParser.class); PARSERS.put(FloatValue.class, DecimalTextFloatParser.class); PARSERS.put(DoubleValue.class, DecimalTextDoubleParser.class); } }