package prefuse.data.parser; import java.util.Arrays; /** * Factory class that maintains a collection of parser instances and returns * the appropriate parser based on a history of samples presented to the * factory. The {@link #sample(String)} method takes a text string and tests * it against all available parsers, updating whether or not the parsers can * successfully parse the value. This method is used in a more automated * fashion by the {@link TypeInferencer} class. * * @author <a href="http://jheer.org">jeffrey heer</a> * @see TypeInferencer */ public class ParserFactory implements Cloneable { private static final DataParser[] DEFAULT_PARSERS = new DataParser[] { new IntParser(), new LongParser(), new DoubleParser(), new FloatParser(), new BooleanParser(), new ColorIntParser(), new DateParser(), new TimeParser(), new DateTimeParser(), new IntArrayParser(), new LongArrayParser(), new FloatArrayParser(), new DoubleArrayParser(), new StringParser() }; private static ParserFactory DEFAULT_FACTORY = new ParserFactory(DEFAULT_PARSERS); private DataParser[] m_parsers; private boolean[] m_isCandidate; /** * Returns the default parser factory. The default factory tests for the * following data types (in the provided order of precedence): * int, long, double, float, boolean, Date, Time, DateTime, String. * @return the default parser factory. */ public static ParserFactory getDefaultFactory() { return DEFAULT_FACTORY; } /** * Sets the default parser factory. This factory will be used by default * by all readers to parse data values. * @param factory the new default parser factory. */ public static void setDefaultFactory(ParserFactory factory) { DEFAULT_FACTORY = factory; } /** * Constructor. Uses a default collection of parsers, testing for the * following data type in the followinf order of precedence: * int, long, double, float, boolean, Date, Time, DateTime, String. */ public ParserFactory() { this(DEFAULT_PARSERS); } /** * @see java.lang.Object#clone() */ public Object clone() { return new ParserFactory(m_parsers); } /** * <p>Constructor. Takes an array of parsers to test. After creating this * instance, sample data values can be passed in using the * <code>sample()</code> method, and this class will check the sample * against the parsers, computing which parsers can successfully parse the * sample. This process of elimination disregards inappropriate parsers. * After a series of samples, the <code>getParser()</code> * method can be used to retrieve the highest ranking candidate parser. * </p> * * <p> * If no parser can parse all samples, a null value will be returned by * getParser(). For this reason, it is recommended to always use a * StringParser as the last element of the input array, as it is guaranteed * to always parse successfully (by simply returning its input String). * </p> * * <p> * The ordering of parsers in the array is taken to be the desired order * of precendence of the parsers. For example, if both parser[0] and * parser[2] can parse all the available samples, parser[0] will be * returned. * </p> * @param parsers the input DataParsers to use. */ public ParserFactory(DataParser[] parsers) { // check integrity of input for ( int i=0; i<parsers.length; ++i ) { if ( parsers[i] == null ) { throw new IllegalArgumentException( "Input parsers must be non-null"); } } // initialize member variables m_parsers = parsers; m_isCandidate = new boolean[m_parsers.length]; reset(); } /** * Reset the candidate parser settings, making each parser * equally likely. */ protected void reset() { Arrays.fill(m_isCandidate, true); } /** * Sample a data value against the parsers, updating the * parser candidates. * @param val the String value to sample */ protected void sample(String val) { for ( int i=0; i<m_parsers.length; ++i ) { if ( m_isCandidate[i] ) { m_isCandidate[i] = m_parsers[i].canParse(val); } } } /** * Returns the highest ranking parser that successfully can * parse all the input samples viewed by this instance. If * no such parser exists, a null value is returned. * @return the highest-ranking data parser, or null if none */ protected DataParser getParser() { for ( int i=0; i<m_parsers.length; ++i ) { if ( m_isCandidate[i] ) { return m_parsers[i]; } } return null; } /** * Returns a parser for the specified data type. * @param type the Class for the data type to parse * @return a parser for the given data type, or null * if no such parser can be found. */ public DataParser getParser(Class type) { for ( int i=0; i<m_parsers.length; ++i ) { if ( m_parsers[i].getType().equals(type) ) { return m_parsers[i]; } } return null; } /** * Analyzes the given array of String values to determine an * acceptable parser data type. * @param data an array of String values to parse * @param startRow the row from which to begin analyzing the * data array, allowing header rows to be excluded. * @return the appropriate parser for the inferred data type, * of null if none. */ public DataParser getParser(String[] data, int startRow) { return getParser(new String[][] { data }, 0, startRow); } /** * Analyzes a column of the given array of String values to * determine an acceptable parser data type. * @param data an 2D array of String values to parse * @param col an index for the column to process * @param startRow the row from which to begin analyzing the * data array, allowing header rows to be excluded. * @return the appropriate parser for the inferred data type, * of null if none. */ public DataParser getParser(String[][] data, int col, int startRow) { // sanity check input if ( data == null || data.length == 0 ) return null; int nrows = data.length; // analyze each column in turn this.reset(); for ( int row=startRow; row<nrows; ++row ) { this.sample(data[row][col]); } DataParser parser = getParser(); return parser; } } // end of class ParserFactory