TabLoader.java example

Explorer
msInspect-master
/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.toolbox.filehandler;

// UNDONE: should probably be in package org.labkey.common.util

import junit.framework.Test;
import junit.framework.TestFailure;
import junit.framework.TestResult;
import junit.framework.TestSuite;
import org.apache.commons.beanutils.*;
import org.apache.commons.collections.Transformer;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.datastructure.FloatArray;
import org.fhcrc.cpl.toolbox.datastructure.DoubleArray;
import org.fhcrc.cpl.toolbox.datastructure.IntegerArray;
import org.fhcrc.cpl.toolbox.datastructure.DoubleArray;
import org.fhcrc.cpl.toolbox.datastructure.FloatArray;
import org.fhcrc.cpl.toolbox.datastructure.IntegerArray;

import java.beans.PropertyDescriptor;
import java.io.*;
import java.lang.reflect.Array;
import java.lang.reflect.Method;
import java.util.*;


/**
 * TabLoader will load tab-delimited text into an array of objects.
 * Client can specify a bean class to load the objects into. If the class is java.util.Map,
 * an array of maps is returned.
 * <p/>
 * NOTE: If a loader has been used to load an array of maps you should NOT change the column descriptors.
 * A single set of column descriptors is used to key all the maps. (ISSUE: Should probably use
 * ArrayListMap or clone the column descriptors instead).
 * <p/>
 * UNDONE: Would like to overflow bean properties into a map if the bean also implements map.
 * <p/>
 * UNDONE: Should probably integrate in some way with ObjectFactory
 * <p/>
 * User: migra
 * Date: Jun 28, 2004
 * Time: 2:25:19 PM
 */
public class TabLoader
{
    // Shared log4j logger, also used by the inner iterator class.
    static Logger _log = Logger.getLogger(TabLoader.class);

    // source data -- getReader() prefers _reader, then _stringData, then _file.
    // "Resource" is only a non-null placeholder until setSource(File) is called.
    private File _file = new File("Resource");
    private String _stringData = null;
    private Reader _reader;

    // CONSIDER: explicit flags for hasHeaders, inferHeaders, skipLines etc.
    protected int _skipLines = -1;      // -1 means infer headers
    protected int _scanAheadLineCount = 20; // number of lines to scan trying to infer data types
    protected ColumnDescriptor[] _columns;
    protected boolean _columnsInitialized = false; // set once getColumns() has prepared _columns
    protected Class _returnElementClass = java.util.Map.class; // element type of the array returned by load()
    /* this is a little hokey - it makes some later code work without mods */
    // column name (lower-cased when _lowerCaseHeaders is set) -> column index; filled by initColNameMap()
    private Map<Object, Integer> _colMap = new HashMap<Object, Integer>();
    // key=value pairs collected from '#' lines met while TabLoaderIterator skips the header lines
    private Map<String, String> _comments = new HashMap<String, String>();
    private boolean _lowerCaseHeaders;

    protected char _chDelimiter = '\t';
    protected String _strDelimiter = null; // lazily derived from _chDelimiter by parseLine()
    protected boolean _parseQuotes = false;
    protected boolean _throwOnErrors = false; // when false, conversion failures store ColumnDescriptor.errorValues

    private Transformer _transformer = null;


    /**
     * Creates a loader over a file. Header presence is inferred (skip-lines stays at -1)
     * and rows are returned as maps.
     *
     * @param inputFile file to read
     * @throws IOException if the file does not exist or cannot be read
     */
    public TabLoader(File inputFile) throws IOException
    {
        this.setSource(inputFile);
    }


    /**
     * Creates a loader over a caller-supplied reader.
     *
     * @param reader           character source
     * @param hasColumnHeaders true if the first line is a header row (skip 1 line), false for none
     * @param returnClass      element class for loaded rows; null keeps the default (java.util.Map)
     */
    public TabLoader(Reader reader, boolean hasColumnHeaders, Class returnClass)
    {
        if (null != returnClass)
        {
            _returnElementClass = returnClass;
        }

        setSource(reader);

        if (hasColumnHeaders)
            _skipLines = 1;
        else
            _skipLines = 0;
    }


    /**
     * Creates a loader over a reader, returning rows as maps.
     *
     * @param reader           character source
     * @param hasColumnHeaders true if the first line is a header row
     */
    public TabLoader(Reader reader, boolean hasColumnHeaders)
    {
        this(reader, hasColumnHeaders, (Class) null);
    }


    /**
     * Creates a loader over a reader, treating the input as having no header row.
     * <p/>
     * NOTE(review): the original comment here read "infer whether there are columnHeaders",
     * but delegating with {@code false} sets the skip-line count to 0, so no inference
     * happens. Confirm which behaviour is intended before changing it.
     *
     * @param reader character source
     */
    public TabLoader(Reader reader)
    {
        this(reader, false, (Class) null);
    }


    /**
     * Creates a loader over in-memory text.
     *
     * @param src              the delimited text itself (not a file name)
     * @param hasColumnHeaders true if the first line is a header row
     */
    public TabLoader(String src, boolean hasColumnHeaders)
    {
        if (hasColumnHeaders)
            _skipLines = 1;
        else
            _skipLines = 0;
        setSource(src);
    }


    /**
     * Creates a loader over in-memory text; header presence is inferred.
     *
     * @param src the delimited text itself (not a file name)
     */
    public TabLoader(String src)
    {
        this.setSource(src);
    }


    /**
     * Creates a loader over a file with an explicit header-line count.
     *
     * @param inputFile file to read
     * @param skipLines -1 to infer headers, otherwise the number of header lines
     * @throws IOException if the file does not exist or cannot be read
     */
    public TabLoader(File inputFile, int skipLines)
            throws IOException
    {
        setSource(inputFile);
        _skipLines = skipLines;
    }


    /**
     * Creates a loader over a file whose rows are loaded into the given class.
     * Header presence is inferred.
     *
     * @param inputFile         file to read
     * @param returnObjectClass element class for loaded rows (stored as given, even if null)
     * @throws IOException if the file does not exist or cannot be read
     */
    public TabLoader(File inputFile, Class returnObjectClass)
            throws IOException
    {
        this.setSource(inputFile);
        this._returnElementClass = returnObjectClass;
    }


    /**
     * Creates a fully specified loader over a file.
     *
     * @param inputFile         file to read
     * @param skipLines         -1 to infer headers, otherwise the number of header lines
     * @param returnObjectClass element class for loaded rows (stored as given, even if null)
     * @param columns           column descriptors to use; null means infer them from the data
     * @throws IOException if the file does not exist or cannot be read
     */
    public TabLoader(File inputFile, int skipLines, Class returnObjectClass, ColumnDescriptor[] columns)
            throws IOException
    {
        this.setSource(inputFile);
        this._returnElementClass = returnObjectClass;
        this._skipLines = skipLines;
        this._columns = columns;
    }


    /**
     * Creates a fully specified loader over a caller-supplied reader.
     *
     * @param reader            character source
     * @param skipLines         -1 to infer headers, otherwise the number of header lines
     * @param returnObjectClass element class for loaded rows (stored as given, even if null)
     * @param columns           column descriptors to use; null means infer them from the data
     */
    public TabLoader(Reader reader, int skipLines, Class returnObjectClass, ColumnDescriptor[] columns)
    {
        this.setSource(reader);
        this._returnElementClass = returnObjectClass;
        this._skipLines = skipLines;
        this._columns = columns;
    }


    /**
     * Uses a file as the data source after checking that it exists and is readable.
     *
     * @param inputFile file to read
     * @throws FileNotFoundException if the file does not exist
     * @throws IOException           if the file exists but cannot be read
     */
    protected void setSource(File inputFile) throws IOException
    {
        _file = inputFile;

        boolean present = inputFile.exists();
        if (!present)
        {
            throw new FileNotFoundException(inputFile.getPath());
        }

        boolean readable = inputFile.canRead();
        if (!readable)
        {
            throw new IOException("Can't read file: " + inputFile.getPath());
        }
    }


    /**
     * Uses a caller-supplied reader as the data source. Readers without mark support are
     * wrapped in a BufferedReader; the start position is then marked (1 MB read-ahead limit)
     * so that getReader() can rewind to it.
     *
     * @param reader character source
     */
    protected void setSource(Reader reader)
    {
        _reader = reader.markSupported() ? reader : new BufferedReader(reader);

        try
        {
            // mark support was established above, so this is not expected to throw
            _reader.mark(1024 * 1024);
        }
        catch (IOException markFailure)
        {
            throw new RuntimeException(markFailure);
        }
    }


    /**
     * Uses in-memory text as the data source.
     *
     * @param src the delimited text itself
     */
    protected void setSource(String src)
    {
        this._stringData = src;
    }


    /**
     * Opens a reader positioned at the start of the data. Sources are tried in the order:
     * supplied reader, in-memory string, file.
     *
     * @return a BufferedReader over the data; for a supplied reader, close() is a no-op
     * @throws IOException if the supplied reader cannot be reset or the file cannot be opened
     */
    protected BufferedReader getReader() throws IOException
    {
        if (null == _reader)
        {
            if (null == _stringData)
                return new BufferedReader(new FileReader(_file));

            return new BufferedReader(new StringReader(_stringData));
        }

        // Rewind to the mark taken in setSource(Reader). The caller owns that reader,
        // so the wrapper's close() deliberately does nothing.
        _reader.reset();
        BufferedReader nonClosing = new BufferedReader(_reader)
        {
            public void close()
            {
            }
        };
        return nonClosing;
    }


    /**
     * @param lowerCaseHeaders true to key row maps by lower-cased column names
     */
    public void setLowerCaseHeaders(boolean lowerCaseHeaders)
    {
        this._lowerCaseHeaders = lowerCaseHeaders;
    }


    /**
     * @return read-only view of the key=value comments collected so far
     */
    public Map getComments()
    {
        //noinspection unchecked
        Map readOnlyComments = Collections.unmodifiableMap(_comments);
        return readOnlyComments;
    }


    /**
     * Makes sure column descriptors exist (inferring them from the data when none were
     * supplied) and then, if a return class is set, aligns them with that class.
     *
     * @param reader reader positioned at the start of the data
     * @throws IOException if the scan-ahead read fails
     */
    private void prepareColumnInfo(BufferedReader reader) throws IOException
    {
        // Take our best guess since some columns won't map
        boolean needInference = (_columns == null);
        if (needInference)
        {
            inferColumnInfo(reader);
        }

        if (_returnElementClass != null)
        {
            initColumnInfos(_returnElementClass);
        }
    }


    // Candidate column types tried in order by inferColumnInfo(); a later entry is treated as more general.
    private static Class[] convertClasses = new Class[]{Date.class, Integer.class, Double.class, String.class};

    /**
     * Scans ahead up to {@code _scanAheadLineCount} non-blank, non-comment lines (plus any
     * explicitly skipped header lines) and infers column names and data types from them.
     * The reader is marked before the scan and reset afterwards.
     * Most useful if maps are being returned; when a return class is set, initColumnInfos(Class)
     * is run afterwards and replaces name/type for columns that match one of its properties.
     * <p/>
     * When {@code _skipLines} is -1 this method also decides whether the first line is a header
     * and sets {@code _skipLines} to 0 or 1 accordingly.
     *
     * @param reader reader positioned at the start of the data; must support mark/reset
     * @throws IOException if reading fails or the reader cannot be reset
     */
    private void inferColumnInfo(BufferedReader reader) throws IOException
    {
        reader.mark(4096 * _scanAheadLineCount);

        String[] lines = new String[_scanAheadLineCount + Math.max(_skipLines, 0)];
        int i;
        for (i = 0; i < lines.length;)
        {
            String line = reader.readLine();
            if (null == line)
                break;
            // blank lines and '#' comment lines do not count towards the scanned lines
            if (line.length() == 0 || line.charAt(0) == '#')
                continue;
            lines[i++] = line;
        }
        int nLines = i;
        reader.reset();
        if (nLines == 0)
        {
            _columns = new ColumnDescriptor[0];
            return;
        }

        // The column count is the widest scanned line; individual lines may be shorter.
        int nCols = 0;
        String[][] lineFields = new String[nLines][];
        for (i = 0; i < nLines; i++)
        {
            lineFields[i] = parseLine(lines[i]);
            nCols = Math.max(nCols, lineFields[i].length);
        }

        ColumnDescriptor[] colDescs = new ColumnDescriptor[nCols];
        for (i = 0; i < nCols; i++)
            colDescs[i] = new ColumnDescriptor();

        //Try to infer types
        int inferStartLine = _skipLines == -1 ? 1 : _skipLines;
        for (int f = 0; f < nCols; f++)
        {
            int classIndex = -1;
            for (int line = inferStartLine; line < nLines; line++)
            {
                if (f >= lineFields[line].length)
                    continue;
                String field = lineFields[line][f];
                if ("".equals(field))
                    continue;

                for (int c = Math.max(classIndex, 0); c < convertClasses.length; c++)
                {
                    //noinspection EmptyCatchBlock
                    try
                    {
                        Object o = ConvertUtils.convert(field, convertClasses[c]);
                        //We found a type that works. If it is more general than
                        //what we had before, we must use it.
                        if (o != null && c > classIndex)
                            classIndex = c;
                        break;
                    }
                    catch (Exception x)
                    {
                        // conversion failed: fall through and try the next, more general type
                    }
                }
            }
            colDescs[f].clazz = classIndex == -1 ? String.class : convertClasses[classIndex];
        }


        //If first line is compatible type for all fields, AND all fields not Strings (dhmay adding 20100502)
        // then there is no header row
        if (_skipLines == -1)
        {
            boolean firstLineCompat = true;
            boolean allStrings = true;
            String[] fields = lineFields[0];
            for (int f = 0; f < nCols; f++)
            {
                // The first line can be shorter than nCols (String.split drops trailing empty
                // fields); a missing field is treated like a blank one instead of overrunning.
                if (f >= fields.length || "".equals(fields[f]))
                    continue;
                // NOTE(review): inferred classes come from convertClasses, which holds wrapper
                // types, so these comparisons against primitive types look like they can never
                // match and allStrings stays true. Left unchanged because fixing it would alter
                // header detection for existing inputs -- confirm the intended behaviour.
                if (colDescs[f].clazz.equals(Integer.TYPE) || colDescs[f].clazz.equals(Double.TYPE) ||
                     colDescs[f].clazz.equals(Float.TYPE))
                    allStrings = false;
                try
                {
                    Object o = ConvertUtils.convert(fields[f], colDescs[f].clazz);
                    if (null == o)
                    {
                        firstLineCompat = false;
                        break;
                    }
                }
                catch (Exception x)
                {
                    firstLineCompat = false;
                    break;
                }
            }
            if (firstLineCompat && !allStrings)
                _skipLines = 0;
            else
                _skipLines = 1;
        }

        if (_skipLines > 0)
        {
            // If fewer lines were available than _skipLines there is no header line to read;
            // fall back to generated names rather than indexing past the scanned lines.
            String[] headers = _skipLines <= nLines ? lineFields[_skipLines - 1] : new String[0];
            for (int f = 0; f < nCols; f++)
                colDescs[f].name = (f >= headers.length || "".equals(headers[f])) ? "column" + f : headers[f];
        }
        else
        {
            for (int f = 0; f < colDescs.length; f++)
            {
                ColumnDescriptor colDesc = colDescs[f];
                colDesc.name = "column" + f;
            }
        }

        _columns = colDescs;
    }


    /**
     * Aligns the column descriptors with the properties of the target class. Columns whose
     * name matches a property (case-insensitively) take the property's name, type and setter,
     * and primitive-typed ones get a zero/false missing value. Unmatched columns are kept as
     * plain map entries when the target is a Map, otherwise they are excluded from loading.
     *
     * @param clazz the class rows will be loaded into
     */
    private void initColumnInfos(Class clazz)
    {
        // Index the bean properties by lower-cased name, ignoring the synthetic "class" property.
        HashMap<String, PropertyDescriptor> propsByLowerName = new HashMap<String, PropertyDescriptor>();
        PropertyDescriptor[] allProps = PropertyUtils.getPropertyDescriptors(clazz);
        for (int p = 0; p < allProps.length; p++)
        {
            PropertyDescriptor candidate = allProps[p];
            if (!candidate.getName().equals("class"))
                propsByLowerName.put(candidate.getName().toLowerCase(), candidate);
        }

        boolean targetIsMap = java.util.Map.class.isAssignableFrom(clazz);
        for (ColumnDescriptor col : _columns)
        {
            PropertyDescriptor match = propsByLowerName.get(col.name.toLowerCase());
            if (null == match)
            {
                if (targetIsMap)
                    col.isProperty = false;
                else
                    col.load = false;
                continue;
            }

            col.name = match.getName();
            col.clazz = match.getPropertyType();
            col.isProperty = true;
            col.setter = match.getWriteMethod();

            if (!col.clazz.isPrimitive())
                continue;

            // Primitives cannot hold null, so give them a typed default for blank fields.
            if (Float.TYPE.equals(col.clazz))
            {
                col.missingValues = 0.0F;
            }
            else if (Double.TYPE.equals(col.clazz))
            {
                col.missingValues = 0.0;
            }
            else if (Boolean.TYPE.equals(col.clazz))
            {
                col.missingValues = Boolean.FALSE;
            }
            else
            {
                col.missingValues = 0; //Will get converted.
            }
        }
    }


    /**
     * Returns an array of objects, one for each non-header row of the file.
     * By default the objects are maps, but may be java beans.
     * <p/>
     * The runtime component type of the array is the return element class. If that class is
     * null, the array is a {@code Map[]} when no transformer is installed and an
     * {@code Object[]} otherwise. Rows that fail to load while throw-on-errors is off
     * appear as null elements, as produced by the iterator.
     *
     * @return the loaded rows
     * @throws IOException if the source cannot be read
     */
    public Object[] load() throws IOException
    {
        getColumns();

        List<Object> rowList = new ArrayList<Object>();
        TabLoaderIterator it = new TabLoaderIterator();
        try
        {
            while (it.hasNext())
                rowList.add(it.next());
        }
        finally
        {
            // The iterator only closes itself at end of input; release the reader
            // here too so a failure part-way through does not leak it.
            it.close();
        }

        // A null return class is accepted by the constructors; avoid the NPE in Array.newInstance.
        Class elementClass = _returnElementClass;
        if (null == elementClass)
            elementClass = (null == _transformer) ? java.util.Map.class : Object.class;

        return rowList.toArray((Object[]) Array.newInstance(elementClass, rowList.size()));
    }


    /**
     * Loads the data column-wise. The result has one slot per column descriptor:
     * a primitive array for double/float/int columns, an array of the column's class for
     * non-primitive columns, and null for columns that are not loaded (or whose primitive
     * type is not one of double/float/int).
     * <p/>
     * Every loaded column receives exactly one element per data row. A blank field, or a field
     * missing because the row is shorter than the column list, becomes the column's missing
     * value; for primitive columns that is the descriptor's missingValues when it is a Number,
     * otherwise 0. Malformed primitive values still raise NumberFormatException.
     * <p/>
     * NOTE(review): unlike TabLoaderIterator, header skipping here counts raw lines and
     * '#' comment lines are not filtered out of the data -- confirm whether that is intended.
     *
     * @return per-column arrays, indexed like getColumns()
     * @throws IOException if the source cannot be read
     */
    public Object[] loadColsAsArrays() throws IOException
    {
        initColNameMap();
        ColumnDescriptor[] columns = getColumns();
        Object[] valueLists = new Object[columns.length];

        // One growable accumulator per loaded column.
        for (int i = 0; i < valueLists.length; i++)
        {
            if (!columns[i].load)
                continue;

            Class clazz = columns[i].clazz;
            if (clazz.isPrimitive())
            {
                if (clazz.equals(Double.TYPE))
                    valueLists[i] = new DoubleArray();
                else if (clazz.equals(Float.TYPE))
                    valueLists[i] = new FloatArray();
                else if (clazz.equals(Integer.TYPE))
                    valueLists[i] = new IntegerArray();
            }
            else
            {
                valueLists[i] = new ArrayList<Object>();
            }
        }

        BufferedReader reader = null;
        try
        {
            reader = getReader();
            int line = 0;

            for (int skip = 0; skip < _skipLines; skip++)
            {
                reader.readLine();
                line++;
            }

            String s;
            while ((s = reader.readLine()) != null)
            {
                line++;
                if ("".equals(s.trim()))
                    continue;

                String[] fields = parseLine(s);
                for (int i = 0; i < columns.length; i++)
                {
                    if (!columns[i].load)
                        continue;

                    // Rows can be shorter than the column list (String.split drops trailing
                    // empty fields); treat the absent field as blank so columns stay aligned.
                    String value = i < fields.length ? fields[i] : "";
                    boolean blank = "".equals(value);

                    Class clazz = columns[i].clazz;
                    if (clazz.isPrimitive())
                    {
                        Number missing = columns[i].missingValues instanceof Number
                                ? (Number) columns[i].missingValues
                                : Integer.valueOf(0);

                        if (clazz.equals(Double.TYPE))
                            ((DoubleArray) valueLists[i]).add(blank ? missing.doubleValue() : Double.parseDouble(value));
                        else if (clazz.equals(Float.TYPE))
                            ((FloatArray) valueLists[i]).add(blank ? missing.floatValue() : Float.parseFloat(value));
                        else if (clazz.equals(Integer.TYPE))
                            ((IntegerArray) valueLists[i]).add(blank ? missing.intValue() : Integer.parseInt(value));
                    }
                    else
                    {
                        //noinspection unchecked
                        List<Object> values = (List<Object>) valueLists[i];
                        try
                        {
                            if (blank)
                                values.add(columns[i].missingValues);
                            else
                                values.add(ConvertUtils.convert(value, columns[i].clazz));
                        }
                        catch (Exception x)
                        {
                            if (_throwOnErrors)
                                throw new ConversionException("Conversion error: line " + line + " column " + (i+ 1), x);

                            values.add(columns[i].errorValues);
                        }
                    }

                }
            }
        }
        finally
        {
            if (null != reader)
                reader.close();
        }

        // Freeze the accumulators into plain arrays.
        Object[] returnArrays = new Object[columns.length];
        for (int i = 0; i < columns.length; i++)
        {
            if (!columns[i].load)
                continue;

            Class clazz = columns[i].clazz;
            if (clazz.isPrimitive())
            {
                if (clazz.equals(Double.TYPE))
                    returnArrays[i] = ((DoubleArray) valueLists[i]).toArray(null);
                else if (clazz.equals(Float.TYPE))
                    returnArrays[i] = ((FloatArray) valueLists[i]).toArray(null);
                else if (clazz.equals(Integer.TYPE))
                    returnArrays[i] = ((IntegerArray) valueLists[i]).toArray(null);
            }
            else
            {
                //noinspection unchecked
                List<Object> collected = (List<Object>) valueLists[i];
                Object[] values = (Object[]) Array.newInstance(columns[i].clazz, collected.size());
                returnArrays[i] = collected.toArray(values);
            }
        }

        return returnArrays;
    }


    /**
     * Normalizes a non-quoted field: surrounding whitespace is always stripped (even though
     * one could argue a TAB-delimited value should keep it), null becomes the empty string,
     * and the literal marker {@code \N} is treated as an empty value.
     *
     * @param value raw field text, may be null
     * @return the cleaned field, never null
     */
    protected static String parseValue(String value)
    {
        String cleaned = StringUtils.trimToEmpty(value);
        return "\\N".equals(cleaned) ? "" : cleaned;
    }

    // Scratch list reused by parseLine() in quote-parsing mode; cleared at the start of each call.
    private ArrayList<String> listParse = new ArrayList<String>(30);


    /**
     * Splits one line of input into fields.
     * <p/>
     * Without quote parsing the line is split on the delimiter and each field is cleaned with
     * parseValue(); as with {@link String#split(String)}, trailing empty fields are dropped.
     * With quote parsing, fields may be wrapped in double quotes, a doubled quote inside a
     * quoted field stands for one literal quote, and quoted fields are returned verbatim.
     * Note we don't handle values with embedded newlines.
     *
     * @param s one line of input, without its line terminator
     * @return the fields of the line
     * @throws IllegalArgumentException in quote-parsing mode, if a quote is unterminated or a
     *                                  closing quote is not followed by a delimiter or end of line
     */
    protected String[] parseLine(String s)
    {
        if (!_parseQuotes)
        {
            // String.split takes a regular expression, so a delimiter derived from the raw
            // character must be quoted; otherwise '|', '.', '*' etc. would not split literally.
            // A _strDelimiter assigned elsewhere is used as given.
            if (_strDelimiter == null)
                _strDelimiter = java.util.regex.Pattern.quote(String.valueOf(_chDelimiter));
            String[] fields = s.split(_strDelimiter);
            for (int i = 0; i < fields.length; i++)
                fields[i] = parseValue(fields[i]);
            return fields;
        }

        s = s.trim();

        String field;
        int length = s.length();
        int start = 0;
        listParse.clear();
        while (start < length)
        {
            int end;
            char ch = s.charAt(start);
            if (ch == _chDelimiter)
            {
                // delimiter right at the field start: empty field
                end = start;
                field = "";
            }
            else if (ch == '"')
            {
                // quoted field: find the closing quote, stepping over doubled quotes
                end = start;
                boolean hasQuotes = false;
                while (true)
                {
                    end = s.indexOf('"', end + 1);
                    if (end == -1)
                        throw new IllegalArgumentException("CSV can't parse line: " + s);
                    if (end == s.length() - 1 || s.charAt(end + 1) != '"')
                        break;
                    hasQuotes = true;
                    end++; // skip double ""
                }
                field = s.substring(start + 1, end);
                if (hasQuotes)
                    field = field.replaceAll("\"\"", "\"");
                // eat final " and any trailing white space
                end++;
                while (end < length && s.charAt(end) != _chDelimiter && Character.isWhitespace(s.charAt(end)))
                    end++;
            }
            else
            {
                // plain field: runs up to the next delimiter or end of line
                end = s.indexOf(_chDelimiter, start);
                if (end == -1)
                    end = s.length();
                field = s.substring(start, end);
                field = parseValue(field);
            }
            listParse.add(field);

            // there should be a comma or an EOL here
            if (end < length && s.charAt(end) != _chDelimiter)
                throw new IllegalArgumentException("CSV can't parse line: " + s);
            // step over the delimiter and any whitespace before the next field
            end++;
            while (end < length && s.charAt(end) != _chDelimiter && Character.isWhitespace(s.charAt(end)))
                end++;
            start = end;
        }
        return listParse.toArray(new String[listParse.size()]);
    }


    /**
     * Opens a row iterator over the data, positioned after the header lines.
     *
     * @return a new iterator; call close() on it if it is abandoned before the end of input
     * @throws IOException if the source cannot be opened or read
     */
    public TabLoaderIterator iterator() throws IOException
    {
        return new TabLoader.TabLoaderIterator();
    }


    /**
     * Fills the name-to-index map used by row maps, lower-casing names when
     * lower-case headers are enabled.
     *
     * @throws IOException if the columns have to be prepared and reading fails
     */
    private void initColNameMap() throws IOException
    {
        int index = 0;
        for (ColumnDescriptor descriptor : getColumns())
        {
            String key = descriptor.name;
            if (_lowerCaseHeaders)
                key = descriptor.name.toLowerCase();
            _colMap.put(key, index);
            index++;
        }
    }

    /**
     * Load a tab delimited file into an array of arrays, one per column.
     * A column descriptor is created for each entry of {@code types} and all of them are loaded.
     * Number of columns in the file must be >= number of classes.
     * Blank lines are skipped; handling of blank or malformed values is that of loadColsAsArrays().
     *
     * @param r        character source
     * @param types    the class of each column, in file order
     * @param skipRows number of leading lines to skip
     * @return per-column arrays as produced by loadColsAsArrays()
     * @throws IOException if the source cannot be read
     */
    public static Object[] loadColumnArrays(Reader r, Class[] types, int skipRows) throws IOException
    {
        ColumnDescriptor[] descriptors = new ColumnDescriptor[types.length];
        int slot = 0;
        for (Class type : types)
        {
            ColumnDescriptor d = new ColumnDescriptor();
            d.clazz = type;
            d.load = true;
            descriptors[slot++] = d;
        }

        return new TabLoader(r, skipRows, null, descriptors).loadColsAsArrays();
    }

    /**
     * Convenience wrapper: loads every row of a file into instances of the given class.
     *
     * @param file        file to read
     * @param returnClass class to load rows into
     * @return the loaded rows
     * @throws Exception if the file cannot be opened or read
     */
    public static Object[] loadObjects(File file, Class returnClass) throws Exception
    {
        return new TabLoader(file, returnClass).load();
    }

    /**
     * Convenience wrapper: loads every row of a file as a map keyed by column name.
     *
     * @param file file to read
     * @return one map per data row
     * @throws Exception if the file cannot be opened or read
     */
    public static Map[] loadMaps(File file) throws Exception
    {
        Object[] rows = new TabLoader(file).load();
        return (Map[]) rows;
    }

    /**
     * @return the header-line count; -1 while header presence is still to be inferred
     */
    public int getSkipLines()
    {
        return this._skipLines;
    }

    /**
     * Sets how many leading lines are treated as headers.
     *
     * @param skipLines -1 means infer headers, 0 means no headers, and 1 means there is one header line
     */
    public void setSkipLines(int skipLines)
    {
        _skipLines = skipLines;
    }


    /**
     * Returns the column descriptors, preparing them on first use (inference from the data
     * and/or alignment with the return class). Later calls return the cached array.
     *
     * @return the column descriptors
     * @throws IOException if the source has to be read and reading fails
     */
    public ColumnDescriptor[] getColumns() throws IOException
    {
        if (_columnsInitialized)
            return _columns;

        BufferedReader source = null;
        try
        {
            source = getReader();
            prepareColumnInfo(source);
        }
        finally
        {
            if (source != null)
            {
                source.close();
            }
        }

        // only reached when preparation succeeded
        _columnsInitialized = true;
        return _columns;
    }


    /**
     * Replaces the column descriptors. The "columns initialized" flag is left untouched.
     *
     * @param columns the descriptors to use
     */
    public void setColumns(ColumnDescriptor[] columns)
    {
        _columns = columns;
    }

    /**
     * @return the class rows are loaded into
     */
    public Class getReturnElementClass()
    {
        return this._returnElementClass;
    }

    /**
     * @param returnElementClass the class rows should be loaded into
     */
    public void setReturnElementClass(Class returnElementClass)
    {
        _returnElementClass = returnElementClass;
    }


    /**
     * @return the transformer applied to each row map, or null if none is set
     */
    public Transformer getTransformer()
    {
        return this._transformer;
    }


    /**
     * @param transformer converts each row map into the object handed to the caller
     */
    public void setTransformer(Transformer transformer)
    {
        _transformer = transformer;
    }


    /**
     * Switches the loader to comma-separated input with quote handling enabled.
     */
    public void parseAsCSV()
    {
        this._chDelimiter = ',';
        this._parseQuotes = true;
    }


    /**
     * @param parseQuotes true to honour double-quoted fields when splitting lines
     */
    public void setParseQuotes(boolean parseQuotes)
    {
        this._parseQuotes = parseQuotes;
    }

    /**
     * @return true if conversion failures are thrown rather than replaced by error values
     */
    public boolean isThrowOnErrors()
    {
        return this._throwOnErrors;
    }

    /**
     * @param throwOnErrors true to throw on conversion failures instead of substituting error values
     */
    public void setThrowOnErrors(boolean throwOnErrors)
    {
        this._throwOnErrors = throwOnErrors;
    }


    /**
     * Describes one column of the input: its name, target type and how blank or
     * unconvertible values are represented.
     */
    public static class ColumnDescriptor
    {
        /** Target type of the column's values. */
        public Class clazz = String.class;
        /** Column name; null until assigned. */
        public String name = null;
        /** False to leave this column out of the loaded result. */
        public boolean load = true;
        /** Load as a class property */
        public boolean isProperty = false;
        /** Value stored for blank fields. */
        public Object missingValues = null;
        /** Value stored when conversion fails and errors are not thrown. */
        public Object errorValues = null;
        /** Converter for {@link #clazz}; assigned by the row iterator. */
        public Converter converter = null;
        /** Bean setter for this column, when it maps to a property. */
        public Method setter = null;

        /** Creates a String column with no name. */
        public ColumnDescriptor()
        {
        }

        /**
         * @param name column name
         * @param type target type
         */
        public ColumnDescriptor(String name, Class type)
        {
            this(name, type, null);
        }

        /**
         * @param name         column name
         * @param type         target type
         * @param defaultValue value stored for blank fields
         */
        public ColumnDescriptor(String name, Class type, Object defaultValue)
        {
            this.name = name;
            this.clazz = type;
            this.missingValues = defaultValue;
        }
    }


    /**
     * Map view of a single row: values live in an array indexed by column, and keys are
     * resolved to indexes through the loader's shared column-name map. Keys are lower-cased
     * before lookup when lower-case headers are enabled. Removal is not supported.
     */
    protected class _RowMap implements Map<Object, Object>
    {
        protected Object[] _values;

        _RowMap(Object[] values)
        {
            this._values = values;
        }

        /** @return the backing value array (not a copy) */
        public Object[] getArray()
        {
            return _values;
        }

        public int size()
        {
            return _values.length;
        }

        /** @return true when the row holds no values, consistent with size() */
        public boolean isEmpty()
        {
            return null == _values || _values.length == 0;
        }

        public boolean containsKey(Object o)
        {
            if (o instanceof String && _lowerCaseHeaders)
                o = ((String) o).toLowerCase();
            Integer index = _colMap.get(o);
            return null != index && index < _values.length;
        }

        /** @return true if any column of this row holds a value equal to {@code o} (null matches null) */
        public boolean containsValue(Object o)
        {
            if (null == _values)
                return false;
            for (Object value : _values)
            {
                if (null == o ? null == value : o.equals(value))
                    return true;
            }
            return false;
        }

        public Object get(Object o)
        {
            if (o instanceof String && _lowerCaseHeaders)
                o = ((String) o).toLowerCase();
            Integer col = _colMap.get(o);
            if (null == col)
                return null;
            int icol = col;
            if (icol < 0 || icol >= _values.length)
                return null;

            return _values[icol];
        }

        /**
         * Stores a value under an existing column name.
         *
         * @throws IllegalArgumentException if the key is not a known column
         */
        public Object put(Object o, Object o1)
        {
            if (o instanceof String && _lowerCaseHeaders)
                o = ((String) o).toLowerCase();
            Integer col = _colMap.get(o);
            if (null == col)
                throw new IllegalArgumentException("Can't find col: " + o);

            //This generally won't happen
            if (null == _values || _values.length <= col)
            {
                Object[] newValues = new Object[col + 1];
                if (null != _values)
                    System.arraycopy(_values, 0, newValues, 0, _values.length);

                _values = newValues;
            }

            Object oldValue = _values[col];
            _values[col] = o1;
            return oldValue;
        }

        public Object remove(Object o)
        {
            throw new UnsupportedOperationException();
        }

        public void putAll(Map map)
        {
            for (Object o : map.keySet())
                put(o, map.get(o));
        }

        /** Resets every column of this row to null. */
        public void clear()
        {
            _values = new Object[_columns.length];
        }

        /**
         * @return read-only view of the column names; the underlying map is shared by all
         *         rows of the loader, so it must not be modified through a row
         */
        public Set<Object> keySet()
        {
            return Collections.unmodifiableSet(_colMap.keySet());
        }

        public Collection<Object> values()
        {
            return Collections.unmodifiableCollection(Arrays.asList(_values));
        }

        /** @return one entry per column, in column order */
        public Set<Map.Entry<Object, Object>> entrySet()
        {
            Set<Map.Entry<Object, Object>> s = new LinkedHashSet<Map.Entry<Object, Object>>();
            for (int i = 0; i < _columns.length; i++)
                s.add(new RowMapEntry(i));
            return s;
        }

        /** Live entry for one column; reads and writes go straight to the value array. */
        private class RowMapEntry implements Map.Entry<Object, Object>
        {
            int col;

            RowMapEntry(int col)
            {
                this.col = col;
            }

            public Object getKey()
            {
                return _columns[col].name;
            }

            public Object getValue()
            {
                return _values[col];
            }

            public Object setValue(Object o)
            {
                Object oldVal = _values[col];
                _values[col] = o;
                return oldVal;
            }
        }
    }


    /**
     * Iterates over the data rows of the loader's source, skipping blank lines and lines
     * starting with '#'. Each row is converted column by column and returned as a row map,
     * or as whatever the loader's transformer makes of that map.
     * <p/>
     * The reader is closed automatically when the end of input is reached; call close()
     * when abandoning the iterator earlier.
     */
    public class TabLoaderIterator implements Iterator<Object>
    {
        BufferedReader reader = null;
        String line = null;      // next data line, read ahead by hasNext(); null when none is pending
        int lineNo = 0;          // number of lines read so far, for error messages
        boolean returnMaps = true;

        /**
         * Prepares columns and converters, opens the source and skips the header lines.
         * Blank and '#' lines met while skipping do not count as header lines; '#' lines of the
         * form {@code #key=value} are recorded in the loader's comments.
         *
         * @throws IOException if the source cannot be opened or read
         */
        protected TabLoaderIterator() throws IOException
        {
            initColNameMap();

            returnMaps = _returnElementClass == null || _returnElementClass.equals(java.util.Map.class);

            // UNDONE: _transformer is in parent class (ick)
            if (_transformer == null && !returnMaps)
                _transformer = new TabTransformer();

            // find a converter for each column type
            for (ColumnDescriptor column : _columns)
                column.converter = ConvertUtils.lookup(column.clazz);

            reader = TabLoader.this.getReader();
            try
            {
                String s;
                for (int skip = 0; skip < _skipLines;)
                {
                    s = reader.readLine();
                    if (null == s)
                        break;
                    lineNo++;
                    if (s.length() == 0 || s.charAt(0) == '#')
                    {
                        int eq = s.indexOf('=');
                        if (eq != -1)
                        {
                            String key = s.substring(1, eq).trim();
                            String value = s.substring(eq + 1).trim();
                            if (key.length() > 0 || value.length() > 0)
                                _comments.put(key, value);
                        }
                        continue;
                    }
                    skip++;
                }
            }
            catch (IOException x)
            {
                // do not leak the reader when construction fails part-way
                close();
                throw x;
            }
        }


        /** Closes the underlying reader; safe to call more than once. */
        public void close()
        {
            try
            {
                if (null != reader)
                    reader.close();
                reader = null;
            }
            catch (IOException x)
            {
                _log.error("Unexpected exception", x);
            }
        }


        /**
         * Reads ahead to the next data line.
         *
         * @return true if a row is available; false at end of input or after close()
         * @throws RuntimeException if reading the source fails
         */
        public boolean hasNext()
        {
            if (line != null)
                return true;    // throw illegalstate?

            // Already exhausted or closed: report "no more rows" instead of
            // dereferencing the null reader.
            if (reader == null)
                return false;

            try
            {
                do
                {
                    line = reader.readLine();
                    if (line == null)
                    {
                        close();
                        return false;
                    }
                    lineNo++;
                }
                while (null == StringUtils.trimToNull(line) || line.charAt(0) == '#');
            }
            catch (Exception e)
            {
                _log.error("unexpected io error", e);
                throw new RuntimeException(e);
            }

            return true;
        }


        /**
         * Returns the next row. Works with or without a preceding hasNext() call.
         *
         * @return the row object; null when no rows remain, or when the row failed to load
         *         and throw-on-errors is off
         * @throws ConversionException if a field cannot be converted and throw-on-errors is on
         * @throws RuntimeException    for other failures when throw-on-errors is on
         */
        public Object next()
        {
            // Read ahead if the caller did not call hasNext() first.
            if (line == null && !hasNext())
                return null;    // consider: throw IllegalState

            try
            {
                String s = line;
                line = null;

                String[] fields = parseLine(s);

                Object[] values = new Object[_columns.length];
                for (int i = 0; i < _columns.length; i++)
                {
                    ColumnDescriptor column = _columns[i];
                    if (!column.load)
                        continue;
                    // row shorter than the column list: treat the field as missing
                    if (i >= fields.length)
                    {
                        values[i] = column.missingValues;
                        continue;
                    }
                    try
                    {
                        String fld = fields[i];
                        values[i] = ("".equals(fld)) ?
                                column.missingValues :
                                column.converter.convert(column.clazz, fld);
                    }
                    catch (Exception x)
                    {
                        if (_throwOnErrors)
                            throw new ConversionException("Conversion error: line " + lineNo + " column " + (i+ 1) + " (" + column.name + ")", x);

                        values[i] = column.errorValues;
                    }
                }

                Map m = new _RowMap(values);
                if (null == _transformer)
                    return m;
                else
                    return _transformer.transform(m);
            }
            catch (Exception e)
            {
                if (_throwOnErrors)
                {
                    if (e instanceof ConversionException)
                        throw ((ConversionException) e);
                    else
                        throw new RuntimeException(e);
                }

                _log.error("failed loading file " + _file.getName() + " at line: " + lineNo + " " + e, e);
            }
            return null;
        }


        public void remove()
        {
            throw new UnsupportedOperationException("'remove()' is not defined for TabLoaderIterator");
        }
    }


    /**
     * NOTE: we don't use ObjectFactory, because that's not available in the tools build currently.  Hoewever, you
     * can easily wrap an ObjectFactory with the Transformer interface
     */
    class TabTransformer implements Transformer
    {
        /**
         * Creates a new instance of the loader's return element class and copies one row of
         * values into it.
         * <p/>
         * Columns that are not flagged for loading, and values that are null, are skipped.
         * Property columns are written through the column's setter when one is present, otherwise
         * through BeanUtils. Non-property columns are stored only when the new instance is a
         * java.util.Map, keyed by column name.
         *
         * @param o the row to convert; must be a _RowMap
         * @return the populated instance
         * @throws RuntimeException wrapping any exception raised while casting the row, creating
         *                          the instance, or applying a column's error value
         */
        public Object transform(Object o)
        {
            try
            {
                _RowMap m = (_RowMap) o;
                Object bean = _returnElementClass.newInstance();

                // The bean's type cannot change while the columns are walked, so decide once
                // whether non-property columns have somewhere to go.
                Map beanAsMap = (bean instanceof Map) ? (Map) bean : null;

                for (int i = 0; i < _columns.length; i++)
                {
                    ColumnDescriptor column = _columns[i];
                    if (!column.load)
                        continue;

                    // CONSIDER: explicit option to not skip blank/null values
                    Object value = m._values[i];
                    if (null == value)
                        continue;

                    if (column.isProperty)
                    {
                        try
                        {
                            if (null != column.setter)
                            {
                                column.setter.invoke(bean, value);
                            }
                            else
                            {
                                BeanUtils.setProperty(bean, column.name, value);
                            }
                        }
                        catch (Exception x)
                        {
                            // Loading stays best-effort, but leave a trace of the failure instead
                            // of dropping it silently.
                            _log.debug("failed to set property '" + column.name + "' on " +
                                    bean.getClass().getName(), x);

                            // Fall back to the column's error value when one is configured. A
                            // failure here is not caught locally and ends up wrapped by the outer
                            // handler.
                            if (null != column.errorValues)
                            {
                                BeanUtils.setProperty(bean, column.name, column.errorValues);
                            }
                        }
                    }
                    else if (null != beanAsMap)
                    {
                        //dhmay correcting this check and making it do something, 7/17/06.
                        //This was only half-implemented, before.
                        beanAsMap.put(column.name, value);
                    }
                }
                return bean;
            }
            catch (Exception x)
            {
                throw new RuntimeException(x);
            }
        }
    }


    /**
     * JUnit 3 test case for TabLoader. The same data set (leading '#' comment lines, one header
     * row naming 18 columns, 7 data rows, trailing comment and blank lines) is provided in
     * comma-separated and tab-separated form and loaded from files and from readers.
     */
    public static class TabLoaderTestCase extends junit.framework.TestCase
    {
        // Comma-separated sample; includes an empty int field, an empty date field and quoted
        // fields containing escaped quotes and embedded commas.
        String csvData =
                "# algorithm=org.fhcrc.cpas.viewer.feature.FeatureStrategyPeakClusters\n" +
                        "# date=Mon May 22 13:25:28 PDT 2006\n" +
                        "# java.vendor=Sun Microsystems Inc.\n" +
                        "# java.version=1.5.0_06\n" +
                        "# revision=rev1.1\n" +
                        "# user.name=Matthew\n" +
                        "date,scan,time,mz,accurateMZ,mass,intensity,charge,chargeStates,kl,background,median,peaks,scanFirst,scanLast,scanCount,totalIntensity,description\n" +
                        "1/2/2006,96,1543.3401,858.3246,FALSE,1714.6346,2029.6295,2,1,0.19630894,26.471083,12.982442,4,92,100,9,20248.762,description\n" +
/*empty int*/   "2/Jan/2006,,1560.348,858.37555,FALSE,1714.7366,1168.3536,2,1,0.033085547,63.493385,8.771278,5,101,119,19,17977.979,\"desc\"\"ion\"\n" +
/*empty date*/  ",25,1460.2411,745.39404,FALSE,744.3868,1114.4303,1,1,0.020280406,15.826528,12.413276,4,17,41,25,13456.231,\"des,crip,tion\"\n" +
                        "2-Jan-06,89,1535.602,970.9579,FALSE,1939.9012,823.70984,2,1,0.0228055,10.497823,2.5962036,5,81,103,23,9500.36,\n" +
                        "2 January 2006,164,1624.442,783.8968,FALSE,1565.779,771.20935,2,1,0.024676466,11.3547325,3.3645654,5,156,187,32,12656.351,\n" +
                        "\"January 2, 2006\",224,1695.389,725.39404,FALSE,2173.1604,6.278867,3,1,0.2767084,1.6497655,1.2496755,3,221,229,9,55.546417\n" +
                        "1/2/06,249,1724.5541,773.42175,FALSE,1544.829,5.9057474,2,1,0.5105971,0.67020833,1.4744527,2,246,250,5,29.369175\n" +
                        "# bar\n" +
                        "\n" +
                        "#";
        // Tab-separated counterpart of csvData.
        String tsvData =
                "# algorithm=org.fhcrc.cpas.viewer.feature.FeatureStrategyPeakClusters\n" +
                        "# date=Mon May 22 13:25:28 PDT 2006\n" +
                        "# java.vendor=Sun Microsystems Inc.\n" +
                        "# java.version=1.5.0_06\n" +
                        "# revision=rev1.1\n" +
                        "# user.name=Matthew\n" +
                        "date\tscan\ttime\tmz\taccurateMZ\tmass\tintensity\tcharge\tchargeStates\tkl\tbackground\tmedian\tpeaks\tscanFirst\tscanLast\tscanCount\ttotalIntensity\tdescription\n" +
                        "1/2/2006\t96\t1543.3401\t858.3246\tFALSE\t1714.6346\t2029.6295\t2\t1\t0.19630894\t26.471083\t12.982442\t4\t92\t100\t9\t20248.762\tdescription\n" +
/*empty int*/   "2/Jan/2006\t\t1560.348\t858.37555\tFALSE\t1714.7366\t1168.3536\t2\t1\t0.033085547\t63.493385\t8.771278\t5\t101\t119\t19\t17977.979\tdesc\"ion\n" +
/*empty date*/  "\t25\t1460.2411\t745.39404\tFALSE\t744.3868\t1114.4303\t1\t1\t0.020280406\t15.826528\t12.413276\t4\t17\t41\t25\t13456.231\tdes,crip,tion\n" +
                        "2-Jan-06\t89\t1535.602\t970.9579\tFALSE\t1939.9012\t823.70984\t2\t1\t0.0228055\t10.497823\t2.5962036\t5\t81\t103\t23\t9500.36\t\n" +
                        "2 January 2006\t164\t1624.442\t783.8968\tFALSE\t1565.779\t771.20935\t2\t1\t0.024676466\t11.3547325\t3.3645654\t5\t156\t187\t32\t12656.351\t\n" +
                        "January 2, 2006\t224\t1695.389\t725.39404\tFALSE\t2173.1604\t6.278867\t3\t1\t0.2767084\t1.6497655\t1.2496755\t3\t221\t229\t9\t55.546417\t\n" +
                        "1/2/06\t249\t1724.5541\t773.42175\tFALSE\t1544.829\t5.9057474\t2\t1\t0.5105971\t0.67020833\t1.4744527\t2\t246\t250\t5\t29.369175\t\n" +
                        "# foo\n" +
                        "\n" +
                        "#";


        /**
         * Writes the given text to a new temporary file that is also registered for deletion
         * when the JVM exits.
         *
         * @param data text to write
         * @param ext  file name suffix, e.g. ".tsv"
         * @return the file that was written
         * @throws IOException if the file cannot be created or written
         */
        private File _createTempFile(String data, String ext) throws IOException
        {
            File f = File.createTempFile("junit", ext);
            f.deleteOnExit();
            Writer w = new FileWriter(f);
            try
            {
                w.write(data);
            }
            finally
            {
                // Release the file handle even when the write fails.
                w.close();
            }
            return f;
        }


        public TabLoaderTestCase()
        {
            this("TabLoader Test");
        }


        public TabLoaderTestCase(String name)
        {
            super(name);
        }


        public void testTSV() throws IOException
        {
        }

        /** Loads the tab-separated sample from a file and checks inferred column types and row count. */
        public void testTSVFile() throws IOException
        {
            File tsv = _createTempFile(tsvData, ".tsv");
            try
            {
                TabLoader l = new TabLoader(tsv);
                Map[] maps = (Map[]) l.load();
                assertEquals(18, l.getColumns().length);
                assertEquals(Date.class, l.getColumns()[0].clazz);
                assertEquals(Integer.class, l.getColumns()[1].clazz);
                assertEquals(Double.class, l.getColumns()[2].clazz);
                assertEquals(7, maps.length);
            }
            finally
            {
                // Clean up even when an assertion fails.
                tsv.delete();
            }
        }


        /** Loads the tab-separated sample through a Reader and checks column and row counts. */
        public void testTSVReader() throws IOException
        {
            File tsv = _createTempFile(tsvData, ".tsv");
            try
            {
                Reader r = new FileReader(tsv);
                try
                {
                    TabLoader l = new TabLoader(r, true);
                    Map[] maps = (Map[]) l.load();
                    assertEquals(18, l.getColumns().length);
                    assertEquals(7, maps.length);
                }
                finally
                {
                    r.close();
                }
            }
            finally
            {
                tsv.delete();
            }
        }


        /** Loads the comma-separated sample from a file and checks inferred column types and row count. */
        public void testCSVFile() throws IOException
        {
            File csv = _createTempFile(csvData, ".csv");
            try
            {
                TabLoader l = new TabLoader(csv);
                l.parseAsCSV();
                Map[] maps = (Map[]) l.load();
                assertEquals(18, l.getColumns().length);
                assertEquals(Date.class, l.getColumns()[0].clazz);
                assertEquals(Integer.class, l.getColumns()[1].clazz);
                assertEquals(Double.class, l.getColumns()[2].clazz);
                assertEquals(7, maps.length);
            }
            finally
            {
                // Clean up even when an assertion fails.
                csv.delete();
            }
        }

        /** Loads the comma-separated sample through a Reader and checks column and row counts. */
        public void testCSVReader() throws IOException
        {
            File csv = _createTempFile(csvData, ".csv");
            try
            {
                Reader r = new FileReader(csv);
                try
                {
                    TabLoader l = new TabLoader(r, true);
                    l.parseAsCSV();
                    Map[] maps = (Map[]) l.load();
                    assertEquals(18, l.getColumns().length);
                    assertEquals(7, maps.length);
                }
                finally
                {
                    r.close();
                }
            }
            finally
            {
                csv.delete();
            }
        }


        /**
         * Loads both samples and checks that they yield the same columns and the same rows.
         * <p/>
         * NOTE(review): the method name lacks the "test" prefix, so the reflective suite built
         * in suite() does not run it. Confirm whether that is intentional before renaming.
         */
        public void compareTSVtoCSV() throws IOException
        {
            TabLoader lCSV = new TabLoader(csvData, true);
            lCSV.parseAsCSV();
            Map[] mapsCSV = (Map[]) lCSV.load();

            TabLoader lTSV = new TabLoader(tsvData, true);
            Map[] mapsTSV = (Map[]) lTSV.load();

            assertEquals(lCSV.getColumns().length, lTSV.getColumns().length);
            assertEquals(mapsCSV.length, mapsTSV.length);
            for (int i = 0; i < mapsCSV.length; i++)
                assertEquals("row " + i, mapsCSV[i], mapsTSV[i]);
        }


        public void testObjects()
        {
            // UNDONE
        }


        public void testTransform()
        {
            // UNDONE
        }


        /** Builds a suite from this class via reflection. */
        public static Test suite()
        {
            return new TestSuite(TabLoaderTestCase.class);
        }
    }


    /**
     * Command-line entry point that runs the TabLoaderTestCase suite without a JUnit runner.
     * Prints "success" or "fail" to standard output, every failure and error to standard error,
     * and finally the stack trace of the first exception encountered, if any. Any Throwable
     * raised along the way is printed to standard error rather than propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception
    {
        try
        {
            // The helper class is resolved by name and its static registerHelpers() invoked
            // reflectively before any test runs.
            Class helperClass = Class.forName("org.fhcrc.cpas.data.ConvertHelper");
            helperClass.getMethod("registerHelpers").invoke(null);

            Test suite = TabLoaderTestCase.suite();
            TestResult outcome = new TestResult();
            suite.run(outcome);

            String verdict;
            if (outcome.wasSuccessful())
                verdict = "success";
            else
                verdict = "fail";
            System.out.println(verdict);

            // Remember the first exception seen across failures, then errors.
            Throwable earliest = null;
            for (Enumeration failed = outcome.failures(); failed.hasMoreElements();)
            {
                TestFailure entry = (TestFailure) failed.nextElement();
                System.err.println(entry.toString());
                if (earliest == null)
                    earliest = entry.thrownException();
            }
            for (Enumeration errored = outcome.errors(); errored.hasMoreElements();)
            {
                TestFailure entry = (TestFailure) errored.nextElement();
                System.err.println(entry.toString());
                if (earliest == null)
                    earliest = entry.thrownException();
            }

            if (earliest == null)
                return;

            System.err.println("first exception");
            earliest.printStackTrace(System.err);
        }
        catch (Throwable problem)
        {
            problem.printStackTrace(System.err);
        }
    }

    /**
     * Set the number of lines to look ahead in the file when inferring the data types of the columns.
     */
    public void setScanAheadLineCount(int count)
    {
        // Stored as given; no validation is applied to the supplied count.
        this._scanAheadLineCount = count;
    }
}