AbstractValueFileParser.java example

Explorer
crezoo-master
package org.tgdb.fileimport;
import java.util.*;



/**
 * AbstractValueFileParser extends the AbstractFileParser. It implements an
 * extended version of the inherited <I>parseInputFile()</I> method which
 * loops the rows of the file and parses each row. The contents of the rows
 * are stored within the object.
 *
 * <P>The class expects to find a file with a table-like structure. Data in
 * the file should be stored in columns, separeted by one character (the
 * delimiter). While the first row of the file contains the header (handled
 * in AbstractFileParser) the second row should contain the column
 * titles. The second row is alway interpreted as the tile row, if it
 * contains any data, that data will be used as column titles. The rest of
 * the file is supposed to contain the date The first rows of a file might
 * look something like this:
 *
 * <P><PRE>
 *    Genotype/list/1/;
 *    IDENTITY;MARKER;ALLELE1;ALLELE2
 *    data1;data2;data3;data4
 *    data5;data6;data7;data8
 *  </PRE>
 *
 * <P>The first row is the header and the second row is the column
 * titles. This row is parsed and the titles are stored in the object. The
 * other rows are data rows.
 *
 * <P>The data in the file is stored in a matrix. The matrix will contain
 * as many rows as there are data rows found in the file. The number of
 * columns in the matrix will be the value returned from the method
 * columns(). By default this is the same number as there are column
 * titles. However, this way of calculating the number of columns might be
 * changed in any subclass by cverriding the method.
 *
 * <P>When the data rows are parsed, the number of createMatrix found on each row
 * is compared to the number of columns. If this is not the same value, the
 * parsing is aborted, as there are errors on the row (too many or too few
 * columns of data). Before the parsing is aborted, the class tries to fix
 * the error.
 *
 * <P>Subclasses may access the data by using the protected <I>createMatrix()</I>
 * method. In order to give other classes access to the data, a new method
 * has to be implemented in the subclasses.
 *
 * <P>The class implements a new version of the <I>isDataRow</I> method. The
 * method now tests if it is the second row that is checked. If it is, the
 * method will return false, as the second row of the file is the tile row.
 *
 * <P>
 * @author frob
 * @see AbstractFileParser
 */
public abstract class AbstractValueFileParser extends AbstractFileParser {
    
    /**
     * A matrix with all data createMatrix read from the input file
     */
    protected String[][] mValues;
    
    /**
     * The delimiter to split data
     */
    private String delimiter;
    
    /**
     * The string to comment out a row.
     */
    private String commentStr;
    
    
    
    //////////////////////////////////////////////////////////////////////
    //
    // Constructors
    //
    //////////////////////////////////////////////////////////////////////
    
    
    /**
     * Default constructor, should never be used
     *
     */
//    public AbstractValueFileParser() {
//        super();
//    }
    
    
    /**
     * Creates a new AbstractValueFileParser instance.
     *
     * @param fileName The file this object should read information from
     * @exception AssertionException If no filename is given
     */
    public AbstractValueFileParser(String fileName)
    throws AssertionException {
        super(fileName);
    }
    
    
    
    //////////////////////////////////////////////////////////////////////
    //
    // Public section
    //
    //////////////////////////////////////////////////////////////////////
    
    /**
     * Get the delimiter that is used in the file
     * @return a string of the delimiter
     */
    protected String getDelimiter() {
        return delimiter;
    }
    
    /**
     * Set the delimiter used to split file
     * @param delimiter The delimiter to split
     */
    protected void setDelimiter(String delimiter) {
        this.delimiter = delimiter;
    }

    /**
     * Get the comment str used in file. Example: #, --, //
     * @return The comment string
     */
    protected String getCommentStr() {
        return commentStr;
    }

    /**
     * Set the comment str used in file. Example: #, --, //
     * @param commentStr the comment string
     */
    protected void setCommentStr(String commentStr) {
        this.commentStr = commentStr;
    }
    
    //////////////////////////////////////////////////////////////////////
    //
    // Protected section
    //
    //////////////////////////////////////////////////////////////////////
    
    
    
    
    /**
     * This method parses the file and builds a matrix of string objects
     * containg all the data in the file.
     *
     * @exception InputDataFileException If any errors when reading the file
     */
    protected void parseInputFile() throws InputDataFileException {
        
        // Run the inherited parse() method to read all rows of the
        // file. Also parses the header section
        super.parseInputFile();
        
        
        
        // Create the matrix used for storing the createMatrix read from the file
        initMatrix(super.getFileRows());
        
        // Variables used in the loop below
        String aRow;             // The current row from the data array
        int aCurrentDataRow = 0; // The row in value-matrix createMatrix should
        // be placed at
        
        // Loop all rows in the data array. Start on third row
        for (int aRowCount = 0; aRowCount < getFileRows(); aRowCount++) {
            // Get the row and check its size. If it's empty or a comment,
            // continue with next row
            aRow = getFileData(aRowCount);
            
            // Copy data from the row to the value matrix
            splitRow(aRow, aCurrentDataRow++);
        }
        
    }
    
    
    
    //////////////////////////////////////////////////////////////////////
    //
    // Private section
    //
    //////////////////////////////////////////////////////////////////////
    
    
    /**
     * Initialize the data structure with the right size.
     * @param rows the number of rows to use
     */
    private void initMatrix(int rows) {
        mValues = new String[rows][];
    }
    
    
    /**
     * Parses the given string to get the data. The data is inserted in the
     * value matrix on the given row.
     *
     * @param dataRow The row to get data from
     * @param rowIndex The row in the value matrix to insert data into
     */
    private void splitRow(String dataRow, int rowIndex) {
        // Create a tokenizer based on the row. Each token will represent one
        // column of data
//        StringTokenizer aTokenizer =
//                new StringTokenizer(dataRow, delimiter, false);
        
        String[] tmp = dataRow.split(delimiter, -1);
        
        mValues[rowIndex] = tmp;
        
        
//        int cols = aTokenizer.countTokens();
//        mValues[rowIndex] = new String[cols];
//        
//        // For each column on the row, add the contents of the column to the
//        // correct column on the given row in the value matrix
//        for (int aColumnIndex = 0; aColumnIndex < cols &&
//                aTokenizer.hasMoreTokens(); aColumnIndex++) 
//        {
//            String str = aTokenizer.nextToken();
//            mValues[rowIndex][aColumnIndex] = str;
//        }
    }
}