AbstractValueFileParser.java example

Explorer
crezoo-master
/*
  Copyright (C) 2000 by Prevas AB. All rights reserved.

  $Log$
  Revision 1.3  2003/05/02 07:58:45  heto
  Changed the package structure from se.prevas.arexis.XYZ to se.arexis.agdb.XYZ
  Modified configuration and source files according to package change.

  Revision 1.2  2002/10/18 11:41:26  heto
  Replaced Assertion.assert with Assertion.assertMsg

  Java 1.4 have a keyword "assert".

  Revision 1.1.1.1  2002/10/16 18:14:07  heto
  Import of aGDB 1.5 L3 from Prevas CVS-tree.
  This version of aGDB is migrated to Tomcat from JServ by Tobias Hermansson


  Revision 1.1  2001/04/24 09:34:18  frob
  Moved file import classes to new package se.prevas.arexis.util.FileImport,
  caused updates in several files.

  Revision 1.2  2001/04/24 06:31:45  frob
  Checkin after merging frob_fileparser branch.

  Revision 1.1.2.4  2001/04/19 09:51:58  frob
  Changed class comment.

  Revision 1.1.2.3  2001/04/19 09:48:46  frob
  Renamed the Parse() method to parseInputFile() and made it protected.

  Revision 1.1.2.2  2001/04/03 11:32:01  frob
  Added a call to the inherited Parse(), had been removed by misstake.

  Revision 1.1.2.1  2001/04/03 10:30:21  frob
  Class created as part of the new hierachy for parser classes.

*/

package se.arexis.agdb.util.FileImport;

import java.io.*;
import java.util.*; 
import se.arexis.agdb.util.*;


/**
 * AbstractValueFileParser extends the AbstractFileParser. It implements an
 * extended version of the inherited <I>parseInputFile()</I> method which
 * loops the rows of the file and parses each row. The contents of the rows
 * are stored within the object.
 *
 * <P>The class expects to find a file with a table-like structure. Data in
 * the file should be stored in columns, separated by one character (the
 * delimiter). While the first row of the file contains the header (handled
 * in AbstractFileParser) the second row should contain the column
 * titles. The second row is always interpreted as the title row; if it
 * contains any data, that data will be used as column titles. The rest of
 * the file is supposed to contain the data. The first rows of a file might
 * look something like this:
 *
 * <P><PRE>
 *    Genotype/list/1/;
 *    IDENTITY;MARKER;ALLELE1;ALLELE2
 *    data1;data2;data3;data4
 *    data5;data6;data7;data8
 *  </PRE>
 *
 * <P>The first row is the header and the second row is the column
 * titles. This row is parsed and the titles are stored in the object. The
 * other rows are data rows.
 *
 * <P>The data in the file is stored in a matrix. The matrix will contain
 * as many rows as there are data rows found in the file. The number of
 * columns in the matrix will be the value returned from the method
 * columns(). By default this is the same number as there are column
 * titles. However, this way of calculating the number of columns might be
 * changed in any subclass by overriding the method.
 *
 * <P>When the data rows are parsed, the number of values found on each row
 * is compared to the number of columns. If this is not the same value, the
 * parsing is aborted, as there are errors on the row (too many or too few
 * columns of data). Before the parsing is aborted, the class tries to fix
 * the error. 
 *
 * <P>Subclasses may access the data by using the protected <I>values()</I>
 * method. In order to give other classes access to the data, a new method
 * has to be implemented in the subclasses. 
 * 
 * <P>The class implements a new version of the <I>isDataRow</I> method. The
 * method now tests if it is the second row that is checked. If it is, the
 * method will return false, as the second row of the file is the title row.
 *
 * <P>
 * @author frob
 * @see AbstractFileParser
 */
public abstract class AbstractValueFileParser extends AbstractFileParser
{
   
   /**
    * An array with the names of the columns in the file. The field is
    * null until the title row has been parsed by parseInputFile().
    */
   private String[] mColumnTitles;

   /**
    * A matrix with all data values read from the input file. The field
    * is null until parseInputFile() has created the matrix.
    */
   private String[][] mValues;


   
   //////////////////////////////////////////////////////////////////////
   //
   // Constructors
   //
   //////////////////////////////////////////////////////////////////////


   /**
    * Default constructor, should never be used
    *
    */
   public AbstractValueFileParser()
   {
      super();
   }
   

   /**
    * Creates a new AbstractValueFileParser instance.
    *
    * @param fileName The file this object should read information from
    * @exception AssertionException If no filename is given
    */
   public AbstractValueFileParser(String fileName)
      throws AssertionException
   {
      super(fileName);
   }

   

   //////////////////////////////////////////////////////////////////////
   //
   // Public section
   //
   //////////////////////////////////////////////////////////////////////


   
   /**
    * Returns an array with the column names of the file. The internal
    * array itself is returned (no copy is made).
    *
    * @return An array with the column names, or null if the title row
    *         has not been parsed yet
    */
   public String[] columnTitles()
   {
      return mColumnTitles;
   }


   //////////////////////////////////////////////////////////////////////
   //
   // Protected section
   //
   //////////////////////////////////////////////////////////////////////

   

   /**
    * Checks whether the given row is a valid data row or not. A row is a
    * data row if the inherited check accepts it and it is not the title
    * row (row index 1, i.e. the second row of the file).
    *
    * @param fileRow The row to verify.
    * @param rowNumber The index of the row within the file
    * @return true if the row is a valid data row<BR>
    *         false if the row is not a data row
    */
   protected boolean isDataRow(String fileRow, int rowNumber)
   {
      // The inherited check is evaluated first, exactly as before; the
      // second row is the title row and never counts as data.
      return super.isDataRow(fileRow, rowNumber) && rowNumber != 1;
   }   

   
   /**
    * Returns the matrix with data read from the file. The internal
    * matrix itself is returned (no copy is made).
    *
    * @return The matrix with data, or null if no file has been parsed
    */
   protected String[][] values()
   {
      return mValues;
   }

   
   /**
    * Returns the number of columns of data in the file. By default, this
    * is the same number as there are column titles. Subclasses may
    * override this method to calculate the number differently.
    *
    * @return The number of columns of data in the file
    */
   protected int columns()
   {
      return countColumnTitles();
   }


   /**
    * Returns the number of column titles read from file
    *
    * @return The number of column titles, 0 if no title row has been
    *         parsed yet
    */
   protected int countColumnTitles()
   {
      // mColumnTitles is not assigned until the title row is parsed;
      // report "no titles" instead of failing with a NullPointerException
      if (mColumnTitles == null)
      {
         return 0;
      }
      return mColumnTitles.length;
   }
   

   /**
    * Looks up the given string in the array of column titles and returns
    * the position of the string within the array. The comparison ignores
    * case.
    *
    * @param columnName The column name to look up
    * @return -1 if the string was not found in the array<BR>
    *         The position of the string in the column array
    */
   protected int columnIndex(String columnName)
   {
      for (int aIndex = 0; aIndex < countColumnTitles(); aIndex++)
      {
         if (columnTitles()[aIndex].equalsIgnoreCase(columnName))
         {
            return aIndex;
         }
      }
      return -1;
   }

      
   /**
    * This method parses the file and builds a matrix of string objects
    * containing all the data in the file. The inherited parseInputFile()
    * is run first, then the title row (second row) is parsed and finally
    * every non-empty, non-comment row from the third row onwards is
    * copied into the value matrix.
    * 
    * @exception InputDataFileException If any errors when reading the
    * file, or if the file has no second row to read column titles from
    */
   protected void parseInputFile() throws
      InputDataFileException
   {
      try
      {
         // Run the inherited parse() method to read all rows of the
         // file. Also parses the header section
         super.parseInputFile();

         // The title row is read from index 1 below. Report a missing
         // title row as a proper input file error rather than letting an
         // unchecked indexing error escape to the caller.
         if (fileRows() < 2)
         {
            throw new InputDataFileException(
               "File does not contain a row with column titles");
         }
         
         // Parse the information on the second row (column names)
         parseColumnTitles(fileData()[1]);
         
         // Create the matrix used for storing the values read from the file 
         values(dataRows(), columns());

         // The row in the value matrix the next data row is placed at
         int aCurrentDataRow = 0;
                  
         // Loop all rows in the data array. Start on third row
         for (int aRowCount = 2; aRowCount < fileRows(); aRowCount++)
         {
            // Get the row and check its size. If it's empty or a comment,
            // continue with next row
            String aRow = fileData()[aRowCount];
            if (aRow.length() == 0 || aRow.charAt(0) == commentChar())
            {
               continue;
            }

            // Verifies that the row has the correct number of columns. If
            // not, it tries to fix the row. The row number passed is
            // one-based (aRowCount + 1).
            aRow = ParserUtils.fixColumnsOnRow(aRow, (aRowCount + 1),
                                               columns(), fieldDelimiter()); 

            // Copy data from the row to the value matrix
            // NOTE(review): assumes dataRows() counts exactly the rows
            // that pass the empty/comment test above - confirm against
            // AbstractFileParser
            copyDataFromRow(aRow, aCurrentDataRow++);
         }
      }
      catch (AssertionException e)
      {
         throw new InputDataFileException(e.getMessage());
      }
   }

   

   //////////////////////////////////////////////////////////////////////
   //
   // Private section
   //
   //////////////////////////////////////////////////////////////////////


   /**
    * Sets the array with the names of the columns in the file
    *
    * @param columnTitles An array with column names
    */
   private void columnTitles(String[] columnTitles)
   {
      mColumnTitles = columnTitles;
   }


   /**
    * Creates the matrix used for the data read from the input
    * file. Each element is initialised with an empty string
    *
    * @param rows The number of rows in the matrix
    * @param cols The number of columns in the matrix
    */
   private void values(int rows, int cols)
   {
      mValues = new String[rows][cols];
      for (int r = 0; r < mValues.length; r++)
      {
         for (int c = 0; c < mValues[r].length; c++)
         {
            mValues[r][c] = "";
         }
      }
   }


   /**
    * Parses the given line in order to determine the column names. The
    * names are stored in the columnTitles array. An empty line results
    * in an empty array of column titles.
    *
    * @param columnRow The string to parse column titles from
    * @exception InputDataFileException if second row of the file is a
    * comment.
    */
   private void parseColumnTitles(String columnRow)
      throws InputDataFileException
   {
      try 
      {
         // Ensure there is not a comment on the line. An empty line has
         // no first character to inspect and can not be a comment, so it
         // must be tested for before calling charAt(0).
         Assertion.assertMsg(columnRow.length() == 0 ||
                             columnRow.charAt(0) != commentChar(),
                          "Second line should not be a comment");

         // Build a tokenizer based on the fieldDelimiter 
         // NOTE(review): StringTokenizer treats consecutive delimiters as
         // one, so empty titles are skipped - confirm this is intended
         StringTokenizer aTokenizer =
            new StringTokenizer(columnRow, fieldDelimiter().toString(),
                                false); 
      
         // Get the number of column titles found on the row and build an
         // array with that size
         String[] aTitles = new String[aTokenizer.countTokens()];
      
         // Now get the tokens (eg the column names) from the row and add them
         // to the array
         int aTitleIndex = 0;
         while (aTokenizer.hasMoreTokens())
         {
            aTitles[aTitleIndex++] = aTokenizer.nextToken();
         }
         columnTitles(aTitles);
      }
      catch (AssertionException e)
      {
         throw new InputDataFileException(e.getMessage());
      }
   }



   /**
    * Parses the given string to get the data. The data is inserted in the
    * value matrix on the given row. At most columns() values are copied;
    * if the row holds fewer tokens the remaining cells keep their
    * initial empty string.
    *
    * @param dataRow The row to get data from
    * @param rowIndex The row in the value matrix to insert data into
    */
   private void copyDataFromRow(String dataRow, int rowIndex)
   {
      // Create a tokenizer based on the row. Each token will represent one
      // column of data
      // NOTE(review): StringTokenizer skips empty fields (consecutive
      // delimiters), which would shift later values to the left unless
      // ParserUtils.fixColumnsOnRow compensates - confirm
      StringTokenizer aTokenizer =
         new StringTokenizer(dataRow, fieldDelimiter().toString(), false); 

      // For each column on the row, add the contents of the column to the
      // correct column on the given row in the value matrix
      for (int aColumnIndex = 0; aColumnIndex < columns() &&
              aTokenizer.hasMoreTokens(); aColumnIndex++)
      {
         values()[rowIndex][aColumnIndex] = aTokenizer.nextToken();
      }
   }   



}