/* Copyright (C) 2000 by Prevas AB. All rights reserved. $Log$ Revision 1.3 2003/05/02 07:58:45 heto Changed the package structure from se.prevas.arexis.XYZ to se.arexis.agdb.XYZ Modified configuration and source files according to package change. Revision 1.2 2002/10/18 11:41:26 heto Replaced Assertion.assert with Assertion.assertMsg Java 1.4 have a keyword "assert". Revision 1.1.1.1 2002/10/16 18:14:07 heto Import of aGDB 1.5 L3 from Prevas CVS-tree. This version of aGDB is migrated to Tomcat from JServ by Tobias Hermansson Revision 1.1 2001/04/24 09:34:18 frob Moved file import classes to new package se.prevas.arexis.util.FileImport, caused updates in several files. Revision 1.2 2001/04/24 06:31:45 frob Checkin after merging frob_fileparser branch. Revision 1.1.2.4 2001/04/19 09:51:58 frob Changed class comment. Revision 1.1.2.3 2001/04/19 09:48:46 frob Renamed the Parse() method to parseInputFile() and made it protected. Revision 1.1.2.2 2001/04/03 11:32:01 frob Added a call to the inherited Parse(), had been removed by misstake. Revision 1.1.2.1 2001/04/03 10:30:21 frob Class created as part of the new hierachy for parser classes. */ package se.arexis.agdb.util.FileImport; import java.io.*; import java.util.*; import se.arexis.agdb.util.*; /** * AbstractValueFileParser extends the AbstractFileParser. It implements an * extended version of the inherited <I>parseInputFile()</I> method which * loops the rows of the file and parses each row. The contents of the rows * are stored within the object. * * <P>The class expects to find a file with a table-like structure. Data in * the file should be stored in columns, separeted by one character (the * delimiter). While the first row of the file contains the header (handled * in AbstractFileParser) the second row should contain the column * titles. The second row is alway interpreted as the tile row, if it * contains any data, that data will be used as column titles. The rest of * the file is supposed to contain the date The first rows of a file might * look something like this: * * <P><PRE> * Genotype/list/1/; * IDENTITY;MARKER;ALLELE1;ALLELE2 * data1;data2;data3;data4 * data5;data6;data7;data8 * </PRE> * * <P>The first row is the header and the second row is the column * titles. This row is parsed and the titles are stored in the object. The * other rows are data rows. * * <P>The data in the file is stored in a matrix. The matrix will contain * as many rows as there are data rows found in the file. The number of * columns in the matrix will be the value returned from the method * columns(). By default this is the same number as there are column * titles. However, this way of calculating the number of columns might be * changed in any subclass by cverriding the method. * * <P>When the data rows are parsed, the number of values found on each row * is compared to the number of columns. If this is not the same value, the * parsing is aborted, as there are errors on the row (too many or too few * columns of data). Before the parsing is aborted, the class tries to fix * the error. * * <P>Subclasses may access the data by using the protected <I>values()</I> * method. In order to give other classes access to the data, a new method * has to be implemented in the subclasses. * * <P>The class implements a new version of the <I>isDataRow</I> method. The * method now tests if it is the second row that is checked. If it is, the * method will return false, as the second row of the file is the tile row. * * <P> * @author frob * @see AbstractFileParser */ public abstract class AbstractValueFileParser extends AbstractFileParser { /** * An array with the names of the columns in the file */ private String[] mColumnTitles; /** * A matrix with all data values read from the input file */ private String[][] mValues; ////////////////////////////////////////////////////////////////////// // // Constructors // ////////////////////////////////////////////////////////////////////// /** * Default constructor, should never be used * */ public AbstractValueFileParser() { super(); } /** * Creates a new AbstractValueFileParser instance. * * @param fileName The file this object should read information from * @exception AssertionException If no filename is given */ public AbstractValueFileParser(String fileName) throws AssertionException { super(fileName); } ////////////////////////////////////////////////////////////////////// // // Public section // ////////////////////////////////////////////////////////////////////// /** * Returns an array with the column names of the file * * @return An array with the column names */ public String[] columnTitles() { return mColumnTitles; } ////////////////////////////////////////////////////////////////////// // // Protected section // ////////////////////////////////////////////////////////////////////// /** * Checks whether the given row is a valid data row or not. * * @param fileRow The row to verify. * @param rowNumber The index of the row within the file * @return true if the row is a valid data row<BR> * false if the row is not a data row */ protected boolean isDataRow(String fileRow, int rowNumber) { // If this is second row, we are checking the the title row which is // not a data row if (super.isDataRow(fileRow, rowNumber) && rowNumber != 1) { return true; } return false; } /** * Returns the matrix with data read from the file * * @return The matrix with data */ protected String[][] values() { return mValues; } /** * Returns the number of columns of data in the file. By default, this * is the same number as there are column titles * * @return The number of columns of data in the file */ protected int columns() { return countColumnTitles(); } /** * Returns the number of column titles read from file * * @return The number of column titles */ protected int countColumnTitles() { return mColumnTitles.length; } /** * Looks up the given string in the array of column titles and returns * the position of the string within the array. * * @param columnName The column name to look up * @return -1 if the string was not found in the array<BR> * The position of the string in the column array */ protected int columnIndex(String columnName) { for (int aIndex = 0; aIndex < countColumnTitles(); aIndex++) { if (columnTitles()[aIndex].equalsIgnoreCase(columnName)) { return aIndex; } } return -1; } /** * This method parses the file and builds a matrix of string objects * containg all the data in the file. * * @exception InputDataFileException If any errors when reading the file */ protected void parseInputFile() throws InputDataFileException { try { // Run the inherited parse() method to read all rows of the // file. Also parses the header section super.parseInputFile(); // Parse the information on the second row (column names) parseColumnTitles(fileData()[1]); // Create the matrix used for storing the values read from the file values(dataRows(), columns()); // Variables used in the loop below String aRow; // The current row from the data array int aCurrentDataRow = 0; // The row in value-matrix values should // be placed at // Loop all rows in the data array. Start on third row for (int aRowCount = 2; aRowCount < fileRows(); aRowCount++) { // Get the row and check its size. If it's empty or a comment, // continue with next row aRow = fileData()[aRowCount]; if (aRow.length() == 0 || aRow.charAt(0) == commentChar()) { continue; } // Verifies that the row has the correct number of columns. If // not, it tries to fix the row. aRow = ParserUtils.fixColumnsOnRow(aRow, (aRowCount + 1), columns(), fieldDelimiter()); // Copy data from the row to the value matrix copyDataFromRow(aRow, aCurrentDataRow++); } } catch (AssertionException e) { throw new InputDataFileException(e.getMessage()); } } ////////////////////////////////////////////////////////////////////// // // Private section // ////////////////////////////////////////////////////////////////////// /** * Sets the array with the names of the columns in the file * * @param columnTitles An array with column names */ private void columnTitles(String[] columnTitles) { mColumnTitles = columnTitles; } /** * Creates the matrix used for the data read from the input * file. Each element is initialised with an empty string * * @param rows The number of rows in the matrix * @param cols The number of columns in the matrix */ private void values(int rows, int cols) { mValues = new String[rows][cols]; for (int r = 0; r < mValues.length; r++) for (int c = 0; c < mValues[0].length; c++) mValues[r][c] = ""; } /** * Parses the given line in order to determine the column names. The * names are stored in the columnTitles array * * @param columnRow The string to parse column titles from * @exception InputDataFileException if second row of the file is a * comment. */ private void parseColumnTitles(String columnRow) throws InputDataFileException { try { // Ensure there is not a comment on the line Assertion.assertMsg(columnRow.charAt(0) != commentChar(), "Second line should not be a comment"); // Build a tokenizer based on the fieldDelimiter StringTokenizer aTokenizer = new StringTokenizer(columnRow, fieldDelimiter().toString(), false); // Get the number of column titles found of the row and build an // array with that size columnTitles(new String[aTokenizer.countTokens()]); // Now get the tokens (eg the column names) from the row and add them // to the array int aTitleIndex = 0; while (aTokenizer.hasMoreElements()) { columnTitles()[aTitleIndex++] = aTokenizer.nextToken(); } } catch (AssertionException e) { throw new InputDataFileException(e.getMessage()); } } /** * Parses the given string to get the data. The data is inserted in the * value matrix on the given row. * * @param dataRow The row to get data from * @param rowIndex The row in the value matrix to insert data into */ private void copyDataFromRow(String dataRow, int rowIndex) { // Create a tokenizer based on the row. Each token will represent one // column of data StringTokenizer aTokenizer = new StringTokenizer(dataRow, fieldDelimiter().toString(), false); // For each column on the row, add the contents of the column to the // correct column on the given row in the value matrix for (int aColumnIndex = 0; aColumnIndex < columns() && aTokenizer.hasMoreTokens(); aColumnIndex++) { values()[rowIndex][aColumnIndex] = aTokenizer.nextToken(); } } }