/*
* Copyright 2016 (C) Tom Parker <thpr@users.sourceforge.net>
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package pcgen.rules.persistence;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import pcgen.base.text.ParsingSeparator;
import pcgen.base.util.FormatManager;
import pcgen.cdom.format.table.DataTable;
import pcgen.cdom.format.table.TableColumn;
import pcgen.persistence.PersistenceLayerException;
import pcgen.persistence.lst.LstLineFileLoader;
import pcgen.rules.context.LoadContext;
/**
* A TableLoader loads CSV-like files into Tables for PCGen.
*
* The CSV files must conform to the CSV file format with some additional
* limitations. Embedded newlines are not permitted in quotes. This should
* generate an error from TableLoader.
*/
public class TableLoader extends LstLineFileLoader
{
/**
* A pattern for empty lines. This helps the LineProcessors from having to
* deal with this situation.
*/
private static Pattern EMPTY = Pattern.compile("^[\\s,\\\"]+$");
/**
* The active LineProcessor used to interpret the contents of the next
* loaded line in a Table file.
*/
private LineProcessor processor = new ExpectStartTable();
@Override
public void loadLstString(LoadContext context, URI uri, String aString)
throws PersistenceLayerException
{
//Reset to ensure prior file corruption doesn't leak into a new file
processor = new ExpectStartTable();
super.loadLstString(context, uri, aString);
if (!(processor instanceof ExpectStartTable))
{
throw new PersistenceLayerException(
"Did not find last ENDTABLE: entry in " + uri);
}
}
@Override
public void parseLine(LoadContext context, String lstLine, URI sourceURI)
throws PersistenceLayerException
{
//ignore comments
if (lstLine.startsWith("#") || lstLine.startsWith("\"#"))
{
return;
}
//Empty line (commas, whitespace, empty quotes)
if (EMPTY.matcher(lstLine).find())
{
return;
}
processor = processor.parseLine(context, lstLine, sourceURI);
}
/**
* A LineProcessor interprets a line of a Table file and returns the
* LineProcessor that should be responsible for interpreting the next line
* of the file.
*
* A LineProcessor is not expected to be able to understand/comprehend
* either blank lines or comment lines. Both of those should be ignored
* prior to the line being passed to a LineProcessor.
*/
public interface LineProcessor
{
/**
* Processes the given line of a Table file (identified by the
* sourceURI).
*
* @param context
* The LoadContext to be used for interpretation of the Table
* file
* @param lstLine
* The line of the Table file to be processed
* @param sourceURI
* The URI indicating the file being processed
* @return The LineProcessor that should be responsible for interpreting
* the next line of the file
* @throws PersistenceLayerException
* if there is an error during the loading of the given line
*/
public LineProcessor parseLine(LoadContext context, String lstLine,
URI sourceURI) throws PersistenceLayerException;
}
/**
* Unescapes a given entry. This performs whitespace padding removal both
* before and after the removal of the optional escaping quotes available in
* the CSV file format.
*
* @param entry
* The entry to be unescaped into its base state
* @return The unescaped entry (trimmed and with CSV quoting removed)
*/
private static String unescape(String entry)
{
String unescaped = entry.trim();
if (unescaped.startsWith("\"") && unescaped.endsWith("\""))
{
unescaped = unescaped.substring(1, unescaped.length() - 1);
unescaped = unescaped.replace("\"\"", "\"");
}
return unescaped.trim();
}
/**
* Ensures the rest of a given line is empty. This means the
* ParsingSeparator should only return entries that trim to length zero
* (after accounting for CSV escaping quotes)
*
* @param line
* The line being processed (for debugging purposes only)
* @param ps
* The ParsingSeparator that should only return "blank" entries
* @throws PersistenceLayerException
* if there is any non-blank content returned by the
* ParsingSeparator
*/
private static void ensureEmpty(String line, ParsingSeparator ps)
throws PersistenceLayerException
{
while (ps.hasNext())
{
String next = ps.next();
if ((!next.isEmpty()) && (!unescape(next).isEmpty()))
{
throw new PersistenceLayerException(
"Expected Rest of Line to be empty: " + line);
}
}
}
/**
* Generates a new "naive" CSV separator. This is not formally CSV compliant
* because it ignores "embedded" new lines. For purposes of PCGen this is
* acceptable.
*
* @param lstLine
* The line to be processed by a CSV-like ParsingSeparator
* @return A ParsingSeparator for the given line
*/
private static ParsingSeparator generateCSVSeparator(String lstLine)
{
ParsingSeparator ps = new ParsingSeparator(lstLine, ',');
ps.addGroupingPair('"', '"');
return ps;
}
/**
* ExpectStartTable is the LineProcessor that waits for a "STARTTABLE:"
* entry in a Table file. If any other content is encountered, a
* PersistenceLayerException is thrown.
*/
private static class ExpectStartTable implements LineProcessor
{
@Override
public LineProcessor parseLine(LoadContext context, String lstLine,
URI sourceURI) throws PersistenceLayerException
{
ParsingSeparator ps = generateCSVSeparator(lstLine);
String first = unescape(ps.next());
if (first.startsWith("STARTTABLE:"))
{
ensureEmpty(lstLine, ps);
DataTable table = context.getReferenceContext()
.constructCDOMObject(DataTable.class, first.substring(11));
return new ImportColumnNames(table);
}
throw new PersistenceLayerException(
"Expected STARTTABLE: entry, but found: " + lstLine + " in "
+ sourceURI);
}
}
/**
* ImportColumnNames is the LineProcessor that reads in the column names in
* a Table file.
*/
private static class ImportColumnNames implements LineProcessor
{
/**
* The underlying Table to which the column names should be assigned
*/
private final DataTable t;
/**
* Constructs a new ImportColumnNames with the given underlying Table.
*
* @param table
* The underlying Table to which the column names should be
* assigned
*/
public ImportColumnNames(DataTable table)
{
t = table;
}
@Override
public LineProcessor parseLine(LoadContext context, String lstLine,
URI sourceURI) throws PersistenceLayerException
{
ParsingSeparator ps = generateCSVSeparator(lstLine);
List<String> columnNames = new ArrayList<String>();
boolean first = true;
boolean foundEmpty = false;
while (ps.hasNext())
{
String columnName = unescape(ps.next());
if (columnName.isEmpty())
{
foundEmpty = true;
continue;
}
//Once an empty item was reached, nothing later on can have content
if (foundEmpty)
{
throw new PersistenceLayerException(
"Encountered blank Column Name entry in " + lstLine
+ " for " + t.getDisplayName() + " in "
+ sourceURI);
}
if (first && columnName.startsWith("STARTTABLE:"))
{
throw new PersistenceLayerException(
"Encountered STARTTABLE: entry while expecting Column Names for "
+ t.getDisplayName() + " in " + sourceURI);
}
if (first && columnName.startsWith("ENDTABLE:"))
{
throw new PersistenceLayerException(
"Encountered ENDTABLE: entry while expecting Column Names for "
+ t.getDisplayName() + " in " + sourceURI);
}
columnNames.add(columnName);
first = false;
}
return new ImportColumnFormats(t, columnNames);
}
}
/**
* ImportColumnFormats is the LineProcessor that reads in the column formats
* in a Table file.
*
* The number of Format lines in a table must match the number of column
* names in the table.
*/
private static class ImportColumnFormats implements LineProcessor
{
/**
* The underlying Table to which the formats should be assigned.
*/
private final DataTable t;
private final List<String> columnNames;
/**
* Constructs a new ImportColumnFormats with the given underlying Table.
*
* @param table
* The underlying Table to which the formats should be
* assigned
* @param columnNames
*/
public ImportColumnFormats(DataTable table, List<String> columnNames)
{
t = table;
this.columnNames = columnNames;
}
@Override
public LineProcessor parseLine(LoadContext context, String lstLine,
URI sourceURI) throws PersistenceLayerException
{
ParsingSeparator ps = generateCSVSeparator(lstLine);
boolean first = true;
boolean foundEmpty = false;
int i = 0;
while (ps.hasNext())
{
String formatName = unescape(ps.next());
if (formatName.isEmpty())
{
foundEmpty = true;
continue;
}
//Once an empty item was reached, nothing later on can have content
if (foundEmpty)
{
throw new PersistenceLayerException(
"Encountered blank FORMAT entry in " + lstLine + " for "
+ t.getDisplayName() + " in " + sourceURI);
}
if (first && formatName.startsWith("STARTTABLE:"))
{
throw new PersistenceLayerException(
"Encountered STARTTABLE: entry while expecting Formats for "
+ t.getDisplayName() + " in " + sourceURI);
}
if (first && formatName.startsWith("ENDTABLE:"))
{
throw new PersistenceLayerException(
"Encountered ENDTABLE: entry while expecting Formats for "
+ t.getDisplayName() + " in " + sourceURI);
}
if (columnNames.size() <= i)
{
throw new PersistenceLayerException("Encountered FORMAT "
+ i + " but no such column was named for "
+ t.getDisplayName() + " in " + sourceURI);
}
String name = columnNames.get(i++);
TableColumn column = context.getReferenceContext()
.constructNowIfNecessary(TableColumn.class, name);
FormatManager<?> format = context.getReferenceContext()
.getFormatManager(formatName);
if (column.getFormatManager() == null)
{
column.setFormatManager(format);
column.setSourceURI(sourceURI);
}
else if (!column.getFormatManager().equals(format))
{
throw new PersistenceLayerException("Table column " + name
+ " in table " + t.getDisplayName() + " in " + sourceURI
+ " had different format than previous column format: "
+ format + " in " + column.getSourceURI());
}
t.addColumn(column);
first = false;
}
if (t.getColumnCount() != columnNames.size())
{
throw new PersistenceLayerException(
"Table " + t.getDisplayName()
+ " had different quantity of column names and formats "
+ " in " + sourceURI);
}
return new ImportData(t);
}
}
/**
* ImportData is the LineProcessor that reads in the row data in a Table
* file. Each entry must conform to the format defined in the format row of
* the Table. There may not be more columns in a data row than there were
* formats.
*/
private static class ImportData implements LineProcessor
{
/**
* The underlying Table to which the data will be loaded.
*/
private final DataTable t;
/**
* Constructs a new ImportData with the given underlying Table.
*
* @param table
* The underlying Table to which the data will be loaded
*/
public ImportData(DataTable table)
{
t = table;
}
@Override
public LineProcessor parseLine(LoadContext context, String lstLine,
URI sourceURI) throws PersistenceLayerException
{
ParsingSeparator ps = generateCSVSeparator(lstLine);
int i = 0;
List<Object> rowContents = new ArrayList<>();
while (ps.hasNext())
{
String content = unescape(ps.next());
if (i == 0)
{
if (content.startsWith("STARTTABLE:"))
{
throw new PersistenceLayerException(
"Encountered STARTTABLE: entry before reaching ENDTABLE for "
+ t.getDisplayName() + " in " + sourceURI);
}
if (content.startsWith("ENDTABLE:"))
{
ensureEmpty(lstLine, ps);
if (t.getRowCount() == 0)
{
throw new PersistenceLayerException(
"Table " + t.getDisplayName()
+ " had no data in " + sourceURI);
}
t.trim();
return new ExpectStartTable();
}
}
rowContents.add(t.getFormat(i++).convert(content));
}
t.addRow(rowContents);
return this;
}
}
}