package com.openMap1.mapper.converters; import java.io.InputStream; import java.util.Hashtable; import java.util.StringTokenizer; import java.util.Vector; import org.w3c.dom.Document; import org.w3c.dom.Element; import com.openMap1.mapper.ElementDef; import com.openMap1.mapper.MappedStructure; import com.openMap1.mapper.MapperFactory; import com.openMap1.mapper.MaxMult; import com.openMap1.mapper.MinMult; import com.openMap1.mapper.core.MapperException; import com.openMap1.mapper.core.NamespaceSet; import com.openMap1.mapper.structures.MapperWrapper; import com.openMap1.mapper.structures.StructureDefinition; import com.openMap1.mapper.util.FileUtil; import com.openMap1.mapper.util.GenUtil; import com.openMap1.mapper.util.XMLUtil; /** * Wrapper class to convert between a csv file and the XML normally * extracted from relational databases. * XPaths in this XML are /database/<TABLE_NAME>/record/<COLUMN_NAME>. * * For a csv file representing a single table, the first row of the file defines the column * names, and by convention the table is called 'TABLE'. * * For a csv representing several tables, the contents of the first row are '$TABLE <TABLE_NAME>', * the second row defines columns in this table, * and any subsequent rows starting with '$TABLE' define other tables in the same way. * * if the csv file has more than segmentSize lines, then it is converted to several different DOMs in * units of segmentSize. This option is only available if the csv file represents one table only, * without an initial row '$TABLE <TABLE_NAME>'. * * @author Robert * */ public class CSV_Wrapper extends AbstractMapperWrapper implements MapperWrapper, StructureDefinition { /** * @param ms set of mappings which uses this wrapper transform * @param spare spare argument, set to name of topElementDef */ public CSV_Wrapper(MappedStructure ms, Object spare) throws MapperException { super(ms,spare); // for the StructureDefinition interface; define the root node of a csv structure as a database structure databaseElDef = MapperFactory.eINSTANCE.createElementDef(); setNameAndType("database","database_type",databaseElDef); } /** * @return the type of document transformed to and from; * see static constants in class AbstractMapperWrapper. */ public int transformType() {return AbstractMapperWrapper.TEXT_TYPE;} /** * * @return the file extension of the outer document, with initial '*.' */ public String fileExtension() {return "*.csv";} //---------------------------------------------------------------------------------------------------- // reading the csv to an xml instance - possibly in segments of segmentSize lines //---------------------------------------------------------------------------------------------------- /** * @return the maximum number of csv lines to be converted to an XML DOM in one call of transformIn */ public int getSegmentSize() {return segmentSize;} public void setSegmentSize(int segmentSize) {this.segmentSize = segmentSize;} private int segmentSize = 2000; /** * true only if there has been a previous call of transformIn, and because of the segment size restriction * there are still some more csv lines to convert to a DOM */ private boolean started = false; /** * next line of the csv file to be converted to XML. Initial value = after one table header line has been read */ private int currentLine = 1; /** * lines of the csv file, as strings */ private Vector<String> lines = new Vector<String>(); private boolean needsSegmentation() {return (lines.size() > segmentSize - 1);} /** * @return true if transformIn should be called again to convert more csv rows to a DOM */ public boolean hasMoreRows() {return (currentLine < lines.size());} /** * @param lineNo * @return true if the line number is on a boundary for segmentation - a whole multiple of the segment size */ private boolean segmentBoundary(int lineNo) { return ((lineNo - segmentSize*(lineNo/segmentSize)) == 0); } public void initialise() { started = false; currentLine = 1; } /** * @param csvFileObj input stream of the csv file * @param rootName name of the root element; always 'database' * @throws MapperException if, for instance, the actual structure * does not match the expected structure */ public Document transformIn(Object csvFileObj) throws MapperException { Document doc = XMLUtil.makeOutDoc(); Element root = XMLUtil.newElement(doc, "database"); doc.appendChild(root); // to be done only on the first call if (!started) { if (!(csvFileObj instanceof InputStream)) throw new MapperException("Input for making xml instance is not an InputStream"); InputStream csvFile = (InputStream)csvFileObj; lines = FileUtil.getLines(csvFile); if (lines.size() < 3) throw new MapperException("Cannot convert a csv file with only " + lines.size() + " lines"); if ((needsSegmentation()) && (lines.get(0).startsWith("$TABLE"))) throw new MapperException("CSV files longer than " + segmentSize + " lines cannot contain more than one table"); currentLine = 1; // in case the whole file has been read before } // csv file is long enough to need segmentation and only has one table, called 'TABLE' if (needsSegmentation()) { // make or remake the table element for that table Element tableEl = XMLUtil.newElement(doc, "TABLE"); root.appendChild(tableEl); // first row defines the columns Vector<String> columns = getColumns(lines.get(0), "TABLE"); // read rows into that table from the current line, until you hit a segment boundary or the end of the file boolean segmentBoundary = false; while ((hasMoreRows()) && (!segmentBoundary)) { String dataLine = lines.get(currentLine); // end of this table and start of another table - not expected for large csv files if (dataLine.startsWith("$TABLE")) throw new MapperException("New table is not allowed in large csv file at line " + currentLine); addRow(doc, tableEl, columns, dataLine); // check for a segment boundary, then move on so as not to get stuck at the next call segmentBoundary = segmentBoundary(currentLine); currentLine++; } /* if this is not the end of the file, prepare to read the next segment. currentLine persists to the next call. * If this is the end of the file, be ready to read the whole file again */ started = hasMoreRows(); } // csv file does not need segmentation and may contain several tables else if (!needsSegmentation()) { readLines(doc, root); currentLine = lines.size(); // so that hasMoreRows() will return false now } return doc; } /** * read all the lines of the csv file, and convert them to XML * @param doc * @param database * @param lines * @throws MapperException */ private void readLines(Document doc, Element database) throws MapperException { int currentLine = 0; // iteration to process tables while (currentLine > -1) currentLine = processNextTable(currentLine, doc, database); } /** * read all lines of the csv file for one table, and convert them to XML * @param line first line of the lines for this table * @param doc * @param database top 'database' element * @param lines * @return the line for the start of the next table, or -1 if there is none * @throws MapperException */ private int processNextTable(int line, Document doc, Element database) throws MapperException { int size = lines.size(); int newLine = line; // get the table name and create an element for it String firstLine = lines.get(newLine); String tableName = "TABLE"; if (firstLine.startsWith("$TABLE")) { tableName = getTableName(firstLine); newLine++; } Element tableEl = XMLUtil.newElement(doc, tableName); database.appendChild(tableEl); // get the column names if (newLine > size -1) throw new MapperException("There is no line to define the columns of table " + tableName); Vector<String> columns = getColumns(lines.get(newLine), tableName); // get the rows newLine++; while (size > newLine) { String dataLine = lines.get(newLine); // end of this table and start of another table if (dataLine.startsWith("$TABLE")) return newLine; addRow(doc, tableEl, columns, dataLine); newLine++; } // end of the csv file return -1; } /** * get a table name from a line defining it * @param line * @return * @throws MapperException */ private String getTableName(String line) throws MapperException { // strip off trailing commas from empty cells of spreadsheet StringTokenizer cols = new StringTokenizer(line,","); String firstCol = cols.nextToken(); // strip off initial '$TABLE' StringTokenizer st = new StringTokenizer(firstCol," "); if (st.countTokens() != 2) throw new MapperException("Line '" + line + "' does not define a single table name"); st.nextToken(); // return the table name return st.nextToken(); } /** * get a Vector of column names from a line defining it * @param columnLine * @param tableName * @return * @throws MapperException */ private Vector<String> getColumns(String columnLine, String tableName) throws MapperException { Vector<String> cols = new Vector<String>(); StringTokenizer st = new StringTokenizer(columnLine,","); if (st.countTokens() == 0) throw new MapperException("Empty line defining the columns of table " + tableName); while (st.hasMoreTokens()) { String colName = st.nextToken(); // filter for miscellaneous characters not allowed in tag names StringTokenizer ct = new StringTokenizer(colName," ;.",true); if (ct.countTokens() > 1) throw new MapperException("Disallowed XML tag name '" + colName + "'"); // remove any enclosing double quotes in column headers if ((colName.startsWith("\"")) && (colName.endsWith("\""))) colName = colName.substring(1,colName.length() - 1); cols.add(colName); } return cols; } /** * add XML for a new row to a table, if the csv line can be read * @param doc * @param tableEl * @param columns * @param dataLine * @return true if the csv line could be read * @throws MapperException if the line had too many cells */ private boolean addRow(Document doc, Element tableEl, Vector<String> columns, String dataLine) throws MapperException { boolean success = true; Vector<String> cells = new Vector<String>(); /* try to read the csv line; if there is any failure, just miss the line out, * recording it to the console */ try {cells = GenUtil.parseCSVLine(dataLine);} catch (Exception ex) {success = false;System.out.println("Exception: " + ex.getMessage() + " reading csv line " + dataLine);} if (cells.size() > columns.size()) throw new MapperException("row size " + cells.size() + "is greater than the number of columns " + columns.size() + " in row '" + dataLine + "'"); // add lines successfully read, to the XML DOM if (success) { Element record = XMLUtil.newElement(doc, "record"); for (int i = 0; i < cells.size(); i++) { String value = cells.get(i); // do not create XML elements for empty field values if ((value != null) && (!value.equals(""))) { Element field = XMLUtil.textElement(doc, columns.get(i), value); record.appendChild(field); } } tableEl.appendChild(record); } return success; } //---------------------------------------------------------------------------------------------------- // writing a csv instance - not yet implemented //---------------------------------------------------------------------------------------------------- public String[] transformOut(Element csvRoot) throws MapperException { int i = 1; if (6==(7-i)) throw new MapperException("CSV wrapper class out-transform not yet implemented"); String[] result = null; return result; } //---------------------------------------------------------------------------------------------------- // interface StructureDefinition //---------------------------------------------------------------------------------------------------- private ElementDef databaseElDef = null; private Hashtable<String,ElementDef> elementsByName = new Hashtable<String,ElementDef>(); private Hashtable<String,ElementDef> elementsByType = new Hashtable<String,ElementDef>(); /** * give an ElementDef a name and a type, and populate the lookup tables * @param name * @param type * @param elDef */ private void setNameAndType(String name, String type, ElementDef elDef) { elDef.setName(name); elDef.setType(type); elDef.setExpanded(true); elementsByName.put(name, elDef); elementsByType.put(type, elDef); } /** * find the Element and Attribute structure of some named top element (which may have a named * complex type, or a locally defined anonymous type), stopping at the * next complex type definitions it refers to * @param String name the name of the element * @return Element the EObject subtree (Element and Attribute EObjects) defined by the name */ public ElementDef nameStructure(String name) throws MapperException { return elementsByName.get(name); } /** * find the Element and Attribute structure of some complex type, stopping at the * next complex type definitions it refers to * @param type the name of the complex type * @return the EObject subtree (Element and Attribute EObjects) defined by the type */ public ElementDef typeStructure(String type) throws MapperException { return elementsByType.get(type); } /** * * @return an array of the top-level complex types defined in the structure definition - * any of which can be the type of a mapping set */ public String[] topComplexTypes() { String[] types = {"","database_type"}; return types; } /** * * @return an array of the top-level element names defined in the structure definition - * any of which can be the type of a mapping set */ public String[] topElementNames() { String[] names = {"","database"}; return names; } /** * @return the set of namespaces defined for the structure */ public NamespaceSet NSSet() { return new NamespaceSet(); } /** * @param modelClassName * @param modelPropertyName * @return true if this property value supplier supplies values for the * model class and property */ public boolean suppliesPropertyValues(String modelClassName, String modelPropertyName) { if ((modelClassName.equals("MappedStructure")) && (modelPropertyName.equals("Top Element Type"))) return true; if ((modelClassName.equals("MappedStructure")) && (modelPropertyName.equals("Top Element Name"))) return true; return false; } /** * @param modelClassName * @param modelPropertyName * @return the values supplied by this supplier for the model class and property */ public String[] propertyValues(String modelClassName, String modelPropertyName) { String[] vals = {}; if ((modelClassName.equals("MappedStructure")) && (modelPropertyName.equals("Top Element Type"))) return topComplexTypes(); if ((modelClassName.equals("MappedStructure")) && (modelPropertyName.equals("Top Element Name"))) return topElementNames(); return vals; } /** * needs to be called to find out the structure * @param csvFileObj * @throws MapperException */ public void getStructure(Object csvFileObj) throws MapperException { if (!(csvFileObj instanceof InputStream)) throw new MapperException("Input for getting structure definition is not an InputStream"); InputStream csvFile = (InputStream)csvFileObj; Vector<String> lines = FileUtil.getLines(csvFile); if (lines.size() < 3) throw new MapperException("Cannot get structure defnition from a csv file with only " + lines.size() + " lines"); int currentLine = 0; // iteration to process tables while (currentLine > -1) currentLine = processNextTableLines(currentLine, lines); } /** * * @param line * @param lines * @return * @throws MapperException */ private int processNextTableLines(int line, Vector<String> lines) throws MapperException { int newLine = line; // get the table name String firstLine = lines.get(newLine); String tableName = "TABLE"; if (firstLine.startsWith("$TABLE")) { tableName = getTableName(firstLine); newLine++; // ready to read column names for the table } // and create an elementDef for the table, with a child 'record' ElementDef ElementDef tableDef = MapperFactory.eINSTANCE.createElementDef(); setNameAndType(tableName, tableName + "_Type",tableDef); tableDef.setMinMultiplicity(MinMult.ONE); tableDef.setMaxMultiplicity(MaxMult.ONE); databaseElDef.getChildElements().add(tableDef); ElementDef recordDef = MapperFactory.eINSTANCE.createElementDef(); setNameAndType("record", "record_" + tableName + "_Type",recordDef); recordDef.setMinMultiplicity(MinMult.ZERO); recordDef.setMaxMultiplicity(MaxMult.UNBOUNDED); tableDef.getChildElements().add(recordDef); // get the column names if (newLine > lines.size() -1) throw new MapperException("There is no line to define the columns of table " + tableName); Vector<String> columns = getColumns(lines.get(newLine), tableName); newLine++; // move on to a row containing data content of this table, or a new table // add ElementDefs for the columns for (int col = 0; col < columns.size();col++) { ElementDef cellDef = MapperFactory.eINSTANCE.createElementDef(); String colName = columns.get(col); setNameAndType(colName, colName + "_Type",cellDef); cellDef.setMinMultiplicity(MinMult.ZERO); cellDef.setMaxMultiplicity(MaxMult.ONE); recordDef.getChildElements().add(cellDef); } // read through data rows to the next table-defining rows boolean tableFound = false; while ((!tableFound) && (newLine < lines.size())) { String nextLine = lines.get(newLine); tableFound = (nextLine.startsWith("$TABLE")); newLine++; } if (!tableFound) newLine = -1; return newLine; } }