package org.maltparser.core.io.dataformat; import java.net.URL; import java.util.LinkedHashMap; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.maltparser.core.exception.MaltChainedException; import org.maltparser.core.helper.HashSet; import org.maltparser.core.helper.URLFinder; import org.maltparser.core.symbol.SymbolTableHandler; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * * * @author Johan Hall * @since 1.0 * */ public class DataFormatSpecification { public enum DataStructure { DEPENDENCY, // Dependency structure PHRASE, // Phrase structure }; // private int entryPositionCounter; private String dataFormatName; private DataStructure dataStructure; private final Map<String, DataFormatEntry> entries; private final HashSet<Dependency> dependencies; // private final HashSet<SyntaxGraphReader> supportedReaders; // private final HashSet<SyntaxGraphWriter> supportedWriters; public DataFormatSpecification() { entries = new LinkedHashMap<String, DataFormatEntry>(); // entryPositionCounter = 0; dependencies = new HashSet<Dependency>(); // supportedReaders = new HashSet<SyntaxGraphReader>(); // supportedWriters = new HashSet<SyntaxGraphWriter>(); } public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException { return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this); } public void parseDataFormatXMLfile(String fileName) throws MaltChainedException { final URLFinder f = new URLFinder(); URL url = f.findURL(fileName); if (url == null) { throw new DataFormatException("The data format specifcation file '" + fileName + "'cannot be found. "); } parseDataFormatXMLfile(url); } public HashSet<Dependency> getDependencies() { return dependencies; } public void parseDataFormatXMLfile(URL url) throws MaltChainedException { if (url == null) { throw new DataFormatException("The data format specifcation file cannot be found. "); } try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Element root = db.parse(url.openStream()).getDocumentElement(); if (root.getNodeName().equals("dataformat")) { dataFormatName = root.getAttribute("name"); if (root.getAttribute("datastructure").length() > 0) { dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase()); } else { dataStructure = DataStructure.DEPENDENCY; } } else { throw new DataFormatException("Data format specification file must contain one 'dataformat' element. "); } NodeList cols = root.getElementsByTagName("column"); Element col; for (int i = 0, n = cols.getLength(); i < n; i++) { col = (Element) cols.item(i); DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"), col.getAttribute("type"), col.getAttribute("default")); entries.put(entry.getDataFormatEntryName(), entry); } NodeList deps = root.getElementsByTagName("dependencies"); if (deps.getLength() > 0) { NodeList dep = ((Element) deps.item(0)).getElementsByTagName("dependency"); for (int i = 0, n = dep.getLength(); i < n; i++) { Element e = (Element) dep.item(i); dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap"))); } } } catch (java.io.IOException e) { throw new DataFormatException("Cannot find the file " + url.toString() + ". ", e); } catch (ParserConfigurationException e) { throw new DataFormatException("Problem parsing the file " + url.toString() + ". ", e); } catch (SAXException e) { throw new DataFormatException("Problem parsing the file " + url.toString() + ". ", e); } } public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) { DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput); entries.put(entry.getDataFormatEntryName(), entry); } public DataFormatEntry getEntry(String dataFormatEntryName) { return entries.get(dataFormatEntryName); } public String getDataFormatName() { return dataFormatName; } public DataStructure getDataStructure() { return dataStructure; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Data format specification: "); sb.append(dataFormatName); sb.append('\n'); for (DataFormatEntry dfe : entries.values()) { sb.append(dfe); sb.append('\n'); } return sb.toString(); } public class Dependency { protected String dependentOn; protected String urlString; protected String map; protected String mapUrl; public Dependency(String dependentOn, String urlString, String map, String mapUrl) { setDependentOn(dependentOn); setUrlString(urlString); setMap(map); setMapUrl(mapUrl); } public String getDependentOn() { return dependentOn; } protected void setDependentOn(String dependentOn) { this.dependentOn = dependentOn; } public String getUrlString() { return urlString; } public void setUrlString(String urlString) { this.urlString = urlString; } public String getMap() { return map; } protected void setMap(String map) { this.map = map; } public String getMapUrl() { return mapUrl; } public void setMapUrl(String mapUrl) { this.mapUrl = mapUrl; } @Override public String toString() { return "Dependency [dependentOn=" + dependentOn + ", map=" + map + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]"; } } }