/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.nio.xml; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_DEFAULT_NAMESPACE; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_FILE; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_NAMESPACES; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_USE_DEFAULT_NAMESPACE; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_USE_NAMESPACES; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_XPATHS_FOR_ATTRIBUTES; import static com.rapidminer.operator.nio.xml.XMLExampleSource.PARAMETER_XPATH_FOR_EXAMPLES; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; import javax.swing.table.TableModel; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.xml.sax.SAXException; import com.rapidminer.RapidMiner; import com.rapidminer.gui.tools.VersionNumber; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.OperatorVersion; import com.rapidminer.operator.UserError; import com.rapidminer.operator.nio.model.AbstractDataResultSetReader; import com.rapidminer.operator.nio.model.DataResultSet; import com.rapidminer.operator.nio.model.DataResultSetFactory; import com.rapidminer.operator.nio.model.DefaultPreview; import com.rapidminer.operator.nio.model.ParseException; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.parameter.ParameterTypeEnumeration; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.tools.LogService; import com.rapidminer.tools.ProgressListener; /** * This is the {@link DataResultSetFactory} for the XML Import. It is able * to read the parameters stored in the operator to create a {@link DataResultSet} accordingly. * * @author Sebastian Land */ public class XMLResultSetConfiguration implements DataResultSetFactory { private String fileName; /** * Maps ids to namespaces. */ private Map<String, String> namespaceMap; private String exampleXPath; private List<String> attributeXPaths; private boolean isNamespaceAware; private String defaultNamespaceURI; private Document prefetchedDocument; private OperatorVersion xmlExampleSourceCompatibilityVersion; /** * This creates a completely empty configuration */ public XMLResultSetConfiguration() { namespaceMap = new HashMap<String, String>(); } public void setDefaultNamespaceURI(String defaultNamespaceURI) { this.defaultNamespaceURI = defaultNamespaceURI; } /** * This constructor will read all the needed parameters from the given operator. */ public XMLResultSetConfiguration(XMLExampleSource operator) throws OperatorException { this(); VersionNumber rmVersion = RapidMiner.getVersion(); if (operator instanceof XMLExampleSource) { XMLExampleSource xmlExampleSource = (XMLExampleSource)operator; xmlExampleSourceCompatibilityVersion = xmlExampleSource.getCompatibilityLevel(); } else { xmlExampleSourceCompatibilityVersion = new OperatorVersion(rmVersion.getMajorNumber(), rmVersion.getMinorNumber(), rmVersion.getPatchLevel()); } // if (operator.isParameterSet(PARAMETER_FILE)) // fileName = operator.getParameterAsString(PARAMETER_FILE); if (operator.isFileSpecified()) fileName = operator.getSelectedFile().getAbsolutePath(); if (operator.isParameterSet(PARAMETER_XPATH_FOR_EXAMPLES)) exampleXPath = operator.getParameterAsString(PARAMETER_XPATH_FOR_EXAMPLES); if (operator.getParameterAsBoolean(PARAMETER_USE_DEFAULT_NAMESPACE) && operator.isParameterSet(PARAMETER_DEFAULT_NAMESPACE)) { defaultNamespaceURI = operator.getParameterAsString(PARAMETER_DEFAULT_NAMESPACE); } else { defaultNamespaceURI = null; } isNamespaceAware = operator.getParameterAsBoolean(PARAMETER_USE_NAMESPACES); if (isNamespaceAware && operator.isParameterSet(PARAMETER_NAMESPACES)) { for (String[] pair : operator.getParameterList(PARAMETER_NAMESPACES)) { namespaceMap.put(pair[0], pair[1]); } } attributeXPaths = new ArrayList<String>(); if (operator.isParameterSet(PARAMETER_XPATHS_FOR_ATTRIBUTES)) for (String attributeXPath : ParameterTypeEnumeration.transformString2Enumeration(operator.getParameterAsString(PARAMETER_XPATHS_FOR_ATTRIBUTES))) { attributeXPaths.add(attributeXPath); } } @Override public DataResultSet makeDataResultSet(Operator operator) throws OperatorException { return new XMLResultSet(operator, this, xmlExampleSourceCompatibilityVersion); } @Override public TableModel makePreviewTableModel(ProgressListener listener) throws OperatorException, ParseException { // TODO: Avoid double load of result set. return new DefaultPreview(makeDataResultSet(null), listener); } @Override public String getResourceName() { return fileName; } /** * This returns the full resource identifier. */ public String getResourceIdentifier() { return fileName; } @Override public ExampleSetMetaData makeMetaData() { ExampleSetMetaData emd = new ExampleSetMetaData(); emd.numberOfExamplesIsUnkown(); return emd; } @Override public void setParameters(AbstractDataResultSetReader operator) { operator.setParameter(PARAMETER_FILE, fileName); operator.setParameter(PARAMETER_XPATH_FOR_EXAMPLES, exampleXPath); operator.setParameter(PARAMETER_USE_NAMESPACES, Boolean.toString(isNamespaceAware)); operator.setParameter(PARAMETER_USE_DEFAULT_NAMESPACE, Boolean.toString(getDefaultNamespaceURI() != null)); if (getDefaultNamespaceURI() != null) { // leave unchanged if user did not select a namespace. // this parameter is not used anyway then, since PARAMETER_USE_DEFAULT_NAMESPACE is null in this case. operator.setParameter(PARAMETER_DEFAULT_NAMESPACE, getDefaultNamespaceURI()); } List<String[]> list = new LinkedList<String[]>(); for (Map.Entry<String, String> entry : namespaceMap.entrySet()) { list.add(new String[] { entry.getKey(), entry.getValue() }); } operator.setParameter(PARAMETER_NAMESPACES, ParameterTypeList.transformList2String(list)); operator.setParameter(PARAMETER_XPATHS_FOR_ATTRIBUTES, ParameterTypeEnumeration.transformEnumeration2String(attributeXPaths)); } @Override public void close() { } /** * This method defines whether the XML should be parsed namespace aware or not. */ public boolean isNamespaceAware() { return isNamespaceAware; } /** * This method has to return the String representing the xpath expression that should form the examples. */ public String getExampleXPath() { return exampleXPath; } public void setExampleXPath(String exampleXPath) { this.exampleXPath = exampleXPath; } /** * This method must return the XPath expressions in order */ public List<String> getAttributeXPaths() { return attributeXPaths; } public void setAttributeXPaths(List<String> attributeXPaths) { this.attributeXPaths = attributeXPaths; } /** * This method has to return all defined namespaces. The key will be the prefix used * for identifying the namespace, the value is the URI. */ public Map<String, String> getNamespacesMap() { return namespaceMap; } /** * This method will return the ID as saved in the namespaceMap for the given uri. * If no such uri is registered null is returned. */ public String getNamespaceId(String namespaceURI) { for (Entry<String, String> entry: namespaceMap.entrySet()) { if (entry.getValue().equals(namespaceURI)) return entry.getKey(); } return null; } /** * This sets the used resource identifier. */ public void setResourceIdentifier(String resourceIdentifier) { this.fileName = resourceIdentifier; this.prefetchedDocument = null; //reseting cached dom. } /** * This will load the DOM from the current xml file if necessary or return the * already loaded one. This avoids multiple loaded instances of the same xml file. */ public Document getDocumentObjectModel() throws OperatorException { if (prefetchedDocument == null) { // load document: After expressions to fail fast in case expressions are syntactically wrong DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); domFactory.setValidating(false); domFactory.setNamespaceAware(isNamespaceAware()); try { domFactory.setFeature("http://xml.org/sax/features/namespaces", isNamespaceAware()); domFactory.setFeature("http://xml.org/sax/features/validation", false); domFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); domFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); DocumentBuilder builder = domFactory.newDocumentBuilder(); String resourceIdentifier = getResourceIdentifier(); if (resourceIdentifier == null) { throw new UserError(null, "file_consumer.no_file_defined"); } this.prefetchedDocument = builder.parse(new File(resourceIdentifier)); return prefetchedDocument; } catch (ParserConfigurationException e) { LogService.getRoot().log(Level.WARNING, "Failed to configure XML parser: "+e, e); throw new OperatorException("Failed to configure XML parser: "+e, e); } catch (SAXException e) { LogService.getRoot().log(Level.WARNING, "Failed to parse XML document: "+e, e); throw new OperatorException("Failed to parse XML document: "+e, e); } catch (IOException e) { LogService.getRoot().log(Level.WARNING, "Failed to parse XML document: "+e, e); throw new OperatorException("Failed to parse XML document: "+e, e); } //throw new UserError(null, 100); } else { return prefetchedDocument; } } /** * This returns a string for the default namespace uri or null if no one defined. */ public String getDefaultNamespaceURI() { return defaultNamespaceURI; } public void setNamespacesMap(Map<String, String> idNamespaceMap) { this.namespaceMap = idNamespaceMap; } public void setNamespaceAware(boolean b) { this.isNamespaceAware = b; } public OperatorVersion getXmlExampleSourceCompatibilityVersion() { return xmlExampleSourceCompatibilityVersion; } }