/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.xml;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.NodeList;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.nio.model.DataResultSet;
import com.rapidminer.operator.nio.model.ParseException;
import com.rapidminer.operator.nio.model.ParsingError;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.xml.MapBasedNamespaceContext;
/**
 * A {@link DataResultSet} backed by an XML document. One XPath expression selects the nodes that
 * form the examples (rows); a list of further XPath expressions is evaluated relative to each of
 * these nodes to obtain the column values. Every column is reported as nominal and its values are
 * only available as strings.
 *
 * @author Sebastian Land
 */
public class XMLResultSet implements DataResultSet {

    /** The nodes matched by the example XPath expression; each node is one row. */
    private final NodeList exampleNodes;

    /** Compiled attribute expressions, one per column, evaluated relative to an example node. */
    private final XPathExpression[] attributeExpressions;

    /** Column names; the textual attribute XPath expressions are used as names. */
    private final String[] attributeNames;

    /** Value type of each column; always {@link Ontology#NOMINAL}. */
    private final int[] attributeValueTypes;

    /** Values of the current row; an entry is <code>null</code> if it could not be evaluated. */
    private final String[] currentExampleValues;

    /** Version of the calling operator, used to select the compatible evaluation mode in {@link #next(ProgressListener)}. */
    private final OperatorVersion operatorVersion;

    /** Index of the current row within {@link #exampleNodes}; -1 before the first call of next. */
    private int currentExampleIndex = -1;

    /**
     * The constructor to build an XMLResultSet from the given configuration. The calling operator might be null. It
     * is only needed for error handling.
     *
     * @param callingOperator the operator reported in user errors; may be <code>null</code>
     * @param configuration provides the namespaces, the XPath expressions and the document object model
     * @param operatorVersion the compatibility version deciding how attribute expressions are evaluated
     * @throws OperatorException if the example XPath is missing (user error 217), if one of the XPath
     *             expressions cannot be compiled or evaluated (user error 214), or if the configuration
     *             fails to provide the document
     */
    public XMLResultSet(Operator callingOperator, XMLResultSetConfiguration configuration, OperatorVersion operatorVersion) throws OperatorException {
        this.operatorVersion = operatorVersion;

        // creating XPath environment
        final XPath xpath = XPathFactory.newInstance().newXPath();
        final Map<String, String> namespacesMap = configuration.getNamespacesMap();
        xpath.setNamespaceContext(new MapBasedNamespaceContext(namespacesMap, configuration.getDefaultNamespaceURI()));

        // generating Example's expression
        final String exampleXPath = configuration.getExampleXPath();
        if (exampleXPath == null) {
            // the operator is optional, so its name must not be requested from a null reference
            final String operatorName = (callingOperator != null) ? callingOperator.getName() : "";
            throw new UserError(callingOperator, 217, XMLExampleSource.PARAMETER_XPATH_FOR_EXAMPLES, operatorName, "");
        }
        final XPathExpression exampleExpression = compileXPath(xpath, exampleXPath, callingOperator);

        // generating Attribute's expressions
        final List<String> attributeXPaths = configuration.getAttributeXPaths();
        final int columnCount = attributeXPaths.size();
        attributeExpressions = new XPathExpression[columnCount];
        attributeNames = new String[columnCount];
        int column = 0;
        for (String attributeXPath : attributeXPaths) {
            attributeNames[column] = attributeXPath;
            attributeExpressions[column] = compileXPath(xpath, attributeXPath, callingOperator);
            column++;
        }

        attributeValueTypes = new int[columnCount];
        Arrays.fill(attributeValueTypes, Ontology.NOMINAL);
        currentExampleValues = new String[columnCount];

        // the example nodes are selected once; iterating afterwards only walks this list
        exampleNodes = selectExampleNodes(exampleExpression, exampleXPath, configuration, callingOperator);
    }

    /**
     * Compiles the given XPath expression and translates a compilation failure into user error 214
     * that names the offending expression.
     */
    private static XPathExpression compileXPath(XPath xpath, String expression, Operator callingOperator) throws UserError {
        try {
            return xpath.compile(expression);
        } catch (XPathExpressionException e) {
            throw new UserError(callingOperator, 214, expression);
        }
    }

    /**
     * Evaluates the example expression against the configured document and translates an evaluation
     * failure into user error 214 that names the example XPath.
     */
    private static NodeList selectExampleNodes(XPathExpression exampleExpression, String exampleXPath, XMLResultSetConfiguration configuration, Operator callingOperator) throws OperatorException {
        try {
            return (NodeList) exampleExpression.evaluate(configuration.getDocumentObjectModel(), XPathConstants.NODESET);
        } catch (XPathExpressionException e) {
            throw new UserError(callingOperator, 214, exampleXPath);
        }
    }

    /**
     * Returns whether another example node follows the current one.
     */
    @Override
    public boolean hasNext() {
        return exampleNodes.getLength() > currentExampleIndex + 1;
    }

    /**
     * Advances to the next example node and evaluates all attribute expressions on it. A value whose
     * expression cannot be evaluated or converted becomes missing (<code>null</code>).
     *
     * @param listener not used by this implementation
     * @throws NoSuchElementException if there is no further example node; the current row is left
     *             unchanged in this case
     */
    @Override
    public void next(ProgressListener listener) throws OperatorException {
        // check before advancing so that a failed call does not move the cursor past the end
        final int nextExampleIndex = currentExampleIndex + 1;
        if (nextExampleIndex >= exampleNodes.getLength()) {
            throw new NoSuchElementException("No further match to examples XPath expression in XML file. Accessed " + nextExampleIndex + " but has " + exampleNodes.getLength());
        }
        currentExampleIndex = nextExampleIndex;

        for (int i = 0; i < attributeExpressions.length; i++) {
            try {
                if (operatorVersion.compareTo(XMLExampleSource.CHANGE_5_1_013_NODE_OUTPUT) > 0) {
                    // versions after the change: the matched node list is converted into a string
                    NodeList nodeList = (NodeList) attributeExpressions[i].evaluate(exampleNodes.item(currentExampleIndex), XPathConstants.NODESET);
                    currentExampleValues[i] = XMLDomHelper.nodeListToString(nodeList);
                } else {
                    // compatibility mode: use the XPath string value of the expression
                    currentExampleValues[i] = (String) attributeExpressions[i].evaluate(exampleNodes.item(currentExampleIndex), XPathConstants.STRING);
                }
            } catch (XPathExpressionException e) {
                // deliberately not propagated: the value is reported as missing instead
                currentExampleValues[i] = null;
            } catch (TransformerException e) {
                // deliberately not propagated: the value is reported as missing instead
                currentExampleValues[i] = null;
            }
        }
    }

    /**
     * Returns the number of columns, i.e. the number of attribute XPath expressions.
     */
    @Override
    public int getNumberOfColumns() {
        return attributeNames.length;
    }

    /**
     * Returns the column names, which are the attribute XPath expressions. The internal array is
     * returned without copying.
     */
    @Override
    public String[] getColumnNames() {
        return attributeNames;
    }

    /**
     * Returns whether the value of the given column is missing in the current row.
     */
    @Override
    public boolean isMissing(int columnIndex) {
        return currentExampleValues[columnIndex] == null;
    }

    /**
     * This method is not supported by the XML result set. Anytime it is called a ParseException will be thrown.
     */
    @Override
    public Number getNumber(int columnIndex) throws ParseException {
        throw new ParseException(new ParsingError(currentExampleIndex, columnIndex, ParsingError.ErrorCode.UNPARSEABLE_REAL, ""));
    }

    /**
     * Returns the value of the given column in the current row, or <code>null</code> if it is missing.
     */
    @Override
    public String getString(int columnIndex) throws ParseException {
        return currentExampleValues[columnIndex];
    }

    /**
     * This method is not supported by the XML result set. Anytime it is called a ParseException will be thrown.
     */
    @Override
    public Date getDate(int columnIndex) throws ParseException {
        throw new ParseException(new ParsingError(currentExampleIndex, columnIndex, ParsingError.ErrorCode.UNPARSEABLE_DATE, ""));
    }

    /**
     * Returns {@link ValueType#STRING} for every column.
     */
    @Override
    public ValueType getNativeValueType(int columnIndex) throws ParseException {
        return ValueType.STRING;
    }

    /**
     * Does nothing: this result set only holds the node list that was evaluated in the constructor.
     */
    @Override
    public void close() throws OperatorException {
        // Nothing to close
    }

    /**
     * Moves the cursor back before the first example node.
     *
     * @param listener not used by this implementation
     */
    @Override
    public void reset(ProgressListener listener) throws OperatorException {
        currentExampleIndex = -1;
    }

    /**
     * Returns the value types of all columns. The internal array is returned without copying.
     */
    @Override
    public int[] getValueTypes() {
        return attributeValueTypes;
    }

    /**
     * Returns the index of the current example node, or -1 if next has not been called yet.
     */
    @Override
    public int getCurrentRow() {
        return currentExampleIndex;
    }
}