/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.generator.ConstantGenerator;
import com.rapidminer.generator.FeatureGenerator;
import com.rapidminer.generator.GenerationException;
import com.rapidminer.tools.LoggingHandler;
import com.rapidminer.tools.Ontology;
/**
 * Parses attribute construction descriptions and generates the described
 * attributes in an example table. Descriptions can be given one at a time
 * (see {@link #generateAttribute(LoggingHandler, String)}) or as an XML
 * document containing several of them (see
 * {@link #generateAll(LoggingHandler, ExampleSet, InputStream)}).
 *
 * A construction description is a comma separated list of attribute names
 * and function applications of the form <code>function(arg1, arg2, ...)</code>,
 * which may be nested.
 *
 * Instances keep mutable parsing state in a field, so a single instance
 * should not be used for several parsing processes at the same time.
 *
 * @author Simon Fischer, Ingo Mierswa
 * @version $Id: AttributeParser.java,v 1.8 2008/08/20 16:50:50 ingomierswa Exp $
 */
public class AttributeParser {

    /** The example table to which the attributes should be added. */
    private final ExampleTable exampleTable;

    /**
     * The attributes which should be constructed during this construction
     * parsing process (including intermediate atts). Filled by
     * {@link #parseAttributes(String)}; inner function applications are
     * added before the applications which use them as arguments.
     */
    private final List<Attribute> attributes2Construct = new LinkedList<Attribute>();

    /**
     * Creates a parser which generates new attributes in the given table.
     *
     * @param et the example table to which generated attributes are added
     */
    public AttributeParser(ExampleTable et) {
        this.exampleTable = et;
    }

    /**
     * Reads an XML document from the given stream and generates one attribute
     * for each <code>&lt;attribute&gt;</code> element found directly below the
     * root element <code>&lt;constructions&gt;</code>. The XML attribute
     * <code>construction</code> holds the construction description; the
     * optional XML attribute <code>name</code> holds the name for the
     * resulting attribute. Each generated attribute is added to the example
     * set as a regular attribute.
     *
     * @param logging the logging handler, passed on to the generation
     * @param exampleSet the example set which receives the generated attributes
     * @param in the stream delivering the XML document; it is not closed here
     * @throws IOException if the document cannot be parsed, if the root tag is
     *             not <code>constructions</code>, or if a child element is not
     *             an <code>attribute</code> tag
     * @throws GenerationException if a construction description is malformed
     *             or cannot be generated
     */
    public void generateAll(LoggingHandler logging, ExampleSet exampleSet, InputStream in) throws IOException, GenerationException {
        // NOTE(review): the parser is created with the factory defaults. If the
        // stream may come from an untrusted source, consider disabling DTDs and
        // external entities on the factory.
        Document document;
        try {
            document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
        } catch (SAXException e) {
            throw toIOException(e);
        } catch (ParserConfigurationException e) {
            throw toIOException(e);
        }

        Element constructionsElement = document.getDocumentElement();
        if (!constructionsElement.getTagName().equals("constructions")) {
            throw new IOException("Outer tag of attribute constructions file must be <constructions>");
        }

        NodeList constructions = constructionsElement.getChildNodes();
        for (int i = 0; i < constructions.getLength(); i++) {
            Node node = constructions.item(i);
            if (!(node instanceof Element)) {
                // text nodes, comments etc. between the tags are ignored
                continue;
            }
            Element constructionTag = (Element) node;
            String tagName = constructionTag.getTagName();
            if (!tagName.equals("attribute")) {
                throw new IOException("Only tags <attribute> are allowed, was " + tagName);
            }
            String attributeName = constructionTag.getAttribute("name");
            String attributeString = constructionTag.getAttribute("construction");
            Attribute att = generateAttribute(logging, attributeString);
            if (att != null) {
                // Element.getAttribute() delivers the empty string (not null) for a
                // missing attribute; in this case the generated name must be kept.
                if ((attributeName != null) && (attributeName.length() > 0)) {
                    att.setName(attributeName);
                }
                exampleSet.getAttributes().addRegular(att);
            }
        }
    }

    /**
     * Parses the given construction description and generates all function
     * applications it contains in the example table.
     *
     * @param logging the logging handler, passed on to the generation
     * @param constructionDescription the description to parse
     * @return the attribute generated last, or <code>null</code> if nothing
     *         was generated
     * @throws GenerationException if the description is malformed or an
     *             attribute cannot be generated
     */
    public Attribute generateAttribute(LoggingHandler logging, String constructionDescription) throws GenerationException {
        attributes2Construct.clear();
        parseAttributes(constructionDescription);
        return generate(logging, attributes2Construct);
    }

    // ===========================================================================

    /**
     * Creates an IOException with the message of the given exception and
     * the exception itself as cause, so that the original stack trace is kept.
     */
    private static IOException toIOException(Exception cause) {
        IOException wrapped = new IOException(cause.getMessage());
        wrapped.initCause(cause);
        return wrapped;
    }

    /**
     * Delivers the index of the closing bracket which matches the opening
     * bracket at the given index.
     *
     * @param string the string to search
     * @param startIndex the index of the opening bracket
     * @return the index of the matching closing bracket
     * @throws GenerationException if there is no matching closing bracket
     */
    private static int getClosingBracketIndex(String string, int startIndex) throws GenerationException {
        // number of brackets which are currently open, including the one at startIndex
        int openCount = 1;
        while (true) {
            int nextOpen = string.indexOf("(", startIndex + 1);
            int nextClosing = string.indexOf(")", startIndex + 1);
            if (nextClosing == -1) {
                throw new GenerationException("Malformed attribute description: mismatched parantheses");
            }
            if ((nextOpen != -1) && (nextOpen < nextClosing)) {
                // a nested bracket opens before the next one closes
                openCount++;
                startIndex = nextOpen;
            } else {
                openCount--;
                startIndex = nextClosing;
            }
            if (openCount == 0) {
                return nextClosing;
            }
        }
    }

    /**
     * Creates a real-valued attribute for a plain name of a construction
     * description. The name of the constant function is rejected here since
     * it was used without brackets.
     */
    private static Attribute createNamedAttribute(String name) throws GenerationException {
        if (name.startsWith(ConstantGenerator.FUNCTION_NAME)) {
            throw new GenerationException("The function name '" + ConstantGenerator.FUNCTION_NAME + "' must be used with empty arguments!");
        }
        return AttributeFactory.createAttribute(name, Ontology.REAL);
    }

    /**
     * Recursively parses the given comma separated construction string. Plain
     * names become real-valued attributes. Function applications become
     * attributes created from the function name and the constructions of the
     * arguments; these are additionally appended to the list of attributes
     * which must be constructed.
     *
     * @param construction the (partial) construction description
     * @return the attributes described on the top level of the given string
     * @throws GenerationException if brackets do not match or the constant
     *             function is used without brackets
     */
    private List<Attribute> parseAttributes(String construction) throws GenerationException {
        List<Attribute> attributes = new LinkedList<Attribute>();
        int start = 0;
        while (start < construction.length()) {
            int leftBr = construction.indexOf("(", start);
            int comma = construction.indexOf(",", start);
            if ((leftBr == -1) || ((comma != -1) && (comma < leftBr))) {
                // plain name: ends at the next comma or at the end of the string
                int end = (comma == -1) ? construction.length() : comma;
                String name = construction.substring(start, end).trim();
                attributes.add(createNamedAttribute(name));
                start = end + 1;
            } else {
                // function application: parse the arguments between the brackets first
                int rightBr = getClosingBracketIndex(construction, leftBr);
                String functionName = construction.substring(start, leftBr).trim();
                List<Attribute> argumentList = parseAttributes(construction.substring(leftBr + 1, rightBr).trim());
                ConstructionDescription[] argumentDescriptions = new ConstructionDescription[argumentList.size()];
                int index = 0;
                for (Attribute argument : argumentList) {
                    argumentDescriptions[index++] = argument.getConstruction();
                }
                Attribute generated = AttributeFactory.createAttribute(functionName, argumentDescriptions);
                attributes.add(generated);
                attributes2Construct.add(generated);
                // continue behind the comma following the closing bracket (if any)
                int nextComma = construction.indexOf(",", rightBr);
                start = (nextComma == -1) ? construction.length() : nextComma + 1;
            }
        }
        return attributes;
    }

    /**
     * Delivers the first attribute of the table with the same name as the
     * given attribute or <code>null</code> if there is no such attribute.
     */
    private Attribute getAttributeInTable(ExampleTable table, Attribute attribute) {
        for (int i = 0; i < table.getNumberOfAttributes(); i++) {
            Attribute a = table.getAttribute(i);
            if ((a != null) && (a.getName().equals(attribute.getName()))) {
                return a;
            }
        }
        return null;
    }

    /**
     * Delivers the first attribute of the table with a construction equal to
     * the construction of the given attribute or <code>null</code> if there is
     * no such attribute.
     */
    private Attribute getConstructedAttributeInTable(ExampleTable table, Attribute attribute) {
        for (int i = 0; i < table.getNumberOfAttributes(); i++) {
            Attribute a = table.getAttribute(i);
            if ((a != null) && (a.getConstruction().equals(attribute.getConstruction()))) {
                return a;
            }
        }
        return null;
    }

    /**
     * Creates the feature generator for the function of the given attribute
     * and sets its arguments to the corresponding attributes of the example
     * table. Arguments are looked up by name first and by construction second.
     *
     * @param a the attribute describing the function application
     * @return the generator with resolved arguments
     * @throws GenerationException if no generator exists for the function, if
     *             an argument cannot be found in the example table, or if the
     *             number of arguments does not fit the generator
     */
    private FeatureGenerator getGenerator(Attribute a) throws GenerationException {
        ConstructionDescription construction = a.getConstruction();
        FeatureGenerator fg = FeatureGenerator.createGeneratorForFunction(construction.getFunction());
        if (fg == null) {
            throw new GenerationException("No generator found for function " + construction.getFunction());
        }

        Attribute[] args = new Attribute[construction.getArguments().length];
        for (int c = 0; c < args.length; c++) {
            args[c] = construction.getArguments()[c].getAttribute();
        }

        // replace the parsed argument attributes by the actual attributes of the table
        for (int n = 0; n < args.length; n++) {
            Attribute actualAttribute = getAttributeInTable(exampleTable, args[n]);
            if (actualAttribute == null) {
                actualAttribute = getConstructedAttributeInTable(exampleTable, args[n]);
            }
            if (actualAttribute == null) {
                throw new GenerationException("No attribute with name " + args[n].getName() + " was found.");
            }
            args[n] = actualAttribute;
        }

        if (fg.getInputAttributes().length != args.length) {
            throw new GenerationException(fg + " has arity " + fg.getInputAttributes().length + "!");
        }
        fg.setArguments(args);
        return fg;
    }

    /**
     * Generates all given attributes which are not yet part of the example
     * table (checked by name), in the given order.
     *
     * @param logging the logging handler (currently not used here)
     * @param constructions the attributes to generate
     * @return the last attribute delivered by the last performed generation,
     *         or <code>null</code> if no generation was performed
     * @throws GenerationException if a generator cannot be created or applied
     */
    private Attribute generate(LoggingHandler logging, List<Attribute> constructions) throws GenerationException {
        // NOTE(review): if the last construction already exists in the table, the
        // result of an earlier generation (or null) is delivered instead of the
        // existing attribute -- confirm that this is intended.
        Attribute targetAttribute = null;
        for (Attribute constructionAttribute : constructions) {
            if (getAttributeInTable(exampleTable, constructionAttribute) == null) {
                FeatureGenerator fg = getGenerator(constructionAttribute);
                List<FeatureGenerator> generatorList = new LinkedList<FeatureGenerator>();
                generatorList.add(fg);
                List<Attribute> lastGeneratedAtts = FeatureGenerator.generateAll(exampleTable, generatorList);
                targetAttribute = lastGeneratedAtts.get(lastGeneratedAtts.size() - 1);
            }
        }
        return targetAttribute;
    }
}