/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.tools; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.Collection; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorCreationException; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.performance.AbstractPerformanceEvaluator; import com.rapidminer.operator.performance.PerformanceCriterion; /** * <p>This class reads the description of the RapidMiner operators. These descriptions * are entries in a XML File like:</p> <br> * <code> * <operators><br> *   <operator<br> *     name="OperatorName" <br> *     class="java.path.OperatorClass" <br> *     description="OperatorDescription" <br> *     deprecation="OperatorDeprecationInfo" <br> *     group="OperatorGroup" <br> *     icon="OperatorIcon" <br> * /><br> * </code><br> * * <p>The values (and the whole tag) for deprecation and icon might be omitted. If no * deprecation info was specified, the operator is simply not deprecated. If no icon * is specified, RapidMiner just uses the icon of the parent group.</p> * * <p>NOTE: This class should be used to create operators and is therefore an * operator factory.</p> * * @author Ingo Mierswa, Simon Fischer * @version $Id: OperatorService.java,v 1.12 2008/07/12 17:46:46 ingomierswa Exp $ */ public class OperatorService { public static final String MAIN_OPERATORS_NAME = "core"; /** * Maps operator names of form classname|subclassname to operator * descriptions. */ private static Map<String, OperatorDescription> names2descriptions = new HashMap<String, OperatorDescription>(); /** Map for group name <-> group (list). */ private static GroupTree groupTree = new GroupTree(""); /** The Map for all IO objects (maps short names on classes). */ private static Map<String, Class<IOObject>> ioObjects = new TreeMap<String, Class<IOObject>>(); /** Returns the main operator description file (XML). */ public static URL getMainOperators() { Enumeration<URL> allOperatorsXML = null; try { allOperatorsXML = OperatorService.class.getClassLoader().getResources(Tools.RESOURCE_PREFIX + "operators.xml"); } catch (IOException e) { LogService.getGlobal().logWarning("Cannot find any operator description files (XML): " + e.getMessage()); } URL resultURL = null; if (allOperatorsXML != null) { while (allOperatorsXML.hasMoreElements()) { URL candidateURL = allOperatorsXML.nextElement(); InputStream candidateIn = null; try { candidateIn = candidateURL.openStream(); } catch (IOException e1) { // do nothing } if (candidateIn != null) { Document document = null; try { document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(candidateIn); } catch (Exception e) { LogService.getGlobal().log("Cannot read operator description file '" + candidateURL + "': no valid XML: " + e.getMessage(), LogService.WARNING); } if (document != null) { if (!document.getDocumentElement().getTagName().toLowerCase().equals("operators")) { LogService.getGlobal().log("Operator description file '" + candidateURL + "': outermost tag must be <operators>!", LogService.WARNING); continue; } } Attr nameAttr = document.getDocumentElement().getAttributeNode("name"); if (nameAttr != null) { String candidateName = nameAttr.getValue(); if (candidateName != null) { if (candidateName.equals(MAIN_OPERATORS_NAME)) { resultURL = candidateURL; } } } try { candidateIn.close(); } catch (IOException e) { // do nothing } if (resultURL != null) { break; } } } } return resultURL; } /** Registers all operators from a given XML input stream. */ public static void registerOperators(String name, InputStream operatorsXML, ClassLoader classLoader, boolean addWekaOperators) { // create long descriptions map Map<String, String> descriptionMap = loadDescriptionMap(); // register operators if (classLoader == null) classLoader = OperatorService.class.getClassLoader(); LogService.getGlobal().log("Loading operators from '" + name + "'.", LogService.INIT); Document document = null; try { document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(operatorsXML); } catch (Exception e) { LogService.getGlobal().log("Cannot read operator description file '" + name + "': no valid XML: " + e.getMessage(), LogService.ERROR); } if (document != null) { if (!document.getDocumentElement().getTagName().toLowerCase().equals("operators")) { LogService.getGlobal().log("Operator description file '" + name + "': outermost tag must be <operators>!", LogService.ERROR); return; } // operator factories NodeList factoryTags = document.getDocumentElement().getElementsByTagName("factory"); //LogService.getGlobal().log("Registering operators from " + factoryTags.getLength() + " factories.", LogService.INIT); for (int i = 0; i < factoryTags.getLength(); i++) { Element factoryTag = (Element) factoryTags.item(i); Attr classAttr = factoryTag.getAttributeNode("class"); if (classAttr == null) { LogService.getGlobal().log("Operator description file '" + name + "': factory tag must provide class attribute!", LogService.ERROR); } else { Class factoryClass = null; try { factoryClass = classLoader.loadClass(classAttr.getValue()); } catch (ClassNotFoundException e) { LogService.getGlobal().log("Operator factory class '" + classAttr.getValue() + "' not found!", LogService.ERROR); } if (factoryClass != null) { if (GenericOperatorFactory.class.isAssignableFrom(factoryClass)) { GenericOperatorFactory factory = null; try { factory = (GenericOperatorFactory) factoryClass.newInstance(); } catch (Exception e) { LogService.getGlobal().log("Cannot instantiate operator factory class '" + factoryClass.getName() + "'!", LogService.ERROR); } if (factory != null) { if (addWekaOperators || (!(factory instanceof WekaOperatorFactory))) { factory.registerOperators(classLoader); } } } else { LogService.getGlobal().log("Operator description file '" + name + "': only subclasses of GenericOperatorFactory may be defined as class, was '" + classAttr.getValue() + "'!", LogService.ERROR); } } } } // operators NodeList operatorTags = document.getDocumentElement().getElementsByTagName("operator"); //LogService.getGlobal().log("Registering " + operatorTags.getLength() + " operators.", LogService.INIT); for (int i = 0; i < operatorTags.getLength(); i++) { Element currentElement = (Element) operatorTags.item(i); try { registerOperator(currentElement, classLoader, descriptionMap); } catch (Throwable e) { Attr currentNameAttr = currentElement.getAttributeNode("name"); if (currentNameAttr != null) LogService.getGlobal().log("Cannot register '" + currentNameAttr.getValue() + "': " + e, LogService.ERROR); else LogService.getGlobal().log("Cannot register '" + currentElement + "': " + e, LogService.ERROR); } } } else { LogService.getGlobal().logWarning("Operator description '" + name + "' was empty."); } } /** * Registers an operator description from an XML tag (operator description * file, mostly operators.xml). */ private static void registerOperator(Element operatorTag, ClassLoader classLoader, Map<String, String> descriptionMap) throws Exception { Attr nameAttr = operatorTag.getAttributeNode("name"); Attr classAttr = operatorTag.getAttributeNode("class"); if (nameAttr == null) throw new Exception("Missing name for <operator> tag"); if (classAttr == null) throw new Exception("Missing class for <operator> tag"); String name = nameAttr.getValue(); String shortDescription = operatorTag.getAttribute("description"); String longDescription = descriptionMap.get(name); if (longDescription == null) longDescription = shortDescription; registerOperator(classLoader, nameAttr.getValue(), classAttr.getValue(), shortDescription, longDescription, operatorTag.getAttribute("group"), operatorTag.getAttribute("icon"), operatorTag.getAttribute("deprecation")); } /** Registers an operator description from the given meta data. */ private static void registerOperator(ClassLoader classLoader, String name, String clazz, String shortDescription, String longDescription, String group, String icon, String deprecationInfo) throws Exception { registerOperator(new OperatorDescription(classLoader, name, clazz, shortDescription, longDescription, group, icon, deprecationInfo)); } /** * Registers the given operator description. Please note that two different * descriptions must not have the same name. Otherwise the * second description overwrite the first in the description map. */ public static void registerOperator(OperatorDescription description) throws Exception { // check if this operator was not registered earlier OperatorDescription oldDescription = names2descriptions.get(description.getName()); if (oldDescription != null) { LogService.getGlobal().log("An operator '" + description.getName() + "' was already registered. Overwriting...", LogService.WARNING); } // register names2descriptions.put(description.getName(), description); Operator currentOperator = description.createOperatorInstance(); checkIOObjects(currentOperator.getInputClasses()); checkIOObjects(currentOperator.getOutputClasses()); // add to group String groupString = description.getGroup(); String[] groupNames = groupString.split("\\."); GroupTree currentGroup = groupTree; for (int j = 0; j < groupNames.length; j++) { String currentGroupName = groupNames[j].trim(); if (currentGroupName.length() > 0) { GroupTree subGroup = currentGroup.getSubGroup(currentGroupName); if (subGroup == null) { subGroup = new GroupTree(currentGroupName); currentGroup.addSubGroup(subGroup); } currentGroup = subGroup; } } currentGroup.addOperatorDescription(description); } private static Map<String, String> loadDescriptionMap() { URL descriptionUrl = Tools.getResource("long_documentation.txt"); Map<String, String> descriptionMap = new HashMap<String, String>(); if (descriptionUrl != null) { BufferedReader in = null; boolean beginNew = true; try { in = new BufferedReader(new InputStreamReader(descriptionUrl.openStream())); String line = null; String currentName = null; StringBuffer currentDescription = null; while ((line = in.readLine()) != null) { if (line.trim().length() == 0) continue; if (beginNew) { currentName = line; currentDescription = new StringBuffer(); beginNew = false; } else { if (line.startsWith("#####")) { if (currentName != null) { descriptionMap.put(currentName, currentDescription.toString()); currentName = null; currentDescription = null; beginNew = true; } else { currentName = null; currentDescription = null; beginNew = true; } } else { String transformed = Tools.removeAllLineSeparators(line); currentDescription.append(transformed); } } } } catch (IOException e) { LogService.getGlobal().logError("Cannot read long descriptions from resources."); } finally { if (in != null) { try { in.close(); } catch (IOException e) { // do nothing } } } } return descriptionMap; } /** This method is only necessary since the operators deliver Class arrays (which * cannot be instantiated with Generics) and can be removed after this was changed to * collections. * TODO: remove this method after getInputClasses() and getOutputClasses() deliver * collections and call checkIOObjects(Collection) directly. */ @SuppressWarnings("unchecked") private static void checkIOObjects(Class[] objects) { List<Class<IOObject>> result = new LinkedList<Class<IOObject>>(); if (objects != null) { for (int i = 0; i < objects.length; i++) { //Class<IOObject> newClass = (Class<IOObject>)objects[i]; result.add(objects[i]); } } checkIOObjects(result); } /** Checks if the given classes are already registered and adds them if not. */ private static void checkIOObjects(Collection<Class<IOObject>> objects) { Iterator<Class<IOObject>> i = objects.iterator(); while (i.hasNext()) { Class<IOObject> currentClass = i.next(); String current = currentClass.getName(); ioObjects.put(current.substring(current.lastIndexOf(".") + 1), currentClass); } } /** Returns a sorted set of all short IO object names. */ public static Set<String> getIOObjectsNames() { return ioObjects.keySet(); } /** Defines the alias pairs for the {@link XMLSerialization} for all IOObject pairs. */ public static void defineXMLAliasPairs() { // pairs for IOObjects Iterator<Map.Entry<String, Class<IOObject>>> i = ioObjects.entrySet().iterator(); while (i.hasNext()) { Map.Entry<String, Class<IOObject>> entry = i.next(); String objectName = entry.getKey(); Class objectClass = entry.getValue(); XMLSerialization.getXMLSerialization().addAlias(objectName, objectClass); } // pairs for performance criteria Iterator<String> o = getOperatorNames().iterator(); while (o.hasNext()) { String name = o.next(); Operator operator = null; try { operator = createOperator(name); } catch (OperatorCreationException e) { // does nothing } if (operator != null) { if (operator instanceof AbstractPerformanceEvaluator) { AbstractPerformanceEvaluator evaluator = (AbstractPerformanceEvaluator)operator; List<PerformanceCriterion> criteria = evaluator.getCriteria(); for (PerformanceCriterion criterion : criteria) { XMLSerialization.getXMLSerialization().addAlias(criterion.getName(), criterion.getClass()); } } } } } /** * Returns a collection of all operator descriptions of operators which * return the desired IO object as output. */ public static Set<OperatorDescription> getOperatorsDelivering(Class ioObject) { Set<OperatorDescription> result = new HashSet<OperatorDescription>(); Iterator<String> i = names2descriptions.keySet().iterator(); while (i.hasNext()) { String name = i.next(); OperatorDescription description = getOperatorDescription(name); try { Operator currentOperator = description.createOperatorInstance(); if (containsClass(currentOperator.getOutputClasses(), ioObject)) result.add(description); } catch (Exception e) {} } return result; } /** * Returns a collection of all operator descriptions which requires the * given IO object as input. */ public static Set<OperatorDescription> getOperatorsRequiring(Class ioObject) { Set<OperatorDescription> result = new HashSet<OperatorDescription>(); Iterator<String> i = names2descriptions.keySet().iterator(); while (i.hasNext()) { String name = i.next(); OperatorDescription description = getOperatorDescription(name); try { Operator currentOperator = description.createOperatorInstance(); if (containsClass(currentOperator.getInputClasses(), ioObject)) result.add(description); } catch (Exception e) {} } return result; } /** * Returns true if the given class array contains the given class itself or * a subclass. */ private static boolean containsClass(Class<?>[] types, Class<?> type) { if (types != null) { for (int i = 0; i < types.length; i++) { if (type.isAssignableFrom(types[i])) return true; } } return false; } /** Returns the class for the short name of an IO object. */ public static Class<IOObject> getIOObjectClass(String name) { return ioObjects.get(name); } /** * Returns a collection of all operator names. A name has the structure * classname|subclassname. */ public static Set<String> getOperatorNames() { return names2descriptions.keySet(); } /** Returns the group hierarchy of all operators. */ public static GroupTree getGroups() { return groupTree; } /** Reload all icons, e.g. after a look and feel change. */ public static void reloadIcons() { for (String name : getOperatorNames()) { OperatorDescription description = getOperatorDescription(name); description.reloadIcon(null); } } // ================================================================================ // Operator Factory Methods // ================================================================================ /** * Returns the operator descriptions for the operators which uses the given * class. Performs a linear seach through all operator descriptions. */ public static OperatorDescription[] getOperatorDescriptions(Class clazz) { List<OperatorDescription> result = new LinkedList<OperatorDescription>(); Iterator<String> i = names2descriptions.keySet().iterator(); while (i.hasNext()) { OperatorDescription current = getOperatorDescription(i.next()); if (current.getOperatorClass().equals(clazz)) result.add(current); } OperatorDescription[] resultArray = new OperatorDescription[result.size()]; result.toArray(resultArray); return resultArray; } /** * Returns the operator description for a given class name from the * operators.xml file, e.g. "Process" for a ProcessRootOperator. */ public static OperatorDescription getOperatorDescription(String completeName) { return names2descriptions.get(completeName); } /** * Use this method to create an operator from the given class name (from * operator description file operators.xml, not from the Java class name). * For most operators, is is recommended to use the method * {@link #createOperator(Class)} which can be checked during compile time. * This is, however, not possible for some generic operators like the * Weka operators. In that case, you have to use this method with the * argument from the operators.xml file, e.g. <tt>createOperator("J48")</tt> * for a J48 decision tree learner. */ public static Operator createOperator(String typeName) throws OperatorCreationException { OperatorDescription description = getOperatorDescription(typeName); if (description == null) throw new OperatorCreationException(OperatorCreationException.NO_DESCRIPTION_ERROR, typeName, null); return createOperator(description); } /** Use this method to create an operator of a given description object. */ public static Operator createOperator(OperatorDescription description) throws OperatorCreationException { return description.createOperatorInstance(); } /** * <p>Use this method to create an operator from an operator class. * This is the only method which ensures operator existence * checks during compile time (and not during runtime) and the usage * of this method is therefore the recommended way for operator creation. * </p> * * <p>It is, however, not possible to create some generic operators * with this method (this mainly applies to the Weka operators). Please * use the method {@link #createOperator(String)} for those generic * operators.</p> * * <p>If you try to create a generic operator with this method, * the OperatorDescription will not be unique for the given class and * an OperatorCreationException is thrown.</p> * * <p>Please note that is is not necessary to cast the operator to * the desired class.</p> * * TODO: can we remove the supress warning here? */ @SuppressWarnings("unchecked") public static <T extends Operator> T createOperator(Class<T> clazz) throws OperatorCreationException { OperatorDescription[] descriptions = getOperatorDescriptions(clazz); if (descriptions.length == 0) throw new OperatorCreationException(OperatorCreationException.NO_DESCRIPTION_ERROR, clazz.getName(), null); if (descriptions.length > 1) throw new OperatorCreationException(OperatorCreationException.NO_UNIQUE_DESCRIPTION_ERROR, clazz.getName(), null); return (T)descriptions[0].createOperatorInstance(); } }