/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.tools; import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Enumeration; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.jar.JarFile; import com.rapidminer.RapidMiner; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.DataRowFactory; import com.rapidminer.example.table.ListDataRowReader; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorCreationException; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeString; import weka.core.Instance; import weka.core.Instances; import weka.core.Option; import weka.core.OptionHandler; /** * This class contains static methods for converting <a * href="http://www.cs.waikato.ac.nz/ml/weka/">Weka</a> Instances to RapidMiner * ExampleSet and vice versa. * * @author Ingo Mierswa * @version $Id: WekaTools.java,v 1.11 2008/05/09 19:22:55 ingomierswa Exp $ */ public class WekaTools { /** This prefix indicates all Weka operators. It also allows RapidMiner operators with the * same name. */ public static final String WEKA_OPERATOR_PREFIX = "W-"; // ================================================================================ // Conversion: Weka Instances --> RapidMiner ExampleSet // ================================================================================ /** * Invokes toRapidMinerExampleSet(instances, null, * DataRowFactory.TYPE_DOUBLE_ARRAY). */ public static ExampleSet toRapidMinerExampleSet(Instances instances) { return toRapidMinerExampleSet(instances, null, DataRowFactory.TYPE_DOUBLE_ARRAY); } /** * Invokes toRapidMinerExampleSet(instances, attributeNamePrefix, * DataRowFactory.TYPE_DOUBLE_ARRAY). */ public static ExampleSet toRapidMinerExampleSet(Instances instances, String attributeNamePrefix) { return toRapidMinerExampleSet(instances, attributeNamePrefix, DataRowFactory.TYPE_DOUBLE_ARRAY); } /** * Creates a RapidMiner example set from Weka instances. Only a label can be used * as special attributes, other types of special attributes are not * supported. If <code>attributeNamePrefix</code> is not null, the given * string prefix plus a number is used as attribute names. */ public static ExampleSet toRapidMinerExampleSet(Instances instances, String attributeNamePrefix, int datamanagement) { int classIndex = instances.classIndex(); // create example table // 1. Extract attributes List<Attribute> attributes = new ArrayList<Attribute>(); int number = 1; // use for attribute names for (int i = 0; i < instances.numAttributes(); i++) { weka.core.Attribute wekaAttribute = instances.attribute(i); int rapidMinerAttributeValueType = Ontology.REAL; if (wekaAttribute.isNominal()) rapidMinerAttributeValueType = Ontology.NOMINAL; else if (wekaAttribute.isString()) rapidMinerAttributeValueType = Ontology.STRING; Attribute attribute = AttributeFactory.createAttribute(wekaAttribute.name(), rapidMinerAttributeValueType); if ((i != classIndex) && (attributeNamePrefix != null) && (attributeNamePrefix.length() > 0)) { attribute.setName(attributeNamePrefix + "_" + (number++)); } if (wekaAttribute.isNominal()) { for (int a = 0; a < wekaAttribute.numValues(); a++) { String nominalValue = wekaAttribute.value(a); attribute.getMapping().mapString(nominalValue); } } attributes.add(attribute); } Attribute label = null; if (classIndex >= 0) { label = attributes.get(classIndex); label.setName("label"); } // 2. Guarantee alphabetical mapping to numbers for (int j = 0; j < attributes.size(); j++) { Attribute attribute = attributes.get(j); if (attribute.isNominal()) attribute.getMapping().sortMappings(); } // 3. Read data MemoryExampleTable table = new MemoryExampleTable(attributes); DataRowFactory factory = new DataRowFactory(datamanagement, '.'); // create data List<DataRow> dataList = new LinkedList<DataRow>(); int numberOfRapidMinerAttributes = instances.numAttributes(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); DataRow dataRow = factory.create(numberOfRapidMinerAttributes); for (int a = 0; a < instances.numAttributes(); a++) { Attribute attribute = table.getAttribute(a); double wekaValue = instance.value(a); if (attribute.isNominal()) { String nominalValue = instances.attribute(a).value((int) wekaValue); dataRow.set(attribute, attribute.getMapping().mapString(nominalValue)); } else { dataRow.set(attribute, wekaValue); } } dataRow.trim(); dataList.add(dataRow); } // handle label extra table.readExamples(new ListDataRowReader(dataList.iterator())); // create and return example set return table.createExampleSet(label); } // ================================================================================ // Conversion: RapidMiner ExampleSet --> Weka Instances // ================================================================================ /** * Creates Weka instances with the given name from the given example set. * The taskType defines for which task the instances object should be used. */ public static Instances toWekaInstances(ExampleSet exampleSet, String name, int taskType) throws OperatorException { return new WekaInstancesAdaptor(name, exampleSet, taskType); } // ================================================================================ // Parameter handling // ================================================================================ /** Returns the Weka parameters for a RapidMiner parameter list. */ public static String[] getWekaParametersFromList(List rapidMinerParameters) { String[] parameters = new String[rapidMinerParameters.size() * 2]; Iterator i = rapidMinerParameters.iterator(); int j = 0; while (i.hasNext()) { Object[] parameter = (Object[]) i.next(); parameters[j++] = "-" + (String) parameter[0]; parameters[j++] = (String) parameter[1]; } return parameters; } /** * Returns all Weka parameters as String array from the given list of * parameter types. */ public static String[] getWekaParametersFromTypes(Operator operator, List parameterTypes) { List<String> parameterStrings = new LinkedList<String>(); Iterator i = parameterTypes.iterator(); while (i.hasNext()) { ParameterType type = (ParameterType) i.next(); try { if (type instanceof ParameterTypeBoolean) { if (!(Boolean.valueOf(operator.getParameterAsBoolean(type.getKey())).equals(type.getDefaultValue()))) parameterStrings.add("-" + type.getKey()); } else if (type instanceof ParameterTypeDouble) { double value = operator.getParameterAsDouble(type.getKey()); if (!Double.isNaN(value)) { double defaultValue = (Double)type.getDefaultValue(); if ((Double.isNaN(defaultValue)) || (defaultValue != value)) { parameterStrings.add("-" + type.getKey()); String valueString = Tools.formatIntegerIfPossible(value); parameterStrings.add(valueString); } } } else { String value = operator.getParameterAsString(type.getKey()); if (value != null) { String defaultValue = (String) type.getDefaultValue(); if ((defaultValue == null) || (!defaultValue.equals(value))) { parameterStrings.add("-" + type.getKey()); parameterStrings.add(value); } } } } catch (Exception e) { throw new RuntimeException("Cannot use parameter " + type.getKey() + ": " + e.getMessage()); } } String[] result = new String[parameterStrings.size()]; parameterStrings.toArray(result); return result; } /** * Tries to guess the type of the given option. If the number of arguments * is zero, than a boolean type is assumed. In other cases it will be tried * to parse the default value in the options array as a number and on * success a Double type is returned. If this fails, a ParameterTypeString * is returned. */ public static ParameterType guessParameterType(Option option, String[] options) { if (option.numArguments() == 0) { String defaultString = getStringDefault(option.name(), options); if (defaultString == null) { return new ParameterTypeBoolean(option.name(), option.description(), getBooleanDefault(option.name(), options)); } else { return new ParameterTypeString(option.name(), option.description(), defaultString); } } else { String defaultString = getStringDefault(option.name(), options); if (defaultString == null) { return new ParameterTypeString(option.name(), option.description()); } else { try { double defaultValue = Double.parseDouble(defaultString); return new ParameterTypeDouble(option.name(), option.description(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, defaultValue); } catch (NumberFormatException e) { return new ParameterTypeString(option.name(), option.description(), defaultString); } } } } /** Returns the default value for a boolean parameter. */ private static boolean getBooleanDefault(String key, String[] options) { for (int i = 0; i < options.length; i++) { if (options[i].equals("-" + key)) return true; } return false; } /** Returns the default value for a boolean parameter. */ private static String getStringDefault(String key, String[] options) { for (int i = 0; i < options.length; i++) { if ((options[i].equals("-" + key)) && (i + 1 < options.length)) return options[i + 1]; } return null; } /** * Removes all parameters from the given Weka options which are part of the * inner learner of a meta learning scheme. */ private static String[] removeMetaOptions(String[] options) { int index = -1; for (int i = 0; i < options.length; i++) { if (options[i].trim().equals("--")) { index = i; break; } } if (index == -1) { return options; } else { String[] result = new String[index]; System.arraycopy(options, 0, result, 0, index); return result; } } /** Add the parameter type for the options of a Weka option handler. */ public static void addParameterTypes(OptionHandler handler, List<ParameterType> types, List<ParameterType> wekaParameters, boolean meta, String metaParameter) { String[] defaultOptions = removeMetaOptions(handler.getOptions()); Enumeration options = handler.listOptions(); while (options.hasMoreElements()) { Option option = (Option) options.nextElement(); if (option.name().trim().length() == 0) break; // necessary to prevent adding of parameters of children // of meta learners // prevent adding the meta learning scheme options if (meta && option.name().trim().toLowerCase().equals(metaParameter.toLowerCase())) { continue; } ParameterType type = guessParameterType(option, defaultOptions); type.setExpert(false); // all Weka paras as non expert paras types.add(type); wekaParameters.add(type); } } // ================================================================================ // Misc // ================================================================================ /** * Checks if the user has defined the position of Weka in an environment * variable. If this is the case, the system uses the Jar file of Weka * located at the given position. Otherwise this method assumes that Weka is * at the default place in the lib directory under RapidMiner's home dir. If this * is also not possible a runtime exception will be thrown. */ public static File getWekaJarAsFile() throws IOException { String wekaJar = System.getProperty(RapidMiner.PROPERTY_RAPIDMINER_WEKA_JAR); if (wekaJar != null) { LogService.getGlobal().logMessage("Using "+wekaJar,LogService.WARNING); return new File(wekaJar); } else { return ParameterService.getLibraryFile("weka.jar"); } } /** * This method retrieves the jar file of Weka as a file and wraps it into a JarFile. */ public static JarFile getWekaJar() throws IOException { return new JarFile(getWekaJarAsFile()); } public static String[] getWekaClasses(Class superclass) { return getWekaClasses(superclass, (String)null, true); } public static String[] getWekaClasses(Class superclass, String seachConstraint, boolean includeConstraint) { if (seachConstraint != null) return getWekaClasses(superclass, new String[] { seachConstraint }, includeConstraint); else return getWekaClasses(superclass, (String[])null, includeConstraint); } public static String[] getWekaClasses(Class superclass, String[] searchConstraints, boolean positive) { if (positive) { return getWekaClasses(superclass, searchConstraints, null); } else { return getWekaClasses(superclass, null, searchConstraints); } } /** If Weka is not found, this method silently returns an empty string array. */ public static String[] getWekaClasses(Class superclass, String[] positiveSearchConstraints, String[] negativeSearchConstraints) { JarFile jar = null; try { jar = getWekaJar(); } catch (IOException e) { return new String[0]; } if (jar == null) return new String[0]; List<String> classes = new LinkedList<String>(); Tools.findImplementationsInJar(jar, superclass, classes); LogService.getGlobal().logWarning(classes.toString()); Iterator<String> i = classes.iterator(); while (i.hasNext()) { String name = i.next(); boolean removed = false; if (positiveSearchConstraints != null) { boolean shouldRemove = true; for (String constraint : positiveSearchConstraints) { if (name.indexOf(constraint) != -1) { shouldRemove = false; break; } } if (shouldRemove) { i.remove(); removed = true; } } if ((!removed) && (negativeSearchConstraints != null)) { for (String constraint : negativeSearchConstraints) { if (name.indexOf(constraint) != -1) { i.remove(); break; } } } } String[] names = new String[classes.size()]; classes.toArray(names); return names; } /** * Registers all given Weka operators. The parameter firstDescription will * be prepended to the name and firstGroup should have its last point if the * last package should form a subgroup and no ending point if the given * group should be definitely the group of the operators. Invokes the method * without deprecated operators. */ public static void registerWekaOperators(ClassLoader classLoader, String[] classNames, String operatorClass, String firstDescription, String firstGroup, String icon) { registerWekaOperators(classLoader, classNames, new HashMap<String,String>(), operatorClass, firstDescription, firstGroup, icon); } /** * Registers all given Weka operators. The parameter firstDescription will * be prepended to the name and firstGroup should have its last point if the * last package should form a subgroup and no ending point if the given * group should be definitely the group of the operators. */ public static void registerWekaOperators(ClassLoader classLoader, String[] classNames, Map<String,String> deprecationInfos, String operatorClass, String firstDescription, String firstGroup, String icon) { for (int i = 0; i < classNames.length; i++) { String infoString = null; try { Class<?> clazz = Class.forName(classNames[i], true, classLoader); Object wekaObject = clazz.newInstance(); Method method = clazz.getMethod("globalInfo", new Class[0]); infoString = (String)method.invoke(wekaObject, new Object[0]); if (infoString != null) { // replaces ampers and by a word, necessary for automatic doc generation // infoString.replaceAll("&", "and"); --> does not work here! infoString = htmlEscape(infoString, "&", "and"); infoString = htmlEscape(infoString, "_", ""); infoString = htmlEscape(infoString, "#", "number"); // remove physical markup infoString = htmlEscape(infoString, "<i>", ""); infoString = htmlEscape(infoString, "</i>", ""); infoString = htmlEscape(infoString, "<b>", ""); infoString = htmlEscape(infoString, "</b>", ""); infoString = htmlEscape(infoString, "<tt>", ""); infoString = htmlEscape(infoString, "</tt>", ""); infoString = htmlEscape(infoString, "<num>", ""); infoString = htmlEscape(infoString, "</num>", ""); // if the description probably contains some formula which // cannot be used in documentation or tooltips // --> use only the first sentence which probably does not // contain the formula // if the first contains another "^" discard the description... if (infoString.indexOf("^") >= 0) { infoString = infoString.substring(0, infoString.indexOf(".") + 1).trim(); } if (infoString.indexOf("^") >= 0) { infoString = null; // still contains formula (probably) --> // discard info text } } else { LogService.getGlobal().log("Delivered infoString from Weka is empty for '" + classNames[i] + "': using default short description.", LogService.WARNING); } } catch (ClassNotFoundException e) { // cannot create info string from weka --> // use simple description LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (InstantiationException e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (IllegalAccessException e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (ExceptionInInitializerError e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (NoClassDefFoundError e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (SecurityException e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (NoSuchMethodException e) { // no global info method? Do nothing but simply use simple description //LogService.logMessage("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (IllegalArgumentException e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } catch (InvocationTargetException e) { LogService.getGlobal().log("Cannot retrieve operator information from Weka for '"+classNames[i] + "': " + e, LogService.WARNING); } int lastIndex = classNames[i].lastIndexOf("."); String name = WEKA_OPERATOR_PREFIX + classNames[i].substring(lastIndex + 1); String packageName = classNames[i].substring(0, lastIndex); String group = packageName.substring(packageName.lastIndexOf(".") + 1); String groupStart = group.substring(0, 1); String groupEnd = group.substring(1); group = groupStart.toUpperCase() + groupEnd.toLowerCase(); try { String deprecationInfo = null; if (deprecationInfos != null) deprecationInfo = deprecationInfos.get(classNames[i]); String shortDescription = null; String longDescription = null; if (infoString != null) { int pointIndex = infoString.indexOf('.'); if (pointIndex >= 0) { String shortCandidate = infoString.substring(0, pointIndex + 1); if (shortCandidate.length() > 10) { shortDescription = shortCandidate; longDescription = infoString; } else { shortDescription = firstDescription.trim() + " " + name; longDescription = infoString; } } else { shortDescription = firstDescription.trim() + " " + name; longDescription = infoString; } } else { shortDescription = firstDescription.trim() + " " + name; longDescription = firstDescription.trim() + " " + name; } OperatorDescription description = new OperatorDescription(classLoader, name, operatorClass, shortDescription, longDescription, (firstGroup.endsWith(".") ? firstGroup + group : firstGroup), icon, deprecationInfo); // ==================================================================== // TODO: add the following command for testing new Weka versions !!! // ==================================================================== //description.createOperatorInstance(); // no error? --> register... OperatorService.registerOperator(description); } catch (OperatorCreationException e) { // RapidMiner problems --> report LogService.getGlobal().log("Cannot construct operator '" + name + "', error: " + e.getMessage(), LogService.WARNING); } catch (Throwable t) { // weka problems --> do nothing //LogService.logMessage("Cannot register operator '" + name + "', cause: " + t.getMessage(), LogService.WARNING); } } } private static String htmlEscape(String toEscape, String what, String by) { String result = toEscape; int index = 0; int generalIndex = 0; while ((index = result.indexOf(what, generalIndex)) >= 0) { String first = result.substring(0, index); String last = result.substring(index + what.length()); result = first + by + last; generalIndex = index + by.length(); } return result; } }