/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.rapidminer.datatable.DataTable;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.IllegalInputException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessRootOperator;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.WrongNumberOfInnerOperatorsException;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.report.ReportStream;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.OperatorService;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.ResultService;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.XMLException;
import de.tud.inf.operator.IOCapability;
import de.tud.inf.operator.UnsatisfiedCapabilityException;
/**
 * <p>This class was introduced to avoid confusing handling of operator maps and
 * other stuff when a new process definition is created. It is also necessary for file
 * name resolving and breakpoint handling.</p>
 *
 * <p>If you want to use RapidMiner from your own application the best way is often to
 * create a process definition from scratch (by adding the complete operator tree
 * to the process' root operator) or from a file (for example created with the
 * GUI beforehand) and start it by invoking the {@link #run()} method.</p>
 *
 * @author Ingo Mierswa
 * @version $Id: Process.java,v 1.20 2008/07/21 17:34:54 ingomierswa Exp $
 */
public class Process implements Cloneable {

    public static final int PROCESS_STATE_UNKNOWN = -1;
    public static final int PROCESS_STATE_STOPPED = 0;
    public static final int PROCESS_STATE_PAUSED = 1;
    public static final int PROCESS_STATE_RUNNING = 2;

    /** The root operator of the process. */
    private ProcessRootOperator rootOperator;

    /** This is the operator which is currently applied. */
    private Operator currentOperator;

    /** The process might be connected to this file which is then used to resolve relative
     *  file names which might be defined as parameters. */
    private File processFile;

    /** The listeners for breakpoints. */
    private final List<BreakpointListener> breakpointListeners = new LinkedList<BreakpointListener>();

    /**
     * Indicates if the process should stop. Declared volatile so that a value written by
     * {@link #stop()} is visible to the code polling {@link #shouldStop()}.
     * NOTE(review): presumably stop() is called from a different thread than run() - confirm.
     */
    private volatile boolean stopProcess = false;

    /** The macro handler can be used to replace (user defined) macro strings. */
    private final MacroHandler macroHandler = new MacroHandler(this);

    /** This map holds the names of all operators in the process. It is filled by
     *  {@link #registerName(String, Operator)} and emptied by {@link #unregisterName(String)}.
     *  The reference is swapped while a new process definition is read, hence not final. */
    private Map<String, Operator> operatorNameMap = new HashMap<String, Operator>();

    /**
     * Maps names of ProcessLog operators to Objects, that these Operators use
     * for collecting statistics (objects of type {@link DataTable}).
     */
    private final Map<String, DataTable> dataTableMap = new HashMap<String, DataTable>();

    /** Maps names of report streams to the report stream objects. */
    private final Map<String, ReportStream> reportStreamMap = new HashMap<String, ReportStream>();

    /** Indicates the current process state. */
    private int processState = PROCESS_STATE_STOPPED;

    /** The logging for this process. The global log service is used unless a process is running. */
    private LogService logService = LogService.getGlobal();

    // -------------------
    // Constructors
    // -------------------

    /**
     * Constructs a process consisting only of a root operator which is created
     * via the operator service under the key "Process" and renamed to "Root".
     *
     * @throws RuntimeException if the root operator cannot be created
     */
    public Process() {
        try {
            ProcessRootOperator root = (ProcessRootOperator) OperatorService.createOperator("Process");
            root.rename("Root");
            setRootOperator(root);
        } catch (Exception e) {
            throw new RuntimeException("Cannot initialize root operator of the process: " + e.getMessage(), e);
        }
    }

    /**
     * Creates a new process from the definition found at the given URL. The opened
     * stream is always closed, also if reading fails.
     */
    public Process(URL url) throws IOException, XMLException {
        InputStream in = url.openStream();
        try {
            readProcess(in);
        } finally {
            in.close();
        }
    }

    /** Creates a new process from the given process file. This might have been created
     *  with the GUI beforehand. The file is remembered as process file of this process. */
    public Process(File file) throws IOException, XMLException {
        InputStream in = new FileInputStream(file);
        try {
            readProcess(in);
        } finally {
            in.close();
        }
        this.processFile = file;
    }

    /** Reads a process configuration from an XML string. */
    public Process(String xmlString) throws IOException, XMLException {
        readProcessFrom(toInputSource(xmlString));
    }

    /** Reads a process configuration from the given input stream. The stream is not closed. */
    public Process(InputStream in) throws IOException, XMLException {
        readProcess(in);
    }

    /**
     * Clone constructor. Clones the operator tree of the other process (the operator names
     * are registered anew for this process) and copies the process file. Everything else,
     * including the breakpoint listeners, is initialized like for a fresh process.
     */
    private Process(Process other) {
        this();
        setRootOperator((ProcessRootOperator) other.rootOperator.cloneOperator(other.rootOperator.getName()));
        this.currentOperator = null;
        if (other.processFile != null) {
            this.processFile = new File(other.processFile.getAbsolutePath());
        } else {
            this.processFile = null;
        }
    }

    /** Creates a log service bound to this process using the given verbosity. Falls back
     *  to the global log service if the process specific one cannot be created. */
    private void initLogging(int logVerbosity) {
        try {
            this.logService = new LogService(this, logVerbosity);
        } catch (UndefinedParameterError e) {
            // cannot happen
            this.logService = LogService.getGlobal();
        }
    }

    /** Returns a copy of this process created by the private clone constructor. */
    @Override
    public Object clone() {
        return new Process(this);
    }

    /**
     * @deprecated Use {@link #setProcessState(int)} instead
     */
    @Deprecated
    public synchronized void setExperimentState(int state) {
        setProcessState(state);
    }

    /** Sets the process state, usually one of the PROCESS_STATE_* constants. */
    public synchronized void setProcessState(int state) {
        this.processState = state;
    }

    /**
     * @deprecated Use {@link #getProcessState()} instead
     */
    @Deprecated
    public synchronized int getExperimentState() {
        return getProcessState();
    }

    /** Returns the process state as set by {@link #setProcessState(int)}. */
    public synchronized int getProcessState() {
        return this.processState;
    }

    // -------------------------
    // Logging
    // -------------------------

    /** Returns the log service currently used by this process. */
    public LogService getLog() {
        return this.logService;
    }

    // -------------------------
    // Macro Handler
    // -------------------------

    /** Returns the macro handler. */
    public MacroHandler getMacroHandler() {
        return this.macroHandler;
    }

    /** Clears all macros. */
    public void clearMacros() {
        this.macroHandler.clear();
    }

    // -------------------------
    // Data Tables
    // -------------------------

    /** Returns true if a data table object with the given name exists. */
    public boolean dataTableExists(String name) {
        return dataTableMap.get(name) != null;
    }

    /**
     * Registers the given data table under its own name. A table which was registered
     * before with the same name is replaced.
     *
     * @return the table which was passed in
     */
    public DataTable addDataTable(DataTable table) {
        dataTableMap.put(table.getName(), table);
        return table;
    }

    /** Returns the data table registered under the given name or null if there is none. */
    public DataTable getDataTable(String name) {
        return dataTableMap.get(name);
    }

    /** Returns all data tables. The result is a live view of the internal map values. */
    public Collection<DataTable> getDataTables() {
        return dataTableMap.values();
    }

    /** Removes all data tables before running a new process. */
    public void clearDataTables() {
        dataTableMap.clear();
    }

    /** Registers the given report stream under its own name. */
    public void addReportStream(ReportStream stream) {
        reportStreamMap.put(stream.getName(), stream);
    }

    /**
     * Returns the report stream with the given name. If the name is null or empty,
     * the only registered stream is returned if exactly one stream exists and
     * null otherwise.
     */
    public ReportStream getReportStream(String name) {
        boolean unnamed = (name == null) || (name.length() == 0);
        if (!unnamed) {
            return reportStreamMap.get(name);
        }
        if (reportStreamMap.size() == 1) {
            return reportStreamMap.values().iterator().next();
        }
        return null;
    }

    /**
     * Removes this reportStream from process. This report Stream will not be
     * notified about new report items.
     * @param name of the report stream given in the ReportGenerator operator
     */
    public void removeReportStream(String name) {
        reportStreamMap.remove(name);
    }

    /** Removes all report streams. */
    public void clearReportStreams() {
        reportStreamMap.clear();
    }

    // ----------------------
    // Operator Handling
    // ----------------------

    /** Sets the current root operator. The operator name map is cleared before this
     *  process is set on the new root, which might lead to a new registering of operator names. */
    public void setRootOperator(ProcessRootOperator root) {
        this.rootOperator = root;
        this.operatorNameMap.clear();
        this.rootOperator.setProcess(this);
    }

    /** Delivers the current root operator. */
    public ProcessRootOperator getRootOperator() {
        return rootOperator;
    }

    /** Creates the process from the given XML. */
    public void setupFromXML(String xmlString) throws IOException, XMLException {
        readProcessFrom(toInputSource(xmlString));
    }

    /** Returns the current process file.
     *  @deprecated Use {@link #getProcessFile()} instead */
    @Deprecated
    public File getExperimentFile() {
        return getProcessFile();
    }

    /** Returns the current process file. */
    public File getProcessFile() {
        return processFile;
    }

    /** Returns the operator registered under the given name or null if there is none. */
    public Operator getOperator(String name) {
        return operatorNameMap.get(name);
    }

    /** Returns the operator that is currently being executed. */
    public Operator getCurrentOperator() {
        return currentOperator;
    }

    /** Returns a new collection containing the root operator followed by all of its inner operators. */
    public Collection<Operator> getAllOperators() {
        // build an own list so that the list handed out by the root operator is never modified
        List<Operator> result = new LinkedList<Operator>();
        result.add(rootOperator);
        result.addAll(rootOperator.getAllInnerOperators());
        return result;
    }

    /** Returns a new collection with the names of all operators (i.e. Strings). */
    public Collection<String> getAllOperatorNames() {
        Collection<String> allNames = new LinkedList<String>();
        for (Operator operator : getAllOperators()) {
            allNames.add(operator.getName());
        }
        return allNames;
    }

    /** Sets the operator that is currently being executed. */
    public void setCurrentOperator(Operator operator) {
        this.currentOperator = operator;
    }

    // --------------------
    // Breakpoint Handling
    // --------------------

    /** Adds a breakpoint listener. */
    public void addBreakpointListener(BreakpointListener listener) {
        breakpointListeners.add(listener);
    }

    /** Removes a breakpoint listener. */
    public void removeBreakpointListener(BreakpointListener listener) {
        breakpointListeners.remove(listener);
    }

    /** Fires the event that the process was paused. */
    public void fireBreakpointEvent(Operator operator, IOContainer ioContainer, int location) {
        for (BreakpointListener listener : breakpointListenerSnapshot()) {
            listener.breakpointReached(operator, ioContainer, location);
        }
    }

    /** Fires the event that the process was resumed. */
    public void fireResumeEvent() {
        for (BreakpointListener listener : breakpointListenerSnapshot()) {
            listener.resume();
        }
    }

    /** Copies the registered listeners so that a listener may add or remove listeners
     *  from within its callback without breaking the iteration. */
    private BreakpointListener[] breakpointListenerSnapshot() {
        return breakpointListeners.toArray(new BreakpointListener[breakpointListeners.size()]);
    }

    // -----------------
    // Checks
    // -----------------

    /**
     * Checks the nesting (compatible in- and output types) of the current process.
     *
     * @return 0 if the check passed and 1 if it failed. In the latter case the message
     *         is added as error to the operator delivered by the exception (if any).
     */
    private int checkIO(IOContainer inputContainer) {
        IOObject[] inputObjects = inputContainer.getIOObjects();
        Class[] inputClasses = new Class[inputObjects.length];
        for (int i = 0; i < inputObjects.length; i++) {
            inputClasses[i] = inputObjects[i].getClass();
        }
        logService.log("Checking i/o classes...", LogService.INIT);
        try {
            Class[] output = rootOperator.checkIO(inputClasses);
            if (output.length == 0) {
                logService.log("i/o classes are ok.", LogService.INIT);
            } else {
                // list the classes which are left over as output of the process
                StringBuilder left = new StringBuilder();
                for (int i = 0; i < output.length; i++) {
                    if (i > 0) {
                        left.append(", ");
                    }
                    left.append(Tools.classNameWOPackage(output[i]));
                }
                logService.log("i/o classes are ok. Process output: " + left.toString() + ".", LogService.INIT);
            }
            return 0;
        } catch (IllegalInputException e) {
            if (e.getOperator() != null) {
                e.getOperator().addError(e.getMessage());
            }
            return 1;
        } catch (WrongNumberOfInnerOperatorsException e) {
            if (e.getOperator() != null) {
                e.getOperator().addError(e.getMessage());
            }
            return 1;
        }
    }

    /**
     * Checks the capabilities of the current process starting with an empty capability array.
     * Currently not invoked, see the TODO in {@link #checkProcess(IOContainer)}.
     *
     * @return 0 if the check passed and 1 if a capability was not satisfied
     */
    private int checkCapabilites() {
        logService.log("Checking Capabilities", LogService.INIT);
        try {
            rootOperator.checkCapabilites(new IOCapability[]{});
            return 0;
        } catch (UnsatisfiedCapabilityException e) {
            if (e.getOperator() != null) {
                e.getOperator().addError(e.getMessage());
            }
            return 1;
        }
    }

    /** Checks the nesting (number of inner operators) of the current process and
     *  returns the number of errors. */
    private int checkNumberOfInnerOperators() {
        logService.log("Checking process setup...", LogService.INIT);
        int errorCount = ((OperatorChain) rootOperator).checkNumberOfInnerOperators();
        if (errorCount == 0) {
            logService.log("Inner operators are ok.", LogService.INIT);
        } else {
            logService.log("Process setup not ok", LogService.ERROR);
        }
        return errorCount;
    }

    /** Checks the properties (parameter values) of the current process and
     *  returns the number of errors. */
    private int checkProperties() {
        logService.log("Checking properties...", LogService.INIT);
        int errorCount = rootOperator.checkProperties();
        if (errorCount == 0) {
            logService.log("Properties are ok.", LogService.INIT);
        } else {
            logService.log("Properties are not ok", LogService.ERROR);
        }
        return errorCount;
    }

    /** This method leads to some additional checks which might be defined by some operators.
     *  Returns 0 if they passed and 1 if a user error was reported (and logged). */
    private int performAdditionalChecks() {
        try {
            rootOperator.performAdditionalChecks();
            return 0;
        } catch (UserError e) {
            logService.log(e.getMessage(), LogService.ERROR);
            return 1;
        }
    }

    /** Checks for correct number of inner operators, properties, and io.
     *  @deprecated Use {@link #checkProcess(IOContainer)} instead */
    @Deprecated
    public boolean checkExperiment(IOContainer inputContainer) {
        return checkProcess(inputContainer);
    }

    /**
     * Checks for correct number of inner operators, properties, and io. The additional
     * checks and the io check are only performed if no error was found before.
     * Usages of deprecated operators are logged as warning but do not let the check fail.
     *
     * @return true if no error was found
     */
    public boolean checkProcess(IOContainer inputContainer) {
        rootOperator.clearErrorList();

        int errorCount = checkProperties();
        errorCount += checkNumberOfInnerOperators();
        if (errorCount == 0) {
            errorCount += performAdditionalChecks();
        }
        if (errorCount == 0) {
            errorCount += checkIO(inputContainer);
        }
        //TODO: test default (no capabilities defined)
        /*
        if (errorCount == 0)
            errorCount += checkCapabilites();
        */

        if (errorCount == 0) {
            logService.log("Process ok.", LogService.INIT);
        } else if (errorCount == 1) {
            logService.log("There was 1 error.", LogService.ERROR);
        } else {
            logService.log("There were " + errorCount + " errors.", LogService.ERROR);
        }

        int deprecationCount = rootOperator.checkDeprecations();
        if (deprecationCount > 0) {
            logService.log("Deprecations: " + deprecationCount + (deprecationCount == 1 ? " usage" : " usages") + " of deprecated operators.", LogService.WARNING);
        }
        return errorCount == 0;
    }

    // ------------------
    // Running
    // ------------------

    /** This method initializes the process, the operators, and the services and must be invoked
     *  at the beginning of run. Note that the result of the process check is only logged,
     *  a failed check does not prevent the run. */
    private final void prepareRun(IOContainer inputContainer, int logVerbosity, boolean cleanUp) throws OperatorException {
        // TODO: perform this cleaning here after object visualiers, log service and
        // temp file service are bound to a single process
        if (cleanUp) {
            RapidMiner.cleanUp();
        }
        initLogging(logVerbosity);
        stopProcess = false;
        logService.log("Initialising process setup", LogService.INIT);
        RandomGenerator.init(this);
        ResultService.init(this);
        checkProcess(inputContainer);
        clearDataTables();
        clearReportStreams();
        clearMacros();
        AttributeFactory.resetNameCounters();
        logService.log("Process initialised", LogService.INIT);
    }

    /** Starts the process with no input. */
    public final IOContainer run() throws OperatorException {
        return run(new IOContainer());
    }

    /** Starts the process with no input and the given log verbosity. */
    public final IOContainer run(int logVerbosity) throws OperatorException {
        return run(new IOContainer(), logVerbosity, true);
    }

    /** Starts the process with the given input. */
    public final IOContainer run(IOContainer input) throws OperatorException {
        return run(input, LogService.UNKNOWN_LEVEL, true);
    }

    /** Starts the process with the given input. The process uses the given log verbosity. */
    public final IOContainer run(IOContainer input, int logVerbosity) throws OperatorException {
        return run(input, logVerbosity, true);
    }

    /** Starts the process with the given input. The process uses a default log verbosity.
     *  The boolean flag indicates if some static initializations should be cleaned
     *  before the process is started. This should usually be true but it might be useful
     *  to set this to false if, for example, several process runs use the same
     *  object visualizer which would have been cleaned otherwise.
     */
    public final IOContainer run(IOContainer input, boolean cleanUp) throws OperatorException {
        return run(input, LogService.UNKNOWN_LEVEL, cleanUp);
    }

    /** Starts the process with the given input. The process uses the given log verbosity.
     *  The boolean flag indicates if some static initializations should be cleaned
     *  before the process is started. This should usually be true but it might be useful
     *  to set this to false if, for example, several process runs use the same
     *  object visualizer which would have been cleaned otherwise.
     *
     *  @return the output delivered by the root operator
     *  @throws OperatorException if the preparation or the application of the root operator fails
     */
    public final IOContainer run(IOContainer input, int logVerbosity, boolean cleanUp) throws OperatorException {
        prepareRun(input, logVerbosity, cleanUp);

        long start = System.currentTimeMillis();
        logService.log("Process starts", LogService.NOTE);
        logService.log("Process:" + Tools.getLineSeparator() + getRootOperator().createProcessTree(3), LogService.INIT);

        rootOperator.processStarts();
        try {
            IOContainer result = rootOperator.apply(input);
            long end = System.currentTimeMillis();
            logService.log("Process:" + Tools.getLineSeparator() + getRootOperator().createProcessTree(3), LogService.INIT);
            logService.log("Produced output:" + Tools.getLineSeparator() + result, LogService.INIT);
            logService.log("Process finished successfully after " + ((end - start) / 1000) + " seconds", LogService.NOTE);
            return result;
        } finally {
            // also performed if the root operator fails
            tearDown();
        }
    }

    /** Stops the process as soon as possible. */
    public void stop() {
        this.stopProcess = true;
    }

    /** Returns true iff the process should be stopped. */
    public boolean shouldStop() {
        return stopProcess;
    }

    /** This method is invoked after a process has finished. The root operator is only
     *  notified if the process was not stopped. Afterwards the result service is closed and
     *  the global log service is used again. */
    private void tearDown() {
        try {
            if (!shouldStop()) {
                rootOperator.processFinished();
            }
        } catch (OperatorException e) {
            logService.log("Problem during finishing the process: " + e.getMessage(), LogService.ERROR);
        }
        ResultService.close();
        logService.flush();
        logService = LogService.getGlobal();
    }

    // ----------------------
    // Process IO
    // ----------------------

    /**
     * Saves the process to the process file.
     *
     * @throws IOException if no process file is set or if writing fails
     */
    public void save() throws IOException {
        if (processFile == null) {
            throw new IOException("Cannot save process: no process file has been set.");
        }
        save(processFile);
    }

    /**
     * Saves the process to the given process file using the encoding delivered
     * by the root operator.
     *
     * @throws IOException if the file cannot be opened or if writing fails
     */
    public void save(File file) throws IOException {
        Charset encoding = rootOperator.getEncoding();
        PrintWriter writer = null;
        boolean writeFailed;
        try {
            writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), encoding));
            // write encoding
            writer.println("<?xml version=\"1.0\" encoding=\""+encoding+"\"?>");
            // write process
            writer.println("<process version=\"" + RapidMiner.getShortVersion() + "\">" + Tools.getLineSeparator());
            rootOperator.writeXML(writer, " ");
            writer.println("</process>");
            // a PrintWriter never throws on write errors, it has to be asked for them explicitly
            writeFailed = writer.checkError();
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
        if (writeFailed) {
            throw new IOException("Could not write process definition file '" + file + "'.");
        }
        logService.log("Finished writing of process definition file '" + file + "'.", LogService.STATUS);
    }

    /** Sets the process file. This file might be used for resolving relative filenames.
     *  @deprecated Please use {@link #setProcessFile(File)} instead.
     */
    @Deprecated
    public void setExperimentFile(File file) {
        this.setProcessFile(file);
    }

    /** Sets the process file. This file might be used for resolving relative filenames. */
    public void setProcessFile(File file) {
        this.processFile = file;
    }

    /**
     * Resolves the given filename against the directory containing the
     * process file. The current working directory (system property "user.dir") is used
     * if no process file is set or if the process file has no parent directory.
     */
    public File resolveFileName(String name) {
        File baseDir = null;
        if (processFile != null) {
            // null for a relative process file without directory part, e.g. "process.xml"
            baseDir = processFile.getParentFile();
        }
        if (baseDir == null) {
            baseDir = new File(System.getProperty("user.dir"));
        }
        return Tools.getFile(baseDir, name);
    }

    /** Resolves the given name like {@link #resolveFileName(String)} and creates the
     *  parent directory of the resulting file via {@link Tools#mkdir(File)}. */
    public File createFile(String name) {
        File file = resolveFileName(name);
        Tools.mkdir(file.getParentFile());
        return file;
    }

    /** Reads the process setup from the given input stream.
     *  @deprecated Use {@link #readProcess(InputStream)} instead */
    @Deprecated
    public void readExperiment(InputStream in) throws XMLException, IOException {
        readProcess(in);
    }

    /**
     * Reads the process setup from the given input stream. The stream is not closed.
     * If reading fails, the previous operator name map is restored.
     *
     * @throws XMLException if the XML cannot be parsed or does not describe a process
     * @throws IllegalArgumentException if the stream is null
     */
    public void readProcess(InputStream in) throws XMLException, IOException {
        if (in == null) {
            throw new IllegalArgumentException("InputStream cannot be null");
        }
        readProcessFrom(new org.xml.sax.InputSource(in));
    }

    /**
     * Wraps the XML string into a character based input source. Parsing characters instead
     * of bytes avoids encoding the string with the platform default charset, which is in
     * general not the charset the XML parser decodes with.
     */
    private static org.xml.sax.InputSource toInputSource(String xmlString) {
        String xml = xmlString;
        // a byte order mark is skipped for byte streams but is illegal content in a character stream
        if ((xml.length() > 0) && (xml.charAt(0) == '\uFEFF')) {
            xml = xml.substring(1);
        }
        return new org.xml.sax.InputSource(new java.io.StringReader(xml));
    }

    /**
     * Parses the given source and installs the described root operator. The operators are
     * registered in a fresh name map which only replaces the old one if everything went fine.
     */
    private void readProcessFrom(org.xml.sax.InputSource source) throws XMLException, IOException {
        Map<String, Operator> nameMapBackup = operatorNameMap;
        operatorNameMap = new HashMap<String, Operator>(); // no invocation of clear (see below)
        try {
            // NOTE(review): the parser is used with its default configuration, i.e. DTDs and
            // external entities are not disabled - check if definitions from untrusted sources are read.
            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(source);
            Element rootOperatorElement = findRootOperatorElement(document.getDocumentElement());
            Operator root = Operator.createFromXML(rootOperatorElement);
            if (!(root instanceof ProcessRootOperator)) {
                throw new XMLException("Outermost operator must be of type 'Process'!");
            }
            setRootOperator((ProcessRootOperator) root);
            nameMapBackup = operatorNameMap; // success: keep the new map
        } catch (javax.xml.parsers.ParserConfigurationException e) {
            throw new XMLException(e.toString(), e);
        } catch (SAXException e) {
            throw new XMLException("Cannot parse document: " + e, e);
        } finally {
            operatorNameMap = nameMapBackup; // if everything went fine -->
                                             // map = new map, if not -->
                                             // map = old map (backup)
        }
    }

    /**
     * Delivers the XML element describing the root operator. The document element must either
     * be this operator element itself or a &lt;process&gt; (or &lt;experiment&gt;) element
     * whose first child element named "operator" is used.
     */
    private static Element findRootOperatorElement(Element processElement) throws XMLException {
        String tagName = processElement.getTagName();
        if (tagName.equals("operator")) {
            return processElement;
        }
        if (!tagName.equals("process") && !tagName.equals("experiment")) {
            throw new XMLException("Outermost tag of a process definition must be either <process> or <operator>!");
        }

        Element rootOperatorElement = null;
        NodeList children = processElement.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node childNode = children.item(i);
            if ((childNode instanceof Element) && ((Element) childNode).getTagName().equals("operator")) {
                rootOperatorElement = (Element) childNode;
                break;
            }
        }
        if (rootOperatorElement == null) {
            throw new XMLException("The <process> tag must contain exactly one inner operator of type 'Process'!");
        }

        // getAttribute delivers an empty string (not null) for a missing attribute
        String version = processElement.getAttribute("version");
        if ((version != null) && (version.length() > 0)) {
            LogService.getGlobal().log("Reading process definition (version: " + version + ")");
        }
        return rootOperatorElement;
    }

    /**
     * Registers the operator under the given name. If the name is already in use, the
     * operator is registered under "name (i)" instead, where i is the smallest number
     * (starting with 2) leading to an unused name. Everything from the first " (" on is
     * cut off the given name before the number is appended.
     *
     * @return the name which was actually used and which should then be used as operator name
     */
    public String registerName(String name, Operator operator) {
        String registeredName = name;
        if (operatorNameMap.get(name) != null) {
            String baseName = name;
            int index = baseName.indexOf(" (");
            if (index >= 0) {
                baseName = baseName.substring(0, index);
            }
            int i = 2;
            while (operatorNameMap.get(baseName + " (" + i + ")") != null) {
                i++;
            }
            registeredName = baseName + " (" + i + ")";
        }
        operatorNameMap.put(registeredName, operator);
        return registeredName;
    }

    /** This method is used for unregistering a name from the operator name map. */
    public void unregisterName(String name) {
        operatorNameMap.remove(name);
    }

    /** Returns the XML of the root operator prefixed by "Process:" or "empty process"
     *  if there is no root operator. */
    @Override
    public String toString() {
        if (rootOperator == null) {
            return "empty process";
        }
        return "Process:" + Tools.getLineSeparator() + rootOperator.getXML("");
    }
}