/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.gui.new_plotter.utility;
import com.rapidminer.Process;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessRootOperator;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.tools.I18N;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.XMLException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.List;
import java.util.logging.Level;
import java.util.regex.Pattern;
/**
* This is a utility class which can transform {@link ExampleSet}s for various needs.
*
* @author Marco Boeck
*
*/
public class DataTransformation {

    /** Placeholder in the process template that is replaced by the de-pivot attribute filter. */
    private static final String TO_REPLACE_WITH_DEPIVOT_ATTRIBUTE_LIST = "TO_REPLACE_WITH_DEPIVOT_ATTRIBUTE_LIST";

    /** Placeholder in the process template that is replaced by the nominal-to-numerical attribute filter. */
    private static final String TO_REPLACE_WITH_NOM_TO_NUM_ATTRIBUTE_LIST = "TO_REPLACE_WITH_NOM_TO_NUM_ATTRIBUTE_LIST";

    /** Placeholder in the process template that is replaced by {@code "true"} or {@code "false"}. */
    private static final String INVERT_NOM_TO_NUM_SELECTION = "INVERT_NOM_TO_NUM_SELECTION";

    /** Classpath location of the process template that performs the transformation. */
    private static final String DEPIVOT_PROCESS_RESOURCE = "/com/rapidminer/resources/processes/TransformationDepivot.rmp";

    /** Attribute name that is always left out of the generated attribute filters. */
    private static final String ID_ATTRIBUTE_NAME = "id";

    /**
     * Creates a de-pivotized meta data {@link ExampleSet} from a given {@link ExampleSet}. This set
     * de-pivots the given numerical attributes.
     * <p>
     * Convenience overload of
     * {@link #createDePivotizedExampleSet(ExampleSet, List, Collection)} that passes {@code null}
     * as the selection of nominal attributes.
     *
     * @param exampleSet
     *            the original {@link ExampleSet}, must not be {@code null}
     * @param listOfNumericalAttributes
     *            list with the names of the numerical attributes to de-pivot, must not be
     *            {@code null}
     * @return the meta data {@link ExampleSet} or {@code null} if there was an error/empty
     *         attribute list
     * @throws IllegalArgumentException
     *             if {@code exampleSet} or {@code listOfNumericalAttributes} is {@code null}
     */
    public static ExampleSet createDePivotizedExampleSet(ExampleSet exampleSet, List<String> listOfNumericalAttributes) {
        return createDePivotizedExampleSet(exampleSet, listOfNumericalAttributes, null);
    }

    /**
     * Creates a de-pivotized meta data {@link ExampleSet} from a given {@link ExampleSet}. This set
     * de-pivots the given numerical attributes.
     * <p>
     * The transformation process template is read from the classpath, its placeholders are
     * replaced by attribute filters built from the given names (the name {@code "id"} is always
     * skipped) and the resulting process is run with {@code exampleSet} as its input. Failures
     * while reading, creating or executing the process are logged with level
     * {@link Level#WARNING} and result in {@code null}; they are not propagated to the caller.
     * <p>
     * The passed collections are only read, never modified.
     *
     * @param exampleSet
     *            the original {@link ExampleSet}, must not be {@code null}
     * @param listOfNumericalAttributes
     *            list with the names of the numerical attributes to de-pivot, must not be
     *            {@code null}
     * @param selectedNomToNumericAttributesList
     *            a list of nominal attributes that should be transformed to numerical attributes
     *            before depivotization; may be {@code null}
     * @return the meta data {@link ExampleSet} or {@code null} if there was an error/empty
     *         attribute list
     * @throws IllegalArgumentException
     *             if {@code exampleSet} or {@code listOfNumericalAttributes} is {@code null}
     */
    public static ExampleSet createDePivotizedExampleSet(ExampleSet exampleSet, List<String> listOfNumericalAttributes,
            Collection<String> selectedNomToNumericAttributesList) {
        if (exampleSet == null) {
            throw new IllegalArgumentException("exampleSet must not be null!");
        }
        if (listOfNumericalAttributes == null) {
            throw new IllegalArgumentException("listOfNumericalAttributes must not be null!");
        }
        if (listOfNumericalAttributes.isEmpty()) {
            return null;
        }

        // modify NominalToNumerical to change only nominal values that have been selected
        StringBuilder attributeFilter = new StringBuilder();
        if (selectedNomToNumericAttributesList != null) {
            appendQuotedNames(attributeFilter, selectedNomToNumericAttributesList);
        }
        String nominalToNumericalValueString;
        String invertNomToNumSelection;
        if (attributeFilter.length() == 0) {
            // nothing selected: match everything and invert the selection
            nominalToNumericalValueString = ".*";
            invertNomToNumSelection = "true";
        } else {
            nominalToNumericalValueString = attributeFilter.toString();
            invertNomToNumSelection = "false";
        }

        // modify de-pivot to only de-pivot given list of numerical attributes
        // NOTE(review): the same buffer is extended on purpose to keep the previous behavior, so
        // the de-pivot filter also lists the selected nominal attributes - presumably because
        // they are numerical once converted; confirm against TransformationDepivot.rmp
        appendQuotedNames(attributeFilter, listOfNumericalAttributes);
        if (attributeFilter.length() == 0) {
            // only "id" was given, so there is nothing left to de-pivot
            return null;
        }
        String numericalValuesString = attributeFilter.toString();

        try {
            String transformProcessXML = readTransformationProcessXML();
            transformProcessXML = transformProcessXML.replace(TO_REPLACE_WITH_NOM_TO_NUM_ATTRIBUTE_LIST,
                    nominalToNumericalValueString);
            transformProcessXML = transformProcessXML.replace(INVERT_NOM_TO_NUM_SELECTION, invertNomToNumSelection);
            transformProcessXML = transformProcessXML.replace(TO_REPLACE_WITH_DEPIVOT_ATTRIBUTE_LIST, numericalValuesString);

            Process transformProcess = new Process(transformProcessXML);

            // disable logging messages
            ParameterTypeCategory loggingParameterType = (ParameterTypeCategory) transformProcess.getOperator("Process")
                    .getParameterType(ProcessRootOperator.PARAMETER_LOGVERBOSITY);
            loggingParameterType.setDefaultValue(loggingParameterType.getIndex("off"));

            // disable ID generation if ID already exists
            if (exampleSet.getAttributes().getId() != null) {
                transformProcess.getOperator("Generate ID").setEnabled(false);
                transformProcess.getOperator("idToNumerical").setEnabled(false);
            }

            IOContainer resultContainer = transformProcess.run(new IOContainer(exampleSet));
            Object firstResult = resultContainer.getElementAt(0);
            if (firstResult instanceof ExampleSet) {
                return (ExampleSet) firstResult;
            }
            throw new OperatorException("First element returned was not ExampleSet, but "
                    + (firstResult == null ? "null" : firstResult.getClass()));
        } catch (XMLException e) {
            logWarning(
                    "com.rapidminer.gui.new_plotter.utility.DataTransformation.creating_metainformationdepivotized_transformation_error",
                    e);
        } catch (IOException e) {
            logWarning(
                    "com.rapidminer.gui.new_plotter.utility.DataTransformation.reading_metainformationdepivotized_transformation_error",
                    e);
        } catch (OperatorException e) {
            logWarning(
                    "com.rapidminer.gui.new_plotter.utility.DataTransformation.executing_metainformationdepivotized_transformation_error",
                    e);
        }
        // we only arrive here in case of error, return null
        return null;
    }

    /**
     * Appends the regex-quoted attribute names to the given filter, separated by {@code '|'}. The
     * name {@code "id"} is skipped. The given collection is not modified.
     */
    private static void appendQuotedNames(StringBuilder filter, Collection<String> attributeNames) {
        for (String attName : attributeNames) {
            if (ID_ATTRIBUTE_NAME.equals(attName)) {
                continue;
            }
            if (filter.length() > 0) {
                filter.append('|');
            }
            filter.append(Pattern.quote(attName));
        }
    }

    /**
     * Reads the transformation process template from the classpath and closes the stream
     * afterwards.
     *
     * @throws IOException
     *             if the resource does not exist or cannot be read
     */
    private static String readTransformationProcessXML() throws IOException {
        InputStream is = DataTransformation.class.getResourceAsStream(DEPIVOT_PROCESS_RESOURCE);
        if (is == null) {
            throw new IOException("Resource not found: " + DEPIVOT_PROCESS_RESOURCE);
        }
        try {
            return Tools.readTextFile(is);
        } finally {
            // closing an already closed stream has no effect, so this is safe either way
            is.close();
        }
    }

    /** Logs the I18N message for the given key together with the exception as a warning. */
    private static void logWarning(String i18nKey, Throwable cause) {
        LogService.getRoot().log(Level.WARNING, I18N.getMessage(LogService.getRoot().getResourceBundle(), i18nKey),
                cause);
    }
}