/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.model;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.io.AbstractDataReader.AttributeColumn;
import com.rapidminer.operator.io.AbstractExampleSource;
import com.rapidminer.operator.nio.file.FileInputPortHandler;
import com.rapidminer.operator.nio.file.FileObject;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.Port;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.operator.preprocessing.filter.AbstractDateDataProcessing;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDateFormat;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.ParameterTypeTupel;
import com.rapidminer.parameter.PortProvider;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.parameter.internal.DataManagementParameterHelper;
/**
* This class uses DataResultSets to load data from file and then delivers the data as an example
* set.
*
* @author Sebastian Land
*/
public abstract class AbstractDataResultSetReader extends AbstractExampleSource {

    /** Pseudo-annotation to be used for attribute names. */
    public static final String ANNOTATION_NAME = "Name";

    /**
     * This parameter holds the whole information about the attribute columns, i.e. which
     * attributes are defined, their names, what value type they have and whether the attribute is
     * selected.
     */
    public static final String PARAMETER_META_DATA = "data_set_meta_data_information";

    /*
     * Parameters being part of the list for PARAMETER_META_DATA
     */
    public static final String PARAMETER_COLUMN_INDEX = "column_index";
    public static final String PARAMETER_COLUMN_META_DATA = "attribute_meta_data_information";
    public static final String PARAMETER_COLUMN_NAME = "attribute name";
    public static final String PARAMETER_COLUMN_SELECTED = "column_selected";
    public static final String PARAMETER_COLUMN_VALUE_TYPE = "attribute_value_type";
    public static final String PARAMETER_COLUMN_ROLE = "attribute_role";

    public static final String PARAMETER_DATE_FORMAT = "date_format";
    public static final String PARAMETER_TIME_ZONE = "time_zone";
    public static final String PARAMETER_LOCALE = "locale";

    /**
     * The parameter name for "Determines, how the data is represented internally."
     */
    public static final String PARAMETER_DATAMANAGEMENT = "datamanagement";
    public static final String PARAMETER_FIRST_ROW_AS_NAMES = "first_row_as_names";
    public static final String PARAMETER_ANNOTATIONS = "annotations";
    public static final String PARAMETER_ERROR_TOLERANT = "read_not_matching_values_as_missings";

    /** Input port named "file" through which a file object can be delivered. */
    private final InputPort fileInputPort = getInputPorts().createPort("file");

    /*
     * NOTE(review): this initializer calls the overridable getFileParameterName() while the
     * instance is still being constructed, i.e. before any subclass constructor body has run.
     * Implementations therefore must not rely on their own instance state in that method.
     */
    private final FileInputPortHandler filePortHandler = new FileInputPortHandler(this, fileInputPort,
            this.getFileParameterName());

    /**
     * Creates the reader and registers a non-mandatory {@link FileObject} precondition on the file
     * input port.
     *
     * @param description
     *            the operator description passed on to the super class
     */
    public AbstractDataResultSetReader(OperatorDescription description) {
        super(description);
        fileInputPort.addPrecondition(new SimplePrecondition(fileInputPort, new MetaData(FileObject.class)) {

            @Override
            protected boolean isMandatory() {
                return false;
            }
        });
    }

    /**
     * @return the input port named "file" of this operator
     */
    public InputPort getFileInputPort() {
        return fileInputPort;
    }

    /**
     * Reads the data through the {@link DataResultSet} created by
     * {@link #getDataResultSetFactory()} and converts it via
     * {@link #transformDataResultSet(DataResultSet)}. If the file input port is connected and
     * delivers an object carrying a {@link Annotations#KEY_SOURCE} annotation, that annotation is
     * copied to the created example set.
     *
     * @return the created example set
     * @throws OperatorException
     *             if creating, reading or transforming the data result set fails
     */
    @Override
    public ExampleSet createExampleSet() throws OperatorException {
        // loading data result set; factory and result set are closed once the data is transformed
        final ExampleSet exampleSet;
        try (DataResultSetFactory dataResultSetFactory = getDataResultSetFactory();
                DataResultSet dataResultSet = dataResultSetFactory.makeDataResultSet(this)) {
            exampleSet = transformDataResultSet(dataResultSet);
        }

        // propagate the source annotation of the delivered file object, if there is one
        if (fileInputPort.isConnected()) {
            IOObject fileObject = fileInputPort.getDataOrNull(IOObject.class);
            if (fileObject != null) {
                String sourceAnnotation = fileObject.getAnnotations().getAnnotation(Annotations.KEY_SOURCE);
                if (sourceAnnotation != null) {
                    exampleSet.getAnnotations().setAnnotation(Annotations.KEY_SOURCE, sourceAnnotation);
                }
            }
        }
        return exampleSet;
    }

    /**
     * Transforms the provided {@link DataResultSet} into an example set.
     *
     * @param dataResultSet
     *            the data result set to transform into an example set
     * @return the generated example set
     * @throws OperatorException
     *             in case something goes wrong
     */
    protected ExampleSet transformDataResultSet(DataResultSet dataResultSet) throws OperatorException {
        // loading configuration
        DataResultSetTranslationConfiguration configuration = new DataResultSetTranslationConfiguration(this);

        // Captured once, before reconfigure() is called, so that the value type guessing below is
        // driven by the state the configuration had when it was loaded.
        final boolean configIncomplete = !configuration.isComplete();
        if (configIncomplete) {
            configuration.reconfigure(dataResultSet);
        }

        // now use translator to read, translate and return example set
        DataResultSetTranslator translator = new DataResultSetTranslator(this);
        NumberFormat numberFormat = getNumberFormat();
        if (numberFormat != null) {
            configuration.setNumberFormat(numberFormat);
        }

        if (configIncomplete) {
            translator.guessValueTypes(configuration, dataResultSet, null);
        }
        return translator.read(dataResultSet, configuration, false, null);
    }

    /**
     * Builds the meta data of the generated example set: the meta data created by the
     * {@link DataResultSetFactory} is handed to a {@link DataResultSetTranslationConfiguration} of
     * this operator, which adds its column meta data to it.
     *
     * @return the resulting example set meta data
     * @throws OperatorException
     *             if the factory or the meta data cannot be created
     */
    @Override
    public MetaData getGeneratedMetaData() throws OperatorException {
        try (DataResultSetFactory dataResultSetFactory = getDataResultSetFactory()) {
            ExampleSetMetaData result = dataResultSetFactory.makeMetaData();
            DataResultSetTranslationConfiguration configuration = new DataResultSetTranslationConfiguration(this);
            configuration.addColumnMetaData(result);
            return result;
        }
    }

    /**
     * Must be implemented by subclasses to return the factory that creates the DataResultSet.
     */
    protected abstract DataResultSetFactory getDataResultSetFactory() throws OperatorException;

    /**
     * Returns the configured number format or null if a default number format should be used.
     */
    protected abstract NumberFormat getNumberFormat() throws OperatorException;

    /**
     * This method might be overwritten by subclasses to avoid that the first row might be
     * misinterpreted as attribute names.
     */
    protected boolean isSupportingFirstRowAsNames() {
        return true;
    }

    /**
     * Returns either the selected file referenced by the value of the parameter with the name
     * {@link #getFileParameterName()} or the file delivered at the file input port (see
     * {@link #getFileInputPort()}). The decision between the two is delegated to the
     * {@link FileInputPortHandler} of this operator.
     */
    public File getSelectedFile() throws OperatorException {
        return filePortHandler.getSelectedFile();
    }

    /**
     * Same as {@link #getSelectedFile()}, but opens the stream.
     */
    public InputStream openSelectedFile() throws OperatorException, IOException {
        return filePortHandler.openSelectedFile();
    }

    /**
     * Same as {@link #getSelectedFile()}, but returns true if file is specified (in the respective
     * way).
     */
    public boolean isFileSpecified() {
        return filePortHandler.isFileSpecified();
    }

    /**
     * Returns the name of the {@link ParameterTypeFile} to be added through which the user can
     * specify the file name.
     */
    protected abstract String getFileParameterName();

    /** Returns the allowed file extension. */
    protected abstract String getFileExtension();

    /** Returns the allowed file extensions; by default only {@link #getFileExtension()}. */
    protected String[] getFileExtensions() {
        return new String[] { getFileExtension() };
    }

    /**
     * Creates (but does not add) the file parameter named by {@link #getFileParameterName()} that
     * depends on whether or not the file input port is connected.
     */
    protected ParameterType makeFileParameterType() {
        return FileInputPortHandler.makeFileParameterType(this, getFileParameterName(),
                "Name of the file to read the data from.", new PortProvider() {

                    @Override
                    public Port getPort() {
                        return fileInputPort;
                    }
                }, true, getFileExtensions());
    }

    /**
     * Assembles the parameters of this reader in the following order: the optional "first row as
     * names" flag, the annotations list, date format, time zone and locale, then the parameters
     * of the super class, the column meta data list, the error tolerance flag and finally the
     * parameters added by {@link DataManagementParameterHelper}.
     *
     * @return the list of parameter types of this operator
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = new LinkedList<>();
        final boolean firstRowAsNamesSupported = isSupportingFirstRowAsNames();

        if (firstRowAsNamesSupported) {
            types.add(new ParameterTypeBoolean(
                    PARAMETER_FIRST_ROW_AS_NAMES,
                    "Indicates if the first row should be used for the attribute names. If activated no annotations can be used.",
                    true, false));
        }

        // selectable annotation names: the pseudo-annotation "Name" first, then all attribute keys
        List<String> annotations = new LinkedList<>();
        annotations.add(ANNOTATION_NAME);
        annotations.addAll(Arrays.asList(Annotations.ALL_KEYS_ATTRIBUTE));
        ParameterType annotationsType = new ParameterTypeList(PARAMETER_ANNOTATIONS,
                "Maps row numbers to annotation names.", //
                new ParameterTypeInt("row_number", "Row number which contains an annotation", 0, Integer.MAX_VALUE), //
                new ParameterTypeCategory("annotation", "Name of the annotation to assign this row.",
                        annotations.toArray(new String[annotations.size()]), 0), true);
        if (firstRowAsNamesSupported) {
            // the annotations list depends on the "first row as names" flag being false
            annotationsType.registerDependencyCondition(
                    new BooleanParameterCondition(this, PARAMETER_FIRST_ROW_AS_NAMES, false, false));
        }
        types.add(annotationsType);

        ParameterType dateFormatType = new ParameterTypeDateFormat(PARAMETER_DATE_FORMAT,
                "The parse format of the date values, for example \"yyyy/MM/dd\".", false);
        dateFormatType.setExpert(false);
        types.add(dateFormatType);

        ParameterType timeZoneType = new ParameterTypeCategory(PARAMETER_TIME_ZONE,
                "The time zone used for the date objects if not specified in the date string itself.",
                Tools.getAllTimeZones(), Tools.getPreferredTimeZoneIndex());
        types.add(timeZoneType);

        ParameterType localeType = new ParameterTypeCategory(PARAMETER_LOCALE,
                "The used locale for date texts, for example \"Wed\" (English) in contrast to \"Mi\" (German).",
                AbstractDateDataProcessing.availableLocaleNames, AbstractDateDataProcessing.defaultLocale);
        types.add(localeType);

        types.addAll(super.getParameterTypes());

        // one list entry per column: column index -> (name, selected, value type, role)
        ParameterType metaDataType = new ParameterTypeList(PARAMETER_META_DATA, "The meta data information", //
                new ParameterTypeInt(PARAMETER_COLUMN_INDEX, "The column index", 0, Integer.MAX_VALUE), //
                new ParameterTypeTupel(PARAMETER_COLUMN_META_DATA, "The meta data definition of one column", //
                        new ParameterTypeString(PARAMETER_COLUMN_NAME, "Describes the attributes name."), //
                        new ParameterTypeBoolean(PARAMETER_COLUMN_SELECTED, "Indicates if a column is selected", true), //
                        new ParameterTypeCategory(PARAMETER_COLUMN_VALUE_TYPE, "Indicates the value type of an attribute",
                                Ontology.VALUE_TYPE_NAMES, Ontology.NOMINAL), //
                        new ParameterTypeStringCategory(PARAMETER_COLUMN_ROLE, "Indicates the role of an attribute",
                                Attributes.KNOWN_ATTRIBUTE_TYPES, AttributeColumn.REGULAR)),
                true);
        types.add(metaDataType);

        types.add(new ParameterTypeBoolean(PARAMETER_ERROR_TOLERANT,
                "Values which does not match to the specified value typed are considered as missings.", true, true));

        DataManagementParameterHelper.addParameterTypes(types, this);
        return types;
    }

    /**
     * Returns the incompatible version changes of the super class with
     * {@link DataResultSetTranslator#VERSION_6_0_3} appended as last element.
     */
    @Override
    public OperatorVersion[] getIncompatibleVersionChanges() {
        OperatorVersion[] inherited = super.getIncompatibleVersionChanges();
        OperatorVersion[] changes = Arrays.copyOf(inherited, inherited.length + 1);
        changes[changes.length - 1] = DataResultSetTranslator.VERSION_6_0_3;
        return changes;
    }
}