AbstractDataReader.java example

Explorer
rapidminer-vega-master
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.io;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeTypeException;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessStoppedException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDInteger;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.ParameterTypeTupel;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Observable;
import com.rapidminer.tools.Observer;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.math.container.Range;

/**
 * Abstract super class of all example sources reading from files.
 * 
 * @author Tobias Malbrecht
 * @author Sebastian Loh (29.04.2010)
 */
public abstract class AbstractDataReader extends AbstractExampleSource {

    public static final int PREVIEW_LINES = 300;

    /**
     * DO NOT SET THIS PARAMETER DIRECTLY. USE {@link AbstractDataReader#setErrorTolerant(boolean)} in order to
     * cache the value.
     * 
     * Indicates whether the reader tolerates values which do not match an attribute's value type.
     * <p>
     * For example, the value type is NUMERICAL and the reader reads a string. If
     * {@link AbstractDataReader#PARAMETER_ERROR_TOLERANT} is <code>true</code>, the reader writes a missing value; if
     * it is <code>false</code>, the reader throws an exception. If this parameter is checked, the reader also
     * replaces a binominal value type by nominal when the attribute's domain has more than two values.
     */
    public static final String PARAMETER_ERROR_TOLERANT = "read_not_matching_values_as_missings";

    /**
     * Hidden parameter in order to remember whether attributes names, which are defined by the user are used or not.
     * This parameter is set <code>true</code> when the user uses the import wizards in order to configure the reader.
     */
    private static final String PARAMETER_ATTRIBUTE_NAMES_DEFINED_BY_USER = "attribute_names_already_defined";

    /**
     * This parameter holds the whole information about the attribute columns, i.e. which attributes are defined,
     * their names, what value type they have, and whether the attribute is selected.
     */
    private static final String PARAMETER_META_DATA = "data_set_meta_data_information";

    /**
     * hidden parameter which is used to construct the {@link AbstractDataReader#PARAMETER_META_DATA}
     */
    public static final String PARAMETER_COLUMN_INDEX = "column_index";

    /**
     * hidden parameter which is used to construct the {@link AbstractDataReader#PARAMETER_META_DATA}
     */
    public static final String PARAMETER_COLUMN_META_DATA = "attribute_meta_data_information";

    /**
     * hidden parameter which is used to construct the {@link AbstractDataReader#PARAMETER_META_DATA}
     */
    public static final String PARAMETER_COLUMN_NAME = "attribute name";

    /**
     * hidden parameter which is used to construct the {@link AbstractDataReader#PARAMETER_META_DATA}
     */
    public static final String PARAMETER_COLUMN_SELECTED = "column_selected";

    /**
     * hidden parameter which is used to construct the {@link AbstractDataReader#PARAMETER_META_DATA}
     */
    public static final String PARAMETER_COLUM_VALUE_TYPE = "attribute_value_type";

    /**
     * hidden parameter which is used to construct the {@link AbstractDataReader#PARAMETER_META_DATA}
     */
    public static final String PARAMETER_COLUM_ROLE = "attribute_role";

    /**
     * Names of the attribute roles known to the reader. The list mirrors {@link Attributes#KNOWN_ATTRIBUTE_TYPES},
     * except that the entry "attribute" is replaced by {@link AttributeColumn#REGULAR}.
     */
    public static final ArrayList<String> ROLE_NAMES = new ArrayList<String>();
    // Static initializer: the shared list is filled exactly once when the class is initialized. An instance
    // initializer would clear and refill the static list on every construction of a reader and would leave it empty
    // until the first instance exists.
    static {
        for (String knownType : Attributes.KNOWN_ATTRIBUTE_TYPES) {
            if (knownType.equals("attribute")) {
                ROLE_NAMES.add(AttributeColumn.REGULAR);
            } else {
                ROLE_NAMES.add(knownType);
            }
        }
    }

    /**
     * A list of errors which might have occurred during the import process.
     */
    private List<OperatorException> importErrors = new LinkedList<OperatorException>();

    protected abstract DataSet getDataSet() throws OperatorException, IOException;

    /**
     * the row count is the number of row/lines which are read during the guessing process. It is only used for the
     * operator's MetaData prediction.
     * 
     * @see AbstractDataReader#guessValueTypes()
     */
    private int rowCountFromGuessing = 0;

    /**
     * Indicated whether the operator MetaData is only guessed ( <code>metaDataFixed == false</code>) or somebody called
     * {@link AbstractDataReader#fixMetaDataDefinition()}.
     * 
     */
    private boolean metaDataFixed = false;

    /**
     * Indicates whether the ValueTypes were already guessed once.
     */
    private boolean guessedValueTypes = false;

    private boolean detectErrorsInPreview = false;

    /**
     * cached flag in order to avoid reading the parameter every single row
     */
    boolean isErrorTollerantCache = true;

    /**
     * Flag which interrupts the reading process if it is set to <code>true</code>.
     * 
     * @see AbstractDataReader#stopReading()
     */
    private boolean stopReading = false;

    /**
     * Data structure to manage the background highlighting for cells which cannot be parsed as the specified value
     * type.
     * 
     * Maps a column to the set of rows in which the parsing failed.
     * 
     * @see AbstractDataReader#hasParseError(int, int)
     * @see AbstractDataReader#hasParseErrorInColumn(int)
     */
    TreeMap<Integer, TreeSet<Integer>> errorCells = new TreeMap<Integer, TreeSet<Integer>>();

    /**
     * The columns of the created {@link ExampleSet}.
     * 
     * @see AbstractDataReader#createExampleSet()
     * @see AbstractDataReader#guessValueTypes()
     */
    private List<AttributeColumn> attributeColumns = new ArrayList<AttributeColumn>();

    /**
     * Resets the in-memory reader state (see {@link AbstractDataReader#clearReaderSettings()}) and afterwards removes
     * the stored attribute meta data parameters (see {@link AbstractDataReader#deleteAttributeMetaDataParamters()}).
     */
    public void clearAllReaderSettings() {
        this.clearReaderSettings();
        this.deleteAttributeMetaDataParamters();
    }

    /**
     * Requests a stop of the reading process and then empties the list of attribute columns as well as the list of
     * collected import errors.
     */
    public void clearReaderSettings() {
        this.stopReading();
        this.attributeColumns.clear();
        this.importErrors.clear();
    }

    /**
     * Sets the meta data parameter to <code>null</code> and marks the attribute names as not defined by the user.
     */
    public void deleteAttributeMetaDataParamters() {
        this.setParameter(PARAMETER_META_DATA, null);
        this.setAttributeNamesDefinedByUser(false);
    }

    // public void clearOperatorMetaData() {
    // // just meta data information for the gui
    // metaDataFixed = false;
    // guessedValueTypes = false;
    // rowCountFromGuessing = 0;
    // }

    /**
     * Creates the reader; the description is passed unchanged to the super class constructor.
     * 
     * @param description
     *            the operator description handed to the super class
     */
    public AbstractDataReader(OperatorDescription description) {
        super(description);
    }

    /**
     * Reads the hidden parameter which remembers whether the attribute names were defined by the user.
     * 
     * @return the boolean value of the parameter "attribute_names_already_defined"
     */
    public boolean attributeNamesDefinedByUser() {
        final boolean definedByUser = getParameterAsBoolean(PARAMETER_ATTRIBUTE_NAMES_DEFINED_BY_USER);
        return definedByUser;
    }

    /**
     * Stores the given flag, converted to its string form, in the hidden parameter which remembers whether the
     * attribute names were defined by the user.
     * 
     * @param flag
     *            the value to store
     */
    public void setAttributeNamesDefinedByUser(boolean flag) {
        final String flagAsText = String.valueOf(flag);
        setParameter(PARAMETER_ATTRIBUTE_NAMES_DEFINED_BY_USER, flagAsText);
    }

    /**
     * Collects all <b>activated</b> attribute columns in a newly created list, keeping their relative order.
     * 
     * @return a new list containing only the columns for which {@link AttributeColumn#isActivated()} is true
     */
    public List<AttributeColumn> getActiveAttributeColumns() {
        List<AttributeColumn> activated = new LinkedList<AttributeColumn>();
        Iterator<AttributeColumn> columns = attributeColumns.iterator();
        while (columns.hasNext()) {
            AttributeColumn candidate = columns.next();
            if (!candidate.isActivated()) {
                continue;
            }
            activated.add(candidate);
        }
        return activated;
    }

    /**
     * Returns all attribute columns, regardless of whether they are activated or not.
     * 
     * @return an unmodifiable view of the internal column list
     */
    public List<AttributeColumn> getAllAttributeColumns() {
        final List<AttributeColumn> readOnlyView = Collections.unmodifiableList(attributeColumns);
        return readOnlyView;
    }

    /**
     * Returns the attribute column with the given index if it exists (it does not matter if the column is activated or
     * not).
     * 
     * @param index
     *            the index of the requested column.
     * @return the column stored at the given index
     * @throws IllegalArgumentException
     *             if the index is negative or not smaller than the number of columns
     */
    public AttributeColumn getAttributeColumn(int index) throws IllegalArgumentException {
        // Reject negative indices as well, so that callers always get the documented exception type instead of an
        // IndexOutOfBoundsException from the underlying list.
        if (index < 0 || index >= attributeColumns.size()) {
            throw new IllegalArgumentException("The attribute column with index " + index + " does not exists.");
        }
        return attributeColumns.get(index);
    }

    /**
     * Looks up the position of the given {@link AttributeColumn} among all columns (activated or not).
     * 
     * @param column
     *            the column to look for
     * @return the index of the attribute column, or -1 if it is not contained
     */
    public int getIndexOfAttributeColumn(AttributeColumn column) {
        final int position = attributeColumns.indexOf(column);
        return position;
    }

    /**
     * Looks up the position of the given column within the list of <b>activated</b> columns.
     * 
     * @param column
     *            the column to look for
     * @return the index within {@link AbstractDataReader#getActiveAttributeColumns()}, or -1 if the column is not
     *         contained in that list
     */
    public int getIndexOfActiveAttributeColumn(AttributeColumn column) {
        List<AttributeColumn> activated = getActiveAttributeColumns();
        return activated.indexOf(column);
    }

    /**
     * Appends a new column whose name is generated by {@link AbstractDataReader#getNewGenericColumnName(int)} for the
     * current number of columns.
     */
    public void addAttributeColumn() {
        final int nextIndex = attributeColumns.size();
        final AttributeColumn appended = new AttributeColumn(getNewGenericColumnName(nextIndex));
        attributeColumns.add(appended);
    }

    /**
     * Appends a new column with the given name.
     * 
     * @param attributeName
     *            the name passed to the new {@link AttributeColumn}
     */
    public void addAttributeColumn(String attributeName) {
        final AttributeColumn appended = new AttributeColumn(attributeName);
        attributeColumns.add(appended);
    }

    /**
     * Tells whether {@link AbstractDataReader#fixMetaDataDefinition()} has been called. As long as it has not, the
     * operator MetaData is only guessed.
     * 
     * @return the current value of the <code>metaDataFixed</code> flag
     */
    public boolean isMetaDatafixed() {
        return this.metaDataFixed;
    }

    /**
     * Declares the operator's MetaData as final by setting the <code>metaDataFixed</code> flag.
     */
    public void fixMetaDataDefinition() {
        this.metaDataFixed = true;
    }

    /**
     * Writes the meta data of all columns into the operator parameters: the previously stored meta data parameters
     * are deleted, the attribute names are marked as defined by the user, and every column is asked to store its own
     * meta parameter.
     */
    public void writeMetaDataInParameter() {
        deleteAttributeMetaDataParamters();
        setAttributeNamesDefinedByUser(true);
        Iterator<AttributeColumn> columns = getAllAttributeColumns().iterator();
        while (columns.hasNext()) {
            columns.next().setMetaParameter();
        }
    }

    /**
     * Rebuilds the attribute columns from the meta data parameter list: the existing columns are discarded, one new
     * column is created per entry of the parameter list, and each new column loads its properties from the
     * parameters. The annotations of the previous column at the same position (if there was one) are copied onto the
     * new column.
     */
    public void loadMetaDataFromParameters() {
        // Take a snapshot BEFORE clearing. A plain reference would alias the very list that is cleared below, so the
        // annotations to restore would already be gone.
        List<AttributeColumn> oldColumns = new ArrayList<AttributeColumn>(attributeColumns);
        attributeColumns.clear();
        try {
            List<String[]> metaData = getParameterList(PARAMETER_META_DATA);
            // first create as many attribute columns as parameters in the list
            // exists
            for (int i = 0; i < metaData.size(); i++) {
                this.addAttributeColumn();
            }
            Iterator<AttributeColumn> it = oldColumns.iterator();
            // then let them load their properties from the meta data
            for (AttributeColumn column : getAllAttributeColumns()) {
                column.loadMetaParameter();
                // restore annotations; there may be fewer old columns than new ones
                if (it.hasNext()) {
                    Annotations ann = it.next().getAnnotations();
                    for (String key : ann.getKeys()) {
                        column.getAnnotations().setAnnotation(key, ann.get(key));
                    }
                }
            }
        } catch (UndefinedParameterError e) {
            // best effort: the columns created so far are kept, the problem is only reported
            e.printStackTrace();
        }
    }

    /**
     * Counts all columns, regardless of whether a column is activated or not.
     * 
     * @return the size of the internal column list
     */
    public int getColumnCount() {
        final int totalColumns = attributeColumns.size();
        return totalColumns;
    }

    /**
     * Returns the number of rows which were read during the value type guessing.
     * 
     * @see AbstractDataReader#guessValueTypes()
     * 
     * @return the current value of <code>rowCountFromGuessing</code>
     */
    private int getRowCountFromGuessing() {
        return this.rowCountFromGuessing;
    }

    /**
     * Builds the meta data this reader announces for the example set it delivers.
     * <p>
     * If the attribute names were defined by the user, the columns are first reloaded from the parameters and the
     * value types count as guessed. As long as no value types were guessed, an empty meta data object is returned.
     * Otherwise every activated column contributes one {@link AttributeMetaData}. While the meta data is not fixed
     * (see {@link AbstractDataReader#fixMetaDataDefinition()}), ranges and value sets are reported with the
     * {@link SetRelation#SUPERSET} relation and the numbers of missing values and examples are increased by an
     * unknown amount.
     * 
     * @see com.rapidminer.operator.io.AbstractExampleSource#getGeneratedMetaData()
     */
    @Override
    public ExampleSetMetaData getGeneratedMetaData() {
        if (attributeNamesDefinedByUser()) {
            loadMetaDataFromParameters();
            guessedValueTypes = true;
        }

        if (!guessedValueTypes) {
            return new ExampleSetMetaData();
        }

        ExampleSetMetaData metaData = new ExampleSetMetaData();

        for (AttributeColumn column : getActiveAttributeColumns()) {

            AttributeMetaData amd = new AttributeMetaData(column.getName(), column.getValueType());
            amd.setAnnotations(column.getAnnotations());

            // the regular role is represented by null in the meta data
            String role = column.getRole();
            if (role.equals(AttributeColumn.REGULAR)) {
                role = null;
            }
            amd.setRole(role);

            MDInteger missings = new MDInteger(column.numberOfMissings);
            SetRelation relation = SetRelation.EQUAL;

            if (!isMetaDatafixed()) {
                relation = SetRelation.SUPERSET;
                missings.increaseByUnknownAmount();
            }
            if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(column.getValueType(), Ontology.NUMERICAL) || Ontology.ATTRIBUTE_VALUE_TYPE.isA(column.getValueType(), Ontology.DATE_TIME)) {
                // The range spans from the column's minimum to its maximum; using the maximum for both bounds would
                // describe a single point.
                // NOTE(review): relies on AttributeColumn exposing minValue next to maxValue - confirm.
                amd.setValueRange(new Range(column.minValue, column.maxValue), relation);
            } else {
                amd.setValueSet(column.valueSet, relation);
            }
            amd.setNumberOfMissingValues(missings);
            metaData.addAttribute(amd);
        }
        metaData.setNumberOfExamples(new MDInteger(getRowCountFromGuessing()));
        if (!isMetaDatafixed()) {
            metaData.getNumberOfExamples().increaseByUnknownAmount();
            metaData.attributesAreSuperset();
        }
        return metaData;
    }

    /**
     * Converts the current row of the given data set into the double representation of one example.
     * <p>
     * The result has one entry per element of <code>activeAttributes</code> and is initialized with
     * {@link Double#NaN}. Deactivated columns are skipped. Date values are stored as their time in milliseconds,
     * numbers as their double value, and nominal values as the index returned by the attribute's mapping.
     * <p>
     * If the row is shorter or longer than the number of columns, or a cell does not match the column's value type,
     * the problem is logged and the cell is left as missing when the reader is error tolerant; otherwise the
     * exception is thrown.
     * 
     * @param set
     *            the data set positioned on the row to convert
     * @param activeAttributes
     *            the attributes of the activated columns; may be extended or modified by this method
     * @param rowNumber
     *            the row number used in error messages
     * @return the values of the row, indexed like <code>activeAttributes</code>
     * @throws OperatorException
     *             if the row cannot be converted and the reader is not error tolerant, or a value type is unknown
     */
    private double[] generateRow(DataSet set, List<Attribute> activeAttributes, int rowNumber) throws OperatorException {

        List<AttributeColumn> allAttributeColumns = getAllAttributeColumns();
        if (allAttributeColumns.size() > set.getNumberOfColumnsInCurrentRow()) {
            UnexpectedRowLenghtException e = new TooShortRowLengthException(rowNumber, set.getNumberOfColumnsInCurrentRow(), allAttributeColumns.size());
            if (isErrorTolerant()) {
                this.logReadingError(e);
            } else {
                throw e;
            }
        }

        if (allAttributeColumns.size() < set.getNumberOfColumnsInCurrentRow()) {
            UnexpectedRowLenghtException e = new TooLongRowLengthException(rowNumber, set.getNumberOfColumnsInCurrentRow(), allAttributeColumns.size());
            if (isErrorTolerant()) {
                // adjust columns in case a longer row is read
                if (this instanceof CSVDataReader) {
                    adjustAttributeColumnsNumbers(set.getNumberOfColumnsInCurrentRow());
                    for (AttributeColumn col : getActiveAttributeColumns()) {
                        if (!activeAttributes.contains(col.getAttribute())) {
                            activeAttributes.add(col.getAttribute());
                        }
                    }
                }
                this.logReadingError(e);
            } else {
                throw e;
            }
        }
        double[] values = new double[activeAttributes.size()];

        for (int i = 0; i < values.length; i++) {
            values[i] = Double.NaN;
        }

        // Never read past the known columns: a tolerated too-long row (when the columns were not extended above)
        // would otherwise run out of the column list.
        int columnsToRead = Math.min(allAttributeColumns.size(), set.getNumberOfColumnsInCurrentRow());

        int activeAttributeIndex = 0;
        for (int columnIndex = 0; columnIndex < columnsToRead; columnIndex++) {
            AttributeColumn column = allAttributeColumns.get(columnIndex);
            // skip deactivated columns
            if (!column.isActivated()) {
                continue;
            }

            try {
                // do Ontology.ATTRIBUTE_VALUE_TYPE.isA(..) comparisons after
                // the
                // explicit check on the value type due to performance reasons.
                if (set.isMissing(columnIndex)) {
                    values[activeAttributeIndex] = Double.NaN;
                } else if (column.getValueType() == Ontology.DATE || column.getValueType() == Ontology.TIME || column.getValueType() == Ontology.DATE_TIME || Ontology.ATTRIBUTE_VALUE_TYPE.isA(column.getValueType(), Ontology.DATE_TIME)) {
                    Date dateValue = set.getDate(columnIndex);
                    if (dateValue == null) {
                        throw new UnexpectedValueTypeException(Ontology.DATE_TIME, rowNumber, columnIndex, set.getString(columnIndex));
                    }
                    values[activeAttributeIndex] = dateValue.getTime();
                } else if (column.getValueType() == Ontology.INTEGER || column.getValueType() == Ontology.REAL || column.getValueType() == Ontology.NUMERICAL || Ontology.ATTRIBUTE_VALUE_TYPE.isA(column.getValueType(), Ontology.NUMERICAL)) {
                    Number numberValue = set.getNumber(columnIndex);
                    if (numberValue == null) {
                        throw new UnexpectedValueTypeException(Ontology.NUMERICAL, rowNumber, columnIndex, set.getString(columnIndex));
                    }
                    values[activeAttributeIndex] = numberValue.doubleValue();
                } else if (column.getValueType() == Ontology.BINOMINAL || column.getValueType() == Ontology.NOMINAL || Ontology.ATTRIBUTE_VALUE_TYPE.isA(column.getValueType(), Ontology.NOMINAL)) {
                    try {
                        values[activeAttributeIndex] = activeAttributes.get(activeAttributeIndex).getMapping().mapString(set.getString(columnIndex));
                    } catch (AttributeTypeException e) {
                        if (isErrorTolerant()) {
                            // The mapping rejected the value: switch the column to NOMINAL and replace the
                            // attribute by a new one which takes over the values mapped so far.
                            column.setValueType(Ontology.NOMINAL);
                            Attribute att = column.createAttribute();
                            // activeAttributes is indexed by the position among the ACTIVE columns, not by the
                            // column index in the data set (they differ as soon as a column is deactivated).
                            for (String value : activeAttributes.get(activeAttributeIndex).getMapping().getValues()) {
                                att.getMapping().mapString(value);
                            }
                            values[activeAttributeIndex] = att.getMapping().mapString(set.getString(columnIndex));
                            activeAttributes.set(activeAttributeIndex, att);
                        } else {
                            throw new AttributeTypeException("Attribute: " + column.getName() + ", Index: " + columnIndex + ", Row: " + rowNumber + "\n\n" + e.getMessage());
                        }
                    }
                } else {
                    throw new OperatorException("The value type of the attribute " + column.getName() + " is unknown.");
                }

            } catch (UnexpectedValueTypeException e) {
                if (isErrorTolerant()) {
                    values[activeAttributeIndex] = Double.NaN;
                    this.logReadingError(e);
                } else {
                    throw e;
                }
            }
            activeAttributeIndex++;
        }
        return values;
    }

    /**
     * Adjusts the number of columns to the given number.
     * <p>
     * Missing columns are appended with generic names (see {@link AbstractDataReader#getNewGenericColumnName(int)});
     * surplus columns are dropped from the end by replacing the internal list with a shortened copy.
     * 
     * @param newNumberOfColumns
     *            the number of columns the reader should have afterwards
     */
    private void adjustAttributeColumnsNumbers(int newNumberOfColumns) {
        // too short: add the columns one by one, so that every generated name is checked against the names of the
        // columns added just before. Computing all names up front could hand out the same name twice.
        while (attributeColumns.size() < newNumberOfColumns) {
            String name = getNewGenericColumnName(attributeColumns.size());
            attributeColumns.add(new AttributeColumn(name));
        }
        // too long: keep only the leading columns in a new list
        if (attributeColumns.size() > newNumberOfColumns) {
            int keep = Math.max(0, newNumberOfColumns);
            attributeColumns = new ArrayList<AttributeColumn>(attributeColumns.subList(0, keep));
        }
    }

    /**
     * Adjusts the number of columns to the length of the given array and, unless the attribute names were already
     * defined by the user, renames all columns. The names actually applied are those returned by
     * <code>getGenericColumnNames</code> for the given names and the current names.
     * 
     * @param newColumnNames
     *            the proposed names, one per column
     */
    protected void setAttributeNames(String[] newColumnNames) {
        adjustAttributeColumnsNumbers(newColumnNames.length);
        assert attributeColumns.size() == newColumnNames.length;

        // names configured by the user take precedence and are left untouched
        if (!attributeNamesDefinedByUser()) {
            List<AttributeColumn> columns = getAllAttributeColumns();
            int columnCount = columns.size();

            String[] currentNames = new String[columnCount];
            for (int idx = 0; idx < columnCount; idx++) {
                currentNames[idx] = columns.get(idx).getName();
            }

            String[] resolvedNames = getGenericColumnNames(newColumnNames, currentNames);
            for (int idx = 0; idx < columnCount; idx++) {
                columns.get(idx).setName(resolvedNames[idx]);
            }
        }
    }

    /**
     * Renames every column, in order, to the generic name which
     * {@link AbstractDataReader#getNewGenericColumnName(int)} returns for the column's position.
     */
    protected void resetColumnNames() {
        List<AttributeColumn> columns = getAllAttributeColumns();
        for (int position = 0; position < columns.size(); position++) {
            String genericName = getNewGenericColumnName(position);
            columns.get(position).setName(genericName);
        }
    }

    /**
     * Sets the flag which makes a running preview read loop terminate before it fetches the next row.
     */
    public void stopReading() {
        this.stopReading = true;
    }

    /**
     * Replaces the annotations of every column: the column's annotations are cleared and then filled with the
     * entries of the array element at the column's position.
     * 
     * @param annotations
     *            one annotations object per column
     */
    protected void setAnnotations(Annotations[] annotations) {
        assert getAllAttributeColumns().size() == annotations.length;
        Iterator<AttributeColumn> columns = getAllAttributeColumns().iterator();
        for (int position = 0; columns.hasNext(); position++) {
            AttributeColumn target = columns.next();
            target.getAnnotations().clear();
            target.getAnnotations().putAll(annotations[position]);
        }
    }

    /**
     * Assigns the given value types to the columns, position by position.
     * 
     * @param valueTypesList
     *            one value type per column
     * @throws OperatorException
     *             if the number of value types differs from the number of columns
     */
    protected void setValueTypes(List<Integer> valueTypesList) throws OperatorException {
        List<AttributeColumn> columns = getAllAttributeColumns();
        if (columns.size() != valueTypesList.size()) {
            throw new OperatorException("Internal error: The number of valueTypes does not match with the number of attributes.");
        }
        Iterator<Integer> types = valueTypesList.iterator();
        Iterator<AttributeColumn> targets = columns.iterator();
        while (targets.hasNext()) {
            AttributeColumn target = targets.next();
            target.setValueType(types.next());
        }
    }

    /**
     * Remembers the given exception in the list of import errors.
     * 
     * @param e
     *            the error to record
     */
    private void logReadingError(OperatorException e) {
        this.importErrors.add(e);
    }

    /**
     * Gives access to the errors recorded while reading.
     * 
     * @return the internal list of import errors (not a copy)
     */
    public List<OperatorException> getImportErrors() {
        return this.importErrors;
    }

    /**
     * Reads a preview of at most {@link AbstractDataReader#PREVIEW_LINES} rows with error detection switched off.
     * 
     * @param progress
     *            passed on to the full preview method
     * @param trimAttributeColumns
     *            passed on to the full preview method
     * @return the preview rows
     * @throws OperatorException
     *             if reading the preview fails
     */
    public List<Object[]> getShortPreviewAsList(ProgressListener progress, boolean trimAttributeColumns) throws OperatorException {
        final boolean enableErrorDetection = false;
        return getPreviewAsList(progress, enableErrorDetection, trimAttributeColumns, PREVIEW_LINES);
    }

    /**
     * Reads a preview. If error detection in the preview is enabled, all rows are read with error detection;
     * otherwise the short preview is returned.
     * 
     * @param progress
     *            passed on to the preview method used
     * @param trimAttributeColumns
     *            passed on to the preview method used
     * @return the preview rows
     * @throws OperatorException
     *             if reading the preview fails
     */
    public List<Object[]> getPreviewAsList(ProgressListener progress, boolean trimAttributeColumns) throws OperatorException {
        if (!detectErrorsInPreview) {
            return getShortPreviewAsList(progress, trimAttributeColumns);
        }
        return getPreviewAsList(progress, true, trimAttributeColumns, -1);
    }

    /**
     * Reads all rows with error detection enabled and keeps only those rows for which a parse error was recorded.
     * 
     * @param progress
     *            passed on to the preview method
     * @return the preview rows whose (zero-based) position has a recorded parse error
     * @throws OperatorException
     *             if reading the preview fails
     */
    public List<Object[]> getErrorPreviewAsList(ProgressListener progress) throws OperatorException {
        List<Object[]> allRows = getPreviewAsList(progress, true, false, -1);
        List<Object[]> rowsWithErrors = new LinkedList<Object[]>();

        int rowIndex = 0;
        for (Object[] previewRow : allRows) {
            if (hasParseErrorInRow(rowIndex)) {
                rowsWithErrors.add(previewRow);
            }
            rowIndex++;
        }
        return rowsWithErrors;
    }

    /**
     * Reads rows from the data set and returns them as a preview. Each returned array holds the (one-based) row number
     * at index 0, followed by the cells of the row.
     * <p>
     * As a side effect the recorded parse errors and the value sets of all columns are reset and rebuilt, the row
     * count from guessing is updated, and the value types are marked as guessed. The data set is closed in any case,
     * also when reading fails.
     * 
     * @see AbstractDataReader#PREVIEW_LINES
     * 
     * @param progress
     *            optional listener which is informed about the reading progress; may be <code>null</code>
     * @param enableErrorDetection
     *            if <code>true</code>, cells are read according to the value type of their column and cells which
     *            cannot be read that way are recorded as parse errors; if <code>false</code>, everything is read as
     *            a string
     * @param trimAttributeColumns
     *            if <code>true</code>, the number of columns is adjusted to the length of the first row
     * @param numberOfLinesRead
     *            the maximal number of rows to read; a negative value means read until the end of the data
     * @return the preview rows
     * @throws OperatorException
     *             if the data set cannot be opened or read
     */
    public List<Object[]> getPreviewAsList(ProgressListener progress, boolean enableErrorDetection, boolean trimAttributeColumns, int numberOfLinesRead) throws OperatorException {
        stopReading = false;
        int limit = numberOfLinesRead;
        if (progress != null) {
            progress.setTotal(rowCountFromGuessing);
        }
        // number of rows between two progress updates, derived from the previous row count
        int hundredPercent = rowCountFromGuessing / 100;
        hundredPercent = hundredPercent == 0 ? 10 : hundredPercent;
        rowCountFromGuessing = 0;

        if (numberOfLinesRead < 0) {
            limit = rowCountFromGuessing + 1;
        }

        errorCells.clear();
        // clear value sets
        for (AttributeColumn column : getAllAttributeColumns()) {
            column.valueSet.clear();
        }

        DataSet set = null;
        try {
            set = getDataSet();
        } catch (IOException e) {
            throw new UserError(this, e, 403, e.getMessage());
        }

        List<Object[]> preview = new LinkedList<Object[]>();
        // counting starts at one because the user sees it.
        int currentRow = 1;

        UnexpectedRowLenghtException rowLenghtWarning = null;
        try {
            while (!stopReading && set.next() && rowCountFromGuessing < limit) {

                rowLenghtWarning = null;

                if (progress != null && currentRow % hundredPercent == 0) {
                    progress.setCompleted(currentRow);
                }
                int actualColumnCount = set.getNumberOfColumnsInCurrentRow();

                if (getAllAttributeColumns().size() < actualColumnCount || getAllAttributeColumns().size() > actualColumnCount) {
                    // report too short/long line errors only after the first line
                    if (rowCountFromGuessing > 0) {
                        // column -1 marks an error which concerns the whole row
                        foundParseError(-1, rowCountFromGuessing);
                        if (getAllAttributeColumns().size() > actualColumnCount) {
                            rowLenghtWarning = new TooShortRowLengthException(currentRow, actualColumnCount, getAllAttributeColumns().size());
                        }
                        if (getAllAttributeColumns().size() < actualColumnCount) {
                            rowLenghtWarning = new TooLongRowLengthException(currentRow, actualColumnCount, getAllAttributeColumns().size());
                            // only extend number of attribute column, do not shrink
                            // them.
                            adjustAttributeColumnsNumbers(actualColumnCount);
                        }
                    } else {
                        if (trimAttributeColumns) {
                            adjustAttributeColumnsNumbers(actualColumnCount);
                        }
                    }

                }

                Object[] values = new Object[actualColumnCount + 1];

                // first column of the preview contains the line/row number
                values[0] = currentRow;
                currentRow++;

                // walk through the columns
                for (int i = 0; i < actualColumnCount; i++) {
                    AttributeColumn column = getAttributeColumn(i);

                    if (set.isMissing(i)) {
                        // missing cells are shown as empty strings
                        values[i + 1] = "";
                        continue;
                    } else {
                        if (enableErrorDetection) {
                            if (column.getValueType() == Ontology.DATE || column.getValueType() == Ontology.TIME || column.getValueType() == Ontology.DATE_TIME) {
                                values[i + 1] = set.getDate(i);
                            } else if (column.getValueType() == Ontology.INTEGER || column.getValueType() == Ontology.REAL || column.getValueType() == Ontology.NUMERICAL) {
                                values[i + 1] = set.getNumber(i);
                            } else if (column.getValueType() == Ontology.BINOMINAL) {
                                values[i + 1] = set.getString(i);
                                // look for value type error and update meta data
                                // information
                                if (values[i + 1] != null) {
                                    if (column.valueSet.size() >= 2) {
                                        // a third distinct value does not fit a binominal column
                                        if (!column.valueSet.contains(values[i + 1])) {
                                            if (column.isActivated)
                                                foundParseError(i, rowCountFromGuessing);
                                        }
                                    } else {
                                        column.valueSet.add((String) values[i + 1]);
                                    }
                                }
                            } else if (column.getValueType() == Ontology.NOMINAL) {
                                values[i + 1] = set.getString(i);
                                // update meta data information
                                column.valueSet.add((String) values[i + 1]);
                            }

                            // nothing could be read with the column's value type: show the raw string and record
                            // the error
                            if (values[i + 1] == null) {
                                values[i + 1] = set.getString(i);
                                if (column.isActivated)
                                    foundParseError(i, rowCountFromGuessing);
                            }
                        } else {
                            // read everything as a string
                            values[i + 1] = set.getString(i);
                        }

                    }
                }
                preview.add(values);
                rowCountFromGuessing++;
                // numberOfLinesRead == -1 means read till eof.:
                if (numberOfLinesRead < 0) {
                    limit = rowCountFromGuessing + 1;
                }
                // TODO integrate warning in the GUI
                if (rowLenghtWarning != null) {
                    // getLogger().warning(rowLenghtWarning.getMessage());
                }
            }
        } finally {
            // release the data set also when reading a row fails
            set.close();
        }
        guessedValueTypes = true;
        return preview;
    }

    /**
     * Records that the cell in the given column and row could not be parsed. The preview code passes column -1 for
     * errors which concern the length of the whole row.
     * 
     * @param column
     *            the column index of the cell
     * @param row
     *            the row index of the cell
     */
    private void foundParseError(int column, int row) {
        TreeSet<Integer> treeSet = errorCells.get(column);
        if (treeSet == null) {
            treeSet = new TreeSet<Integer>();
            // register the SAME set the row is added to; putting a second, fresh set into the map would lose the
            // first error of every column
            errorCells.put(column, treeSet);
        }
        treeSet.add(row);
    }

    /**
     * Tells whether at least one parse error was recorded for the given column.
     * 
     * @param column
     *            index of the column to check
     * @return <code>true</code> if the column has a non-empty set of error rows
     */
    public boolean hasParseErrorInColumn(int column) {
        TreeSet<Integer> rowsWithErrors = errorCells.get(column);
        if (rowsWithErrors == null) {
            return false;
        }
        return !rowsWithErrors.isEmpty();
    }

    /**
     * Tells whether a parse error was recorded for any column of the given row.
     * 
     * @param row
     *            index of the row to check
     * @return <code>true</code> as soon as one column reports an error for the row
     */
    public boolean hasParseErrorInRow(int row) {
        boolean found = false;
        Iterator<Integer> columns = errorCells.keySet().iterator();
        // stop at the first column that reports an error for this row
        while (!found && columns.hasNext()) {
            found = hasParseError(columns.next(), row);
        }
        return found;
    }

    /**
     * Tells whether a parse error was recorded for exactly the given cell.
     * 
     * @param column
     *            index of the column of the cell
     * @param row
     *            index of the row of the cell
     * @return <code>true</code> if the row is contained in the error set of the column
     */
    public boolean hasParseError(int column, int row) {
        TreeSet<Integer> rowsWithErrors = errorCells.get(column);
        return rowsWithErrors != null && rowsWithErrors.contains(row);
    }

    /**
     * @return the current value of the <code>detectErrorsInPreview</code> flag
     */
    public boolean isDetectErrorsInPreview() {
        return this.detectErrorsInPreview;
    }

    /**
     * Stores the given value in the <code>detectErrorsInPreview</code> flag.
     * 
     * @param detectErrorsInPreview
     *            the new value of the flag
     */
    public void setDetectErrorsInPreview(boolean detectErrorsInPreview) {
        final boolean newValue = detectErrorsInPreview;
        this.detectErrorsInPreview = newValue;
    }

    /**
     * @return always <code>true</code>
     */
    @Override
    protected boolean isMetaDataCacheable() {
        final boolean cacheable = true;
        return cacheable;
    }

    /**
     * Builds a name of the form "attribute_N" that is not used by any column returned by
     * {@link #getAllAttributeColumns()}.
     * 
     * @param column
     *            the first number to try as N; it is increased by one until the resulting name is unused
     * @return a column name that no existing column carries
     */
    protected String getNewGenericColumnName(int column) {
        Set<String> takenNames = new HashSet<String>();
        for (AttributeColumn existing : getAllAttributeColumns()) {
            takenNames.add(existing.getName());
        }

        String candidate = "attribute_" + column;
        while (takenNames.contains(candidate)) {
            column++;
            candidate = "attribute_" + column;
        }
        return candidate;
    }

    /**
     * Derives one column name per proposed name such that newly assigned names collide neither with the names of the
     * existing columns nor with each other.
     * <p>
     * A <code>null</code> proposal is replaced by "attribute_N". A proposal that equals the old name at the same
     * index is kept unchanged. Every other proposal that is already in use gets the suffix "_N", where N starts at
     * (number of used names + index + 1) and is increased until the name is unused.
     * </p>
     * 
     * @param proposedNames
     *            the proposed names, one per column; entries may be <code>null</code>
     * @param oldColumnNames
     *            the former names of the columns; must have at least as many entries as <code>proposedNames</code>
     * @return the resulting names, in the same order as <code>proposedNames</code>
     */
    private String[] getGenericColumnNames(String[] proposedNames, String[] oldColumnNames) {
        HashSet<String> usedNames = new HashSet<String>();
        for (AttributeColumn col : getAllAttributeColumns()) {
            usedNames.add(col.getName());
        }

        int offset = usedNames.size();
        String[] genericNames = new String[proposedNames.length];
        for (int i = 0; i < proposedNames.length; i++) {
            String proposedName = proposedNames[i];
            if (proposedName == null) {
                proposedName = "attribute_" + (offset + i + 1);
            }
            if (!proposedName.equals(oldColumnNames[i])) {
                if (usedNames.contains(proposedName)) {
                    // the first candidate is the historic one; keep counting up
                    // in case that suffixed name is taken as well
                    int suffix = offset + i + 1;
                    String candidate = proposedName + "_" + suffix;
                    while (usedNames.contains(candidate)) {
                        suffix++;
                        candidate = proposedName + "_" + suffix;
                    }
                    proposedName = candidate;
                }
                usedNames.add(proposedName);
            }
            genericNames[i] = proposedName;
        }
        return genericNames;
    }

    /**
     * Guesses the value type of every attribute column by sampling rows of the data set.
     * <p>
     * The first 20 rows are all inspected, afterwards only every 10th row. Sampling ends when the data set is
     * exhausted, when <code>stopReading</code> is set, or when more than {@link AbstractDataReader#PREVIEW_LINES} rows
     * were inspected. For each cell the numerical interpretation is tried first, then the date interpretation, and
     * finally the cell is treated as a string. After sampling, each column gets the most specific type that all of its
     * inspected cells allowed: integer, real, date/time, binominal (at most two distinct values) or nominal. A column
     * whose inspected cells were all missing becomes nominal.
     * </p>
     * 
     * @see AbstractDataReader#PREVIEW_LINES
     * 
     * @param progress
     *            receives progress updates; may be <code>null</code>
     * @throws OperatorException
     *             if the data set cannot be opened, or if a column parsed as date but its inspected values differ
     *             neither in their date part nor in their time part
     */
    public void guessValueTypes(ProgressListener progress) throws OperatorException {
        stopReading = false;

        DataSet set = null;
        try {
            set = getDataSet();
        } catch (IOException e) {
            throw new UserError(this, e, 403, e.getMessage());
        }

        int linesTried = 0;
        // scratch calendars for the date/time comparison; both are re-set before every use
        Calendar lastDateCalendar = Calendar.getInstance();
        Calendar currDateCalendar = Calendar.getInstance();

        try {
            if (progress != null) {
                progress.setTotal(PREVIEW_LINES);
            }
            int tenPercent = PREVIEW_LINES / 10;
            tenPercent = tenPercent == 0 ? 10 : tenPercent;
            rowCountFromGuessing = 0;

            // TODO introduce timeout instead PREVIEW_LINES
            while (!stopReading && set.next() && linesTried <= PREVIEW_LINES) {
                rowCountFromGuessing++;
                // only read every 10'th line to see more diverse cells
                if ((rowCountFromGuessing > 20) && (rowCountFromGuessing % 10 != 0)) {
                    continue;
                }
                if ((progress != null) && (linesTried % tenPercent == 0)) {
                    progress.setCompleted(linesTried);
                }
                linesTried++;

                int actualColumnCount = set.getNumberOfColumnsInCurrentRow();

                // add column(s) if the read row has more columns than previously seen
                if (getAllAttributeColumns().size() < actualColumnCount) {
                    adjustAttributeColumnsNumbers(actualColumnCount);
                }

                // go through the actual row
                for (int i = 0; i < actualColumnCount; i++) {
                    AttributeColumn column = getAttributeColumn(i);
                    if (set.isMissing(i)) {
                        column.incNummerOfMissing();
                        continue;
                    }

                    // try numerical value type
                    if (column.canParseDouble) {
                        Number number = set.getNumber(i);
                        if (number != null) {
                            double numericValue = number.doubleValue();
                            if (Double.isNaN(numericValue)) {
                                column.incNummerOfMissing();
                                continue;
                            }
                            // update min/max values
                            if (column.minValue > numericValue) {
                                column.minValue = numericValue;
                            }
                            if (column.maxValue < numericValue) {
                                column.maxValue = numericValue;
                            }
                            // a value with a fractional part rules out integer
                            if (column.canParseInteger) {
                                if (!Tools.isEqual(Math.round(numericValue), numericValue)) {
                                    column.canParseInteger = false;
                                }
                            }
                            // for guessing binomial.
                            if (column.valueSet.size() <= 2) {
                                column.valueSet.add(number.toString());
                            }
                            continue;
                        } else {
                            // numerical failed
                            column.canParseDouble = false;
                            column.canParseInteger = false;
                        }
                    }

                    // try date value type
                    if (column.canParseDate) {
                        Date date = set.getDate(i);
                        if (date != null) {
                            // compare with the previously read date of this column to find
                            // out whether the date part and/or the time part varies
                            if (column.lastDate != null) {
                                lastDateCalendar.setTime(column.lastDate);
                                currDateCalendar.setTime(date);
                                if (!column.shouldBeDate) {
                                    if (lastDateCalendar.get(Calendar.DAY_OF_MONTH) != currDateCalendar.get(Calendar.DAY_OF_MONTH)
                                            || lastDateCalendar.get(Calendar.MONTH) != currDateCalendar.get(Calendar.MONTH)
                                            || lastDateCalendar.get(Calendar.YEAR) != currDateCalendar.get(Calendar.YEAR)) {
                                        column.shouldBeDate = true;
                                    }
                                }
                                if (!column.shouldBeTime) {
                                    if (lastDateCalendar.get(Calendar.HOUR_OF_DAY) != currDateCalendar.get(Calendar.HOUR_OF_DAY)
                                            || lastDateCalendar.get(Calendar.MINUTE) != currDateCalendar.get(Calendar.MINUTE)
                                            || lastDateCalendar.get(Calendar.SECOND) != currDateCalendar.get(Calendar.SECOND)
                                            || lastDateCalendar.get(Calendar.MILLISECOND) != currDateCalendar.get(Calendar.MILLISECOND)) {
                                        column.shouldBeTime = true;
                                    }
                                }
                            }
                            column.lastDate = date;

                            if (column.minValue > date.getTime()) {
                                column.minValue = date.getTime();
                            }
                            if (column.maxValue < date.getTime()) {
                                column.maxValue = date.getTime();
                            }

                            // for guessing binomial.
                            if (column.valueSet.size() <= 2) {
                                column.valueSet.add(date.toString());
                            }
                            continue;
                        } else {
                            column.canParseDate = false;
                        }
                    }

                    // nothing worked, choose nominal
                    String string = set.getString(i);
                    if (string != null && !string.isEmpty()) {
                        // for guessing binomial.
                        if (column.valueSet.size() <= 2) {
                            column.valueSet.add(string);
                        }
                    } else {
                        column.incNummerOfMissing();
                    }
                }
            }
        } finally {
            // release the data set even if reading a row failed
            set.close();
        }

        // set up the guessed value types
        for (AttributeColumn column : getAllAttributeColumns()) {
            // Missing values are only counted on inspected rows, hence compare with
            // linesTried; rowCountFromGuessing also counts the skipped rows.
            if (column.numberOfMissings == linesTried) {
                column.setValueType(Ontology.NOMINAL);
            } else if (column.canParseInteger) {
                column.setValueType(Ontology.INTEGER);
            } else if (column.canParseDouble) {
                column.setValueType(Ontology.REAL);
            } else if (column.canParseDate) {
                if (column.shouldBeDate && column.shouldBeTime) {
                    column.setValueType(Ontology.DATE_TIME);
                } else if (column.shouldBeDate) {
                    column.setValueType(Ontology.DATE);
                } else if (column.shouldBeTime) {
                    column.setValueType(Ontology.TIME);
                } else {
                    // reached when no two consecutive inspected dates differed in any field
                    throw new OperatorException("Could not determine the value type of the attribute " + column.getName() + " in column number " + getIndexOfAttributeColumn(column) + ".");
                }
            } else if (column.valueSet.size() <= 2) {
                // maybe binomial?
                column.setValueType(Ontology.BINOMINAL);
            } else {
                // last option nominal
                column.setValueType(Ontology.NOMINAL);
            }
        }
        guessedValueTypes = true;
    }

    /**
     * Creates the example set by delegating to the private overload with a line limit of -1.
     */
    @Override
    public ExampleSet createExampleSet() throws OperatorException {
        final int lineLimit = -1;
        return createExampleSet(lineLimit);
    }

    /**
     * Creates an {@link ExampleSet} from the data set and reads at most <code>limitOfReadLines</code> lines.
     * <p>
     * If the attribute names were defined by the user, the column meta data is loaded from the parameters; otherwise
     * all reader settings are cleared and the value types are guessed. The data set is closed before this method
     * returns, no matter whether it returns normally or throws.
     * </p>
     * 
     * @param limitOfReadLines
     *            max number of read lines; -1 means no limit
     * @return the created example set including special attribute roles and annotations
     * @throws OperatorException
     *             if the data set cannot be opened, a row cannot be generated, or the process was stopped
     */
    private ExampleSet createExampleSet(int limitOfReadLines) throws OperatorException {
        List<Attribute> activeAttributes = new ArrayList<Attribute>();

        // load the attribute names/value types/roles/... which are defined by
        // the user
        if (attributeNamesDefinedByUser()) {
            loadMetaDataFromParameters();
        } else {
            clearAllReaderSettings();
            guessValueTypes(null);
        }

        // get the data set
        DataSet set = null;
        try {
            set = getDataSet();
        } catch (IOException e) {
            throw new UserError(this, e, 403, e.getMessage());
        }

        try {
            // finally create the actual attributes here
            for (AttributeColumn column : getActiveAttributeColumns()) {
                activeAttributes.add(column.getAttribute());
            }

            // list of double arrays which holds the read values for each line
            List<double[]> dataRows = new LinkedList<double[]>();

            // number of lines read so far; also enforces limitOfReadLines
            int lineCount = 0;
            while (set.next() && (limitOfReadLines == -1 || limitOfReadLines > lineCount)) {
                dataRows.add(generateRow(set, activeAttributes, lineCount));
                lineCount++;
                // check for abort of the process
                checkForStop();
            }

            // build the example table with the active attributes.
            // The attributes might have changed during the loop above
            // (Only if this is instance of CSVDataReader, see generateRow() ).
            // This happens if a line is read that has more columns than expected.
            MemoryExampleTable table = new MemoryExampleTable(activeAttributes);

            for (double[] row : dataRows) {
                // adapt the size of the row in case some attributes were added during
                // the reading; the additional cells are filled with missing values
                if (row.length < activeAttributes.size()) {
                    double[] values = new double[activeAttributes.size()];
                    System.arraycopy(row, 0, values, 0, row.length);
                    for (int i = row.length; i < values.length; i++) {
                        values[i] = Double.NaN;
                    }
                    row = values;
                }
                table.addDataRow(new DoubleArrayDataRow(row));

                // check for abort of the process
                checkForStop();
            }

            ExampleSet exampleSet = table.createExampleSet();

            // set special attributes
            for (AttributeColumn column : getActiveAttributeColumns()) {
                if (!column.getRole().equals(AttributeColumn.REGULAR)) {
                    exampleSet.getAttributes().setSpecialAttribute(exampleSet.getAttributes().get(column.getName()), column.getRole());
                }
            }
            // add annotations
            addAnnotations(exampleSet);
            return exampleSet;
        } finally {
            // single place that releases the data set, also when reading a row fails
            set.close();
        }
    }

    /**
     * Returns the parameter types of the super class followed by the error tolerance flag and two hidden parameters:
     * the per-column meta data list and the flag telling whether the attribute names were defined by the user.
     * 
     * @see com.rapidminer.operator.io.AbstractReader#getParameterTypes()
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = new LinkedList<ParameterType>(super.getParameterTypes());

        ParameterTypeBoolean errorTolerantType = new ParameterTypeBoolean(PARAMETER_ERROR_TOLERANT, "Values which does not match to the specified value typed are considered as missings.", true, true);
        types.add(errorTolerantType);

        // copy the role names into an array for the role category parameter
        final int roleCount = ROLE_NAMES.size();
        String[] roles = new String[roleCount];
        for (int r = 0; r < roleCount; r++) {
            roles[r] = ROLE_NAMES.get(r);
        }

        // hidden param: column index -> (name, selected, value type, role)
        ParameterTypeInt columnIndexType = new ParameterTypeInt(PARAMETER_COLUMN_INDEX, "The column index", 0, 9999);
        ParameterTypeString columnNameType = new ParameterTypeString(PARAMETER_COLUMN_NAME, "Describes the attributes name.");
        ParameterTypeBoolean columnSelectedType = new ParameterTypeBoolean(PARAMETER_COLUMN_SELECTED, "Indicates if a column is selected", true);
        ParameterTypeCategory columnValueTypeType = new ParameterTypeCategory(PARAMETER_COLUM_VALUE_TYPE, "Indicates the value type of an attribute", Ontology.VALUE_TYPE_NAMES, Ontology.NOMINAL);
        ParameterTypeStringCategory columnRoleType = new ParameterTypeStringCategory(PARAMETER_COLUM_ROLE, "Indicates the role of an attribute", roles, AttributeColumn.REGULAR);
        ParameterTypeTupel columnMetaDataType = new ParameterTypeTupel(PARAMETER_COLUMN_META_DATA, "the meta data information of one column", columnNameType, columnSelectedType, columnValueTypeType, columnRoleType);
        ParameterTypeList metaDataType = new ParameterTypeList(PARAMETER_META_DATA, "The meta data information", columnIndexType, columnMetaDataType);
        metaDataType.setHidden(true);
        types.add(metaDataType);

        // hidden param
        ParameterTypeBoolean namesDefinedByUserType = new ParameterTypeBoolean(PARAMETER_ATTRIBUTE_NAMES_DEFINED_BY_USER, "the parameter describes whether the attribute names were set by the user manually or were generated by the the reader (generic names or first row of the file)", false);
        namesDefinedByUserType.setHidden(true);
        types.add(namesDefinedByUserType);

        return types;
    }

    /**
     * Sets the parameter {@link AbstractDataReader#PARAMETER_ERROR_TOLERANT} and updates the cached copy of its value.
     * Use this method instead of setting the parameter directly, otherwise the cache is not updated.
     * 
     * @param flag
     *            the new value of the parameter
     */
    public void setErrorTolerant(boolean flag) {
        this.isErrorTollerantCache = flag;
        final String parameterValue = String.valueOf(flag);
        setParameter(PARAMETER_ERROR_TOLERANT, parameterValue);
    }

    /**
     * Returns the cached value of the parameter {@link AbstractDataReader#PARAMETER_ERROR_TOLERANT}. The value is
     * cached because reading the parameter for every line costs too much time.
     * 
     * @return the cached error tolerance flag
     */
    public boolean isErrorTolerant() {
        return this.isErrorTollerantCache;
    }

    /**
     * Observer that clears the reader settings if the source file is changed. Only relevant for {@link CSVDataReader}
     * and {@link ExcelExampleSource}
     * 
     * @author Sebastian Loh (14.07.2010)
     * 
     */
    protected class CacheResetParameterObserver implements Observer<String> {
        /** key of the parameter that holds the file name */
        private final String parameterKey;
        /** the file name seen at the last reset; <code>null</code> if none was seen yet */
        private String oldFilename;

        protected CacheResetParameterObserver(String parameterKey) {
            this.parameterKey = parameterKey;
        }

        /**
         * Clears all reader settings if the notification concerns the CSV or the Excel file parameter and the value
         * of the observed parameter differs from the file name remembered from the last reset.
         * 
         * @param observable
         *            the notifying object; not used
         * @param arg
         *            the key of the changed parameter; may be <code>null</code>
         */
        @Override
        public void update(Observable<String> observable, String arg) {
            if (arg == null) {
                return;
            }
            // react to either file parameter; everything else is irrelevant here
            boolean isFileParameter = arg.equals(CSVDataReader.PARAMETER_CSV_FILE) || arg.equals(ExcelExampleSource.PARAMETER_EXCEL_FILE);
            if (!isFileParameter) {
                return;
            }
            String newFilename = getParameters().getParameterOrNull(parameterKey);
            // null-safe inequality of the old and the new file name
            boolean filenameChanged = (newFilename == null) ? (oldFilename != null) : !newFilename.equals(oldFilename);
            if (filenameChanged) {
                clearAllReaderSettings();
                this.oldFilename = newFilename;
            }
        }
    }

    /**
     * Base class of the exceptions that report a row whose number of columns differs from the expected number.
     */
    private abstract class UnexpectedRowLenghtException extends OperatorException {
        private static final long serialVersionUID = 1L;

        private int rowNumber = -1;
        private int rowLenght = -1;
        int expectedRowLenght = -1;

        /**
         * @param message
         *            the exception message
         * @param rowNumber
         *            the row where the error occurred
         * @param rowLenght
         *            the number of columns found in that row
         * @param expectedRowLenght
         *            the number of columns that was expected
         */
        public UnexpectedRowLenghtException(String message, int rowNumber, int rowLenght, int expectedRowLenght) {
            super(message);
            this.rowNumber = rowNumber;
            this.rowLenght = rowLenght;
            this.expectedRowLenght = expectedRowLenght;
        }

        /**
         * Same as the four-argument constructor, with the placeholder message "NO MESSAGE".
         * 
         * @param rowNumber
         *            the row where the error occurred
         * @param rowLenght
         *            the number of columns found in that row
         * @param expectedRowLenght
         *            the number of columns that was expected
         */
        public UnexpectedRowLenghtException(int rowNumber, int rowLenght, int expectedRowLenght) {
            // delegate so that both lengths are stored as well, not only the row number
            this("NO MESSAGE", rowNumber, rowLenght, expectedRowLenght);
        }

        /**
         * Returns the row where the error occurred. <b>Warning:</b> you might want to add +1 if you intend to present
         * this number to the user.
         * 
         * @return the row number passed to the constructor
         */
        public int getRow() {
            return rowNumber;
        }

        /**
         * Returns the length of the row {@link UnexpectedRowLenghtException#rowNumber}
         * 
         * @return the row length passed to the constructor
         */
        public int getRowLenght() {
            return rowLenght;
        }

        /**
         * @return the expected row length passed to the constructor
         */
        public int getExpectedRowLenght() {
            return expectedRowLenght;
        }
    }

    /**
     * Reports a row that has fewer columns than expected. The message uses HTML bold markup.
     */
    public class TooShortRowLengthException extends UnexpectedRowLenghtException {

        private static final long serialVersionUID = -9183147637149034838L;

        /**
         * @param rowNumber
         *            the row where the error occurred
         * @param rowLenght
         *            the number of columns found in that row
         * @param expectedRowLenght
         *            the number of columns that was expected
         */
        public TooShortRowLengthException(int rowNumber, int rowLenght, int expectedRowLenght) {
            // closing tags must be "</b>"; "<//b>" is not a valid end tag
            super("Row number <b>" + rowNumber + "</b> is too <b>short</b>. The row has <b>" + rowLenght + "</b> columns but it is expected to have <b>" + expectedRowLenght + "</b> columns.", rowNumber, rowLenght, expectedRowLenght);
        }
    }

    /**
     * Reports a row that has more columns than expected. The message uses HTML bold markup.
     */
    public class TooLongRowLengthException extends UnexpectedRowLenghtException {

        private static final long serialVersionUID = -9079042758212112074L;

        /**
         * @param rowNumber
         *            the row where the error occurred
         * @param rowLenght
         *            the number of columns found in that row
         * @param expectedRowLenght
         *            the number of columns that was expected
         */
        public TooLongRowLengthException(int rowNumber, int rowLenght, int expectedRowLenght) {
            super(new StringBuilder()
                    .append("Row number <b>").append(rowNumber)
                    .append("</b> is too <b>long</b>. It has <b>").append(rowLenght)
                    .append("</b> columns but it is expected to have <b>").append(expectedRowLenght)
                    .append("</b> columns.")
                    .toString(), rowNumber, rowLenght, expectedRowLenght);
        }
    }

    public static class UnexpectedValueTypeException extends OperatorException {

        private static final long serialVersionUID = 1L;
        private int expectedValueType = -1;
        private int row = -1;
        private int column = -1;
        private Object value = null;

        public UnexpectedValueTypeException(String message, int expectedValueType, int column, int row, Object value) {
            super(message);
            this.expectedValueType = expectedValueType;
            this.row = row;
            this.column = column;
            this.value = value;
        }

        /**
         * Creates a proper error message;
         * 
         * @param expectedValueType
         * @param column
         * @param row
         * @param value
         */
        public UnexpectedValueTypeException(int expectedValueType, int column, int row, Object value) {
            this("Could not interpreted the value <b>" + value + "<//b> in row <b>" + row + "<//b> and column <b>" + column + "<//b> as a <b>" + Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(expectedValueType) + "<//b>. Plaese adjust to a proper value type or enable the operator's error tolerance.", expectedValueType, column, row, value);
        }

        /**
         * Returns the row where the error occurred. <b>Warning:</b> you might want to add +1 if you intend to present
         * this number to the user.
         * 
         * @return
         */
        public int getRow() {
            return row;
        }

        /**
         * Returns the column where the error occurred. <b>Warning:</b> you might want to add +1 if you intend to
         * present this number to the user.
         * 
         * @return
         */
        public int getColumn() {
            return column;
        }

        /**
         * Returns the value which caused the error
         * 
         * @return
         */
        public Object getValue() {
            return value;
        }

        /**
         * @return the expectedValueType
         */
        public int getExpectedValueType() {
            return expectedValueType;
        }
    }

    /**
     * @author Sebastian Loh (28.04.2010)
     * 
     *         <p>
     *         Class describing a column of the created ExampleSet. Holds all information (name, value type,
     *         annotations) needed to create the actual attribute for this column. In addition, the class manages
     *         further properties - e.g. the activation status (is the column actually selected to be read?) and the
     *         number of missing values - in order to build a proper meta data description.
     *         </p>
     * 
     * @see AbstractDataReader#getGeneratedMetaData()
     * @see AbstractDataReader#guessValueTypes(ProgressListener)
     * @see AbstractDataReader#createExampleSet()
     * 
     */
    public class AttributeColumn {

        public static final int NAME_PARAMETER = 0;

        public static final int IS_ACTIVATED_PARAMETER = 1;

        public static final int VALUE_TYPE_PARAMETER = 2;

        public static final int ROLE_PARAMETER = 3;

        /**
         * @return name, activation flag, value type, role and annotations of this column, separated by commas
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(name).append(",");
            text.append(isActivated).append(",");
            text.append(valueType).append(",");
            text.append(role).append(",");
            text.append(annotations);
            return text.toString();
        }

        /**
         * ugly workaround to define regular role instead of role = null;
         */
        public static final String REGULAR = "regular";

        private String name;

        private String role = REGULAR;

        private boolean isActivated = true;

        private int valueType = Ontology.NOMINAL;

        private Attribute attribute = null;

        /**
         * the column's annotations that are also the attribute's annotations, which is created from this column.
         */
        private Annotations annotations = new Annotations();

        /**
         * The minValue of this attribute. Only for the operator MetaData purposes.
         * <p>
         * NOTE(review): guessValueTypes() only lowers this field when <code>minValue &gt; value</code>. Starting at
         * negative infinity that comparison is false for every value, so the field keeps its initial value unless
         * it is reset elsewhere (not visible here). Presumably the infinite value stands for "unknown bound" -
         * confirm before relying on it.
         * </p>
         */
        protected double minValue = Double.NEGATIVE_INFINITY;

        /**
         * The maxValue of this attribute. Only for the operator MetaData purposes.
         * <p>
         * NOTE(review): guessValueTypes() only raises this field when <code>maxValue &lt; value</code>. Starting at
         * positive infinity that comparison is false for every value, so the field keeps its initial value unless
         * it is reset elsewhere (not visible here). Presumably the infinite value stands for "unknown bound" -
         * confirm before relying on it.
         * </p>
         */
        protected double maxValue = Double.POSITIVE_INFINITY;

        /**
         * The valueSet of this attribute, in case it is (bi)nominal. Only for the operator MetaData purposes.
         */
        protected Set<String> valueSet = new LinkedHashSet<String>();

        /**
         * The number of missing values which were read during the guessing. Only for the operator MetaData purposes.
         */
        protected int numberOfMissings = 0;

        /**
         * indicate whether this attribute is a candidate for value type real
         */
        private boolean canParseDouble = true;

        /**
         * indicate whether this attribute is a candidate for value type integer
         */
        private boolean canParseInteger = true;

        /**
         * indicate whether this attribute is a candidate for value type date
         */
        private boolean canParseDate = true;

        /**
         * indicate whether this attribute is a candidate for value type date without time
         */
        private boolean shouldBeDate = false;

        /**
         * indicate whether this attribute is a candidate for value type only time
         */
        private boolean shouldBeTime = false;

        /**
         * the last date which was read, to guess if it is date or date/time or both
         */
        private Date lastDate = null;

        /**
         * Increases the number of read missing values by one.
         * 
         * @return the number of missing values after the increment
         */
        public int incNummerOfMissing() {
            // pre-increment, so that the returned value is the one after the increase
            return ++numberOfMissings;
        }

        /**
         * @return the annotations object held by this column
         */
        public Annotations getAnnotations() {
            return this.annotations;
        }

        /**
         * Builds a new attribute from this column's name and value type, replaces the annotations of the new attribute
         * with the annotations of this column and remembers the attribute in the <code>attribute</code> field.
         * 
         * @return the newly created attribute
         */
        private Attribute createAttribute() {
            final Attribute created = AttributeFactory.createAttribute(getName(), getValueType());
            // discard the annotations the new attribute starts with, then copy the column's own
            created.getAnnotations().clear();
            created.getAnnotations().putAll(getAnnotations());
            this.attribute = created;
            return created;
        }

        /**
         * Returns the {@link Attribute} of this column. A new attribute is created if none was created before, or if
         * the name, the value type or the value of one of this column's annotations no longer matches the attribute
         * created earlier. Otherwise the attribute created earlier is returned.
         * 
         * @return the attribute matching the current settings of this column
         */
        public Attribute getAttribute() {
            if (attribute == null || !attribute.getName().equals(getName()) || attribute.getValueType() != this.getValueType()) {
                return createAttribute();
            }
            // check same annotations
            for (String key : this.getAnnotations().getKeys()) {
                Object expected = this.getAnnotations().get(key);
                Object cached = attribute.getAnnotations().get(key);
                // null-safe: a key that the cached attribute lacks counts as a difference
                boolean same = (cached == null) ? (expected == null) : cached.equals(expected);
                if (!same) {
                    return createAttribute();
                }
            }
            // else the attribute information are equal:
            return attribute;
        }

        /**
         * Indicates whether this column is actually read/imported or ignored.
         * 
         * @return <code>true</code> if the column is active
         */
        public boolean isActivated() {
            return this.isActivated;
        }

        /**
         * Activates or deactivates this column.
         * 
         * @param flag
         *            <code>true</code> to activate the column, <code>false</code> to deactivate it
         */
        public void activateColumn(boolean flag) {
            this.isActivated = flag;
        }

        /**
         * Returns the value type stored for this column.
         * 
         * @see Ontology
         * 
         * @return the value type
         */
        public int getValueType() {
            return this.valueType;
        }

        /**
         * Stores the given value type for this column. This method itself does not touch an already created
         * attribute; it only updates the stored type.
         * 
         * @param newValueType
         *            the value type to store
         */
        public void setValueType(int newValueType) {
            this.valueType = newValueType;
        }

        /**
         * Returns the name of this column, which is also the name given to the attribute created from this column.
         * 
         * @return the name
         */
        public String getName() {
            return this.name;
        }

        /**
         * Stores the given name for this column.
         * 
         * @param name
         *            the new name
         */
        public void setName(String name) {
            final String newName = name;
            this.name = newName;
        }

        /**
         * Returns the role stored for this column as a String.
         * 
         * @return the role
         */
        public String getRole() {
            return this.role;
        }

        /**
         * Stores the given role for this column.
         * 
         * @param role
         *            the new role
         */
        public void setRole(String role) {
            final String newRole = role;
            this.role = newRole;
        }

        /**
         * Looks up one meta data value of this column in the parameter list {@link #PARAMETER_META_DATA}.
         * 
         * @param parameterIndex
         *            position of the wanted value inside the tuple stored for this column
         * @return the stored value, or <code>null</code> if the list has no entry for the index of this column or
         *         the parameter is undefined
         */
        private String getMetaParameter(int parameterIndex) {
            final int ownIndex = getIndexOfAttributeColumn(this);

            try {
                // find the first entry whose key is the index of this column
                String[] match = null;
                Iterator<String[]> entries = getParameterList(PARAMETER_META_DATA).iterator();
                while (match == null && entries.hasNext()) {
                    String[] entry = entries.next();
                    if (Integer.parseInt(entry[0]) == ownIndex) {
                        match = entry;
                    }
                }
                if (match == null) {
                    return null;
                }

                // the second element of the entry holds the tuple with all values of the column
                String[] columnValues = ParameterTypeTupel.transformString2Tupel(match[1]);
                return columnValues[parameterIndex];
            } catch (UndefinedParameterError e) {
                e.printStackTrace();
                return null;
            }
        }

        /**
         * Applies the meta data stored for this column (name, value type, role and activation state) to this
         * column. Values for which no meta data is stored (<code>null</code>) are left unchanged.
         */
        private void loadMetaParameter() {
            // each value is looked up exactly once: every call of getMetaParameter reads and
            // scans the complete meta data parameter list again
            String storedName = getMetaParameter(NAME_PARAMETER);
            if (storedName != null) {
                setName(storedName);
            }

            String storedValueType = getMetaParameter(VALUE_TYPE_PARAMETER);
            if (storedValueType != null) {
                setValueType(Integer.parseInt(storedValueType));
            }

            String storedRole = getMetaParameter(ROLE_PARAMETER);
            if (storedRole != null) {
                setRole(storedRole);
            }

            String storedActivation = getMetaParameter(IS_ACTIVATED_PARAMETER);
            if (storedActivation != null) {
                activateColumn(Boolean.parseBoolean(storedActivation));
            }
        }

        /**
         * Stores the current properties of this {@link AttributeColumn} (name, activation state, value type and
         * role) in the <code>PARAMETER_META_DATA</code> parameter.
         * <p>
         * If the parameter list does not yet contain an entry for the index of this attribute column (see
         * <code>getIndexOfAttributeColumn(this)</code>), a new entry is created; otherwise the existing entry is
         * overwritten.
         * </p>
         * <p>
         * The components of the stored tuple are addressed by {@link AttributeColumn#NAME_PARAMETER},
         * {@link AttributeColumn#IS_ACTIVATED_PARAMETER}, {@link AttributeColumn#VALUE_TYPE_PARAMETER} and
         * {@link AttributeColumn#ROLE_PARAMETER}.
         * </p>
         */
        private void setMetaParameter() {
            // number of components of a meta data tuple: name, activation, value type, role
            final int tupleSize = 4;
            int myIndex = getIndexOfAttributeColumn(this);

            try {
                List<String[]> list = getParameterList(PARAMETER_META_DATA);

                // every list entry is a pair: [0] = column index, [1] = encoded meta data tuple;
                // find the entry that belongs to this column
                String[] map = null;
                for (String[] mapIndexToValues : list) {
                    if (Integer.parseInt(mapIndexToValues[0]) == myIndex) {
                        map = mapIndexToValues;
                        break;
                    }
                }

                String[] metadata;
                if (map == null) {
                    // no entry for this attribute column yet: create and register a new one
                    map = new String[2];
                    map[0] = Integer.toString(myIndex);
                    list.add(map);
                    metadata = new String[tupleSize];
                } else {
                    metadata = ParameterTypeTupel.transformString2Tupel(map[1]);
                    // a stored tuple with too few components would make the writes below fail
                    // with an ArrayIndexOutOfBoundsException, so pad it to the full size
                    if (metadata.length < tupleSize) {
                        String[] padded = new String[tupleSize];
                        System.arraycopy(metadata, 0, padded, 0, metadata.length);
                        metadata = padded;
                    }
                }

                // write the current state of this column into the tuple
                metadata[NAME_PARAMETER] = name;
                metadata[IS_ACTIVATED_PARAMETER] = Boolean.toString(isActivated);
                metadata[VALUE_TYPE_PARAMETER] = Integer.toString(valueType);
                metadata[ROLE_PARAMETER] = role;

                // encode the tuple and put it back into the list entry
                map[1] = ParameterTypeTupel.transformTupel2String(metadata);

                // store the modified list in the parameter
                String entry = ParameterTypeList.transformList2String(list);
                setParameter(PARAMETER_META_DATA, entry);
            } catch (UndefinedParameterError e) {
                e.printStackTrace();
            }
        }

        /**
         * Creates a new column with the given name. All other properties (value type, role, ...) keep their default
         * values.
         * 
         * @param attributeName
         *            the name of the new column
         */
        public AttributeColumn(String attributeName) {
            // only the name is set explicitly; value type and the other
            // properties are implicitly left at their default values
            setName(attributeName);
        }
    }

    /**
     * Row-wise view on a data source: {@link #next()} advances to the next row, the remaining methods access the
     * values of the current row by column index, and {@link #close()} releases the data source.
     */
    protected abstract class DataSet {
        /**
         * Proceeds to the next row if one exists.
         * 
         * @return true if such a next row exists, false if there is no further row
         */
        public abstract boolean next();

        /**
         * Returns the number of columns in the current row, i.e. the length of the row.
         * 
         * @return the number of columns in the current row
         */
        public abstract int getNumberOfColumnsInCurrentRow();

        /**
         * Returns whether the value in the specified column in the current row is missing.
         * 
         * @param columnIndex
         *            index of the column
         * @return true if the value is missing
         */
        public abstract boolean isMissing(int columnIndex);

        /**
         * Returns a numerical value contained in the specified column in the current row.
         * 
         * @param columnIndex
         *            index of the column
         * @return the numerical value; implementations should return null if the value is not numerical or if the
         *         value is missing
         */
        public abstract Number getNumber(int columnIndex);

        /**
         * Returns a nominal value contained in the specified column in the current row.
         * 
         * @param columnIndex
         *            index of the column
         * @return the nominal value; implementations should return null if the value is not a nominal or a kind of
         *         string type or if the value is missing
         */
        public abstract String getString(int columnIndex);

        /**
         * Returns a date, time or date_time value contained in the specified column in the current row.
         * 
         * @param columnIndex
         *            index of the column
         * @return the date value; implementations should return null if the value is not a date or time value or if
         *         the value is missing
         */
        public abstract Date getDate(int columnIndex);

        /**
         * Closes the data source. May tear down a database connection or close a file which is read from.
         * 
         * @throws OperatorException
         *             implementations may use this to signal problems while closing the data source
         */
        public abstract void close() throws OperatorException;
    }

}