StataExampleSource.java example

Explorer
ComplexRapidMiner-master
- operator
- src
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.att.AttributeSet;
import com.rapidminer.tools.math.sampling.OrderedSamplingWithoutReplacement;


/**
 * This operator can read stata files. Currently only stata
 * files of version 113 or 114 are supported.
 * 
 * The file is parsed sequentially: header, variable descriptors (types, names,
 * sort list, display formats, value label identifiers, variable labels),
 * expansion fields, the data section and finally the value label tables.
 * Examples can be sampled while reading, either by an exact sample size or by
 * a sample ratio.
 * 
 * @rapidminer.index stata
 * @author Tobias Malbrecht
 * @version $Id: StataExampleSource.java,v 1.1 2008/06/03 22:26:50 tobiasmalbrecht Exp $
 */
public class StataExampleSource extends BytewiseExampleSource {

	/** The parameter name for "Determines which variable properties should be used for attribute naming." */
	public static final String PARAMETER_ATTRIBUTE_NAMING_MODE = "attribute_naming_mode";

	/** The parameter name for "Specifies how to handle attributes with value labels, i.e. whether to ignore the labels or how to use them." */
	public static final String PARAMETER_HANDLE_VALUE_LABELS = "handle_value_labels";

	/** The parameter name for "The fraction of the data set which should be read (1 = all; only used if sample_size = -1)" */
	public static final String PARAMETER_SAMPLE_RATIO = "sample_ratio";

	/** The parameter name for "The exact number of samples which should be read (-1 = all; if not -1, sample_ratio will not have any effect)" */
	public static final String PARAMETER_SAMPLE_SIZE = "sample_size";

	/** The parameter name for "Use the given random seed instead of global random numbers (for sampling by ratio, -1: use global)." */
	public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

	/** File suffix for stata files. */
	private static final String STATA_FILE_SUFFIX = "dta";

	/** Only use variable name as attribute name. */
	public static final int USE_VAR_NAME = 0;

	/** Only use variable label as attribute name. */
	public static final int USE_VAR_LABEL = 1;

	/** Use variable name with label in parentheses as attribute name. */
	public static final int USE_VAR_NAME_LABELED = 2;

	/** Use variable label with name in parentheses as attribute name. */
	public static final int USE_VAR_LABEL_NAMED = 3;

	/** String descriptions of attribute naming modes. */
	public static final String[] ATTRIBUTE_NAMING_MODES = { "name", "label", "name (label)", "label (name)" };

	/** Force attributes to be numeric even if value labels exist. */
	public static final int FORCE_NUMERIC = 0;

	/** Ignore existing value labels but let attribute be nominal. */
	public static final int IGNORE = 1;

	/** Use existing value labels for labeled values. */
	public static final int USE_ADDITIONALLY = 2;

	/** Use existing value labels and set all values without labels to unknown. */
	public static final int USE_EXCLUSIVELY = 3;

	/** String descriptions of value label handling modes. */
	public static final String[] HANDLE_VALUE_LABELS_MODES = { "force numeric", "ignore", "use additionally", "use exclusively" };

	/** Size of the scratch buffer used for all fixed-size reads. */
	private static final int BUFFER_SIZE = 500;

	/** Byte which terminates strings inside fixed-width fields. */
	private static final int CODE_STRING_TERMINATOR = 0x0;

	// --- header: accepted codes, total length and byte offsets of the fields that are evaluated ---

	private static final int CODE_DS_FORMAT_VERSION_113 = 0x71;

	private static final int CODE_DS_FORMAT_VERSION_114 = 0x72;

	private static final int CODE_BYTEORDER_HILO = 0x01;

	private static final int CODE_BYTEORDER_LOHI = 0x02;

	private static final int CODE_FILETYPE = 0x01;

	private static final int LENGTH_HEADER = 109;

	private static final int INDEX_HEADER_DS_FORMAT = 0;

	private static final int INDEX_HEADER_BYTEORDER = 1;

	private static final int INDEX_HEADER_FILETYPE = 2;

	private static final int INDEX_HEADER_NUMBER_OF_ATTRIBUTES = 4;

	private static final int INDEX_HEADER_NUMBER_OF_EXAMPLES = 6;

	// --- type codes of numeric variables; every other code is read as the byte length of a string variable ---

	private static final int CODE_TYPE_BYTE = 0xfb;

	private static final int CODE_TYPE_INT = 0xfc;

	private static final int CODE_TYPE_LONG = 0xfd;

	private static final int CODE_TYPE_FLOAT = 0xfe;

	private static final int CODE_TYPE_DOUBLE = 0xff;

	// --- number of bytes a single value of each numeric type occupies in the data section ---

	private static final int LENGTH_TYPE_BYTE = 1;

	private static final int LENGTH_TYPE_INT = 2;

	private static final int LENGTH_TYPE_LONG = 4;

	private static final int LENGTH_TYPE_FLOAT = 4;

	private static final int LENGTH_TYPE_DOUBLE = 8;

	// --- widths of the fixed-width descriptor fields (per attribute) ---

	private static final int LENGTH_ATTRIBUTE_NAME = 33;

	private static final int LENGTH_ATTRIBUTE_FORMAT_VERSION_113 = 12;

	private static final int LENGTH_ATTRIBUTE_FORMAT_VERSION_114 = 49;

	private static final int LENGTH_ATTRIBUTE_VALUE_LABEL_IDENTIFIER = 33;

	private static final int LENGTH_ATTRIBUTE_LABEL = 81;

	// --- expansion field header: one type byte followed by a 4 byte content length ---

	private static final int LENGTH_EXPANSION_FIELD_HEADER = 5;

	private static final int INDEX_EXPANSION_FIELD_HEADER_TYPE = 0;

	private static final int INDEX_EXPANSION_FIELD_HEADER_LENGTH = 1;

	// --- value label header (table length and identifier) and offsets inside a value label table ---

	private static final int LENGTH_VALUE_LABEL_HEADER = 40;

	private static final int INDEX_VALUE_LABEL_HEADER_LENGTH = 0;

	private static final int INDEX_VALUE_LABEL_HEADER_NAME = 4;

	private static final int LENGTH_VALUE_LABEL_HEADER_NAME = 33;

	private static final int INDEX_VALUE_LABEL_TABLE_NUMBER_OF_ENTRIES = 0;

	private static final int INDEX_VALUE_LABEL_TABLE_TEXT_LENGTH = 4;

	private static final int INDEX_VALUE_LABEL_TABLE_OFFSETS = 8;

	// --- largest values which are regarded as data; anything greater is read as a missing value ---

	private static final byte CODE_MAXIMUM_NONMISSING_BYTE = 100;

	private static final int CODE_MAXIMUM_NONMISSING_INT = 32740;

	private static final int CODE_MAXIMUM_NONMISSING_LONG = 2147483620;

	private static final double CODE_MAXIMUM_NONMISSING_FLOAT = 1.701e+38;

	private static final double CODE_MAXIMUM_NONMISSING_DOUBLE = 8.988e+307;

	public StataExampleSource(OperatorDescription description) {
		super(description);
	}

	/** Returns the file suffix of stata files ("dta"). */
	protected String getFileSuffix() {
		return STATA_FILE_SUFFIX;
	}

	/**
	 * Reads the given stata file and creates an example set from it.
	 * 
	 * @param file the stata file to read
	 * @param dataRowFactory the factory used to create one data row per example
	 * @return the example set containing all sampled examples
	 * @throws IOException if the file cannot be read, has an unsupported format version or is malformed
	 * @throws UndefinedParameterError if one of the operator parameters is not defined
	 */
	protected ExampleSet readFile(File file, DataRowFactory dataRowFactory) throws IOException, UndefinedParameterError {
		int attributeNamingMode = getParameterAsInt(PARAMETER_ATTRIBUTE_NAMING_MODE);
		int handleValueLabelsMode = getParameterAsInt(PARAMETER_HANDLE_VALUE_LABELS);
		double sampleRatio = getParameterAsDouble(PARAMETER_SAMPLE_RATIO);
		int sampleSize = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
		RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));

		FileInputStream fileReader = new FileInputStream(file);
		try {
			byte[] buffer = new byte[BUFFER_SIZE];

			// read and check header
			read(fileReader, buffer, LENGTH_HEADER);
			int dataSetFormat = 0x000000FF & buffer[INDEX_HEADER_DS_FORMAT];
			if (dataSetFormat != CODE_DS_FORMAT_VERSION_113 && dataSetFormat != CODE_DS_FORMAT_VERSION_114) {
				throw new IOException("Unsupported data set format");
			}
			if (buffer[INDEX_HEADER_FILETYPE] != CODE_FILETYPE) {
				throw new IOException(GENERIC_ERROR_MESSAGE);
			}
			byte byteOrder = buffer[INDEX_HEADER_BYTEORDER];
			if (byteOrder != CODE_BYTEORDER_LOHI && byteOrder != CODE_BYTEORDER_HILO) {
				throw new IOException(GENERIC_ERROR_MESSAGE);
			}
			boolean reverseEndian = byteOrder == CODE_BYTEORDER_LOHI;
			int numberOfAttributes = extract2ByteInt(buffer, INDEX_HEADER_NUMBER_OF_ATTRIBUTES, reverseEndian);
			int numberOfExamples = extractInt(buffer, INDEX_HEADER_NUMBER_OF_EXAMPLES, reverseEndian);
			if (numberOfAttributes < 0 || numberOfExamples < 0) {
				// negative counts can only stem from a corrupt header
				throw new IOException(GENERIC_ERROR_MESSAGE);
			}

			// read type descriptors directly into their own array; the scratch
			// buffer might be smaller than the number of attributes
			byte[] attributeTypes = new byte[numberOfAttributes];
			read(fileReader, attributeTypes, numberOfAttributes);

			// read attribute names
			String[] attributeNames = new String[numberOfAttributes];
			for (int i = 0; i < numberOfAttributes; i++) {
				attributeNames[i] = readTerminatedString(fileReader, buffer, LENGTH_ATTRIBUTE_NAME);
			}

			// skip sort list (2 bytes per entry, one entry more than attributes)
			skipBytes(fileReader, buffer, 2 * (numberOfAttributes + 1));

			// skip format list
			int formatLength = dataSetFormat == CODE_DS_FORMAT_VERSION_113 ? LENGTH_ATTRIBUTE_FORMAT_VERSION_113 : LENGTH_ATTRIBUTE_FORMAT_VERSION_114;
			skipBytes(fileReader, buffer, numberOfAttributes * formatLength);

			// read value label identifiers
			String[] valueLabelsIdentifiers = new String[numberOfAttributes];
			boolean[] labeled = new boolean[numberOfAttributes];
			for (int i = 0; i < numberOfAttributes; i++) {
				String identifier = readTerminatedString(fileReader, buffer, LENGTH_ATTRIBUTE_VALUE_LABEL_IDENTIFIER);
				// Only numeric variables can be relabeled: the values of string
				// variables are already nominal indices, not raw numbers.
				labeled[i] = buffer[0] != 0 && isNumericType(0x000000FF & attributeTypes[i]);
				valueLabelsIdentifiers[i] = identifier.length() == 0 ? null : identifier;
			}

			// read attribute labels
			String[] attributeLabels = new String[numberOfAttributes];
			for (int i = 0; i < numberOfAttributes; i++) {
				String label = readTerminatedString(fileReader, buffer, LENGTH_ATTRIBUTE_LABEL);
				attributeLabels[i] = label.length() == 0 ? null : label;
			}

			// skip expansion fields; the list ends with a header of type 0 and length 0
			for (;;) {
				read(fileReader, buffer, LENGTH_EXPANSION_FIELD_HEADER);
				int expansionFieldContentsLength = extractInt(buffer, INDEX_EXPANSION_FIELD_HEADER_LENGTH, reverseEndian);
				if (buffer[INDEX_EXPANSION_FIELD_HEADER_TYPE] == 0 && expansionFieldContentsLength == 0) {
					break;
				}
				if (expansionFieldContentsLength < 0) {
					throw new IOException(GENERIC_ERROR_MESSAGE);
				}
				skipBytes(fileReader, buffer, expansionFieldContentsLength);
			}

			// create attributes and remember which attributes use which value label set
			LinkedHashMap<String, List<Attribute>> attributeValueLabelIdentifiersMap = new LinkedHashMap<String, List<Attribute>>();
			AttributeSet attributeSet = new AttributeSet(numberOfAttributes);
			for (int i = 0; i < numberOfAttributes; i++) {
				int valueType = getValueType(0x000000FF & attributeTypes[i]);
				if (labeled[i] && handleValueLabelsMode != FORCE_NUMERIC) {
					valueType = Ontology.NOMINAL;
				}
				String attributeName = createAttributeName(attributeNamingMode, attributeNames[i], attributeLabels[i]);
				Attribute attribute = AttributeFactory.createAttribute(attributeName, valueType);
				attributeSet.addAttribute(attribute);
				String identifier = valueLabelsIdentifiers[i];
				if (labeled[i] && identifier != null) {
					List<Attribute> attachedAttributes = attributeValueLabelIdentifiersMap.get(identifier);
					if (attachedAttributes == null) {
						attachedAttributes = new LinkedList<Attribute>();
						attributeValueLabelIdentifiersMap.put(identifier, attachedAttributes);
					}
					attachedAttributes.add(attribute);
				}
			}

			// initialize sampling functionality
			OrderedSamplingWithoutReplacement sampling;
			if (sampleSize != -1) {
				sampling = new OrderedSamplingWithoutReplacement(randomGenerator, numberOfExamples, sampleSize);
			} else {
				sampling = new OrderedSamplingWithoutReplacement(randomGenerator, numberOfExamples, sampleRatio);
			}

			// read data; every row must be consumed from the stream even if it is not sampled
			MemoryExampleTable table = new MemoryExampleTable(attributeSet.getAllAttributes());
			for (int j = 0; j < numberOfExamples; j++) {
				DataRow dataRow = dataRowFactory.create(numberOfAttributes);
				for (int i = 0; i < numberOfAttributes; i++) {
					Attribute attribute = attributeSet.getAttribute(i);
					double value = readValue(fileReader, buffer, 0x000000FF & attributeTypes[i], reverseEndian, attribute);
					dataRow.set(attribute, value);
				}
				if (sampling.acceptElement()) {
					table.addDataRow(dataRow);
				}
			}

			// read value label tables until the end of the file
			LinkedHashMap<Attribute, LinkedHashMap<Double, String>> valueMappingsMap = new LinkedHashMap<Attribute, LinkedHashMap<Double, String>>();
			int readLength;
			do {
				readLength = readWithoutLengthCheck(fileReader, buffer, LENGTH_VALUE_LABEL_HEADER);
				if (readLength > 0) {
					if (readLength < LENGTH_VALUE_LABEL_HEADER) {
						// truncated header: the remaining buffer content would be stale data
						throw new IOException(GENERIC_ERROR_MESSAGE);
					}
					int length = extractInt(buffer, INDEX_VALUE_LABEL_HEADER_LENGTH, reverseEndian);
					String valueLabelIdentifier = cutAtTerminator(new String(buffer, INDEX_VALUE_LABEL_HEADER_NAME, LENGTH_VALUE_LABEL_HEADER_NAME));
					LinkedHashMap<Double, String> valueMap = readValueLabelTable(fileReader, buffer, length, reverseEndian);
					// a value label set may be defined in the file without being attached to any attribute
					List<Attribute> attachedAttributes = attributeValueLabelIdentifiersMap.get(valueLabelIdentifier);
					if (attachedAttributes != null) {
						for (Attribute attribute : attachedAttributes) {
							valueMappingsMap.put(attribute, valueMap);
						}
					}
				}
			} while (readLength >= 0);

			// add value labels to data
			if (handleValueLabelsMode != FORCE_NUMERIC) {
				applyValueLabels(table, labeled, valueMappingsMap, handleValueLabelsMode);
			}

			// create example set
			return table.createExampleSet();
		} finally {
			try {
				fileReader.close();
			} catch (IOException ignored) {
				// the stream was only read from; a failing close must not hide
				// the result or an exception thrown while parsing
			}
		}
	}

	/**
	 * Reads a fixed-width field of the given length and returns its trimmed
	 * content up to the first string terminator (or the whole field if no
	 * terminator is present).
	 */
	private String readTerminatedString(FileInputStream fileReader, byte[] buffer, int length) throws IOException {
		read(fileReader, buffer, length);
		return cutAtTerminator(new String(buffer, 0, length));
	}

	/** Consumes the given number of bytes from the stream in chunks which fit into the buffer. */
	private void skipBytes(FileInputStream fileReader, byte[] buffer, int numberOfBytes) throws IOException {
		int remaining = numberOfBytes;
		while (remaining > 0) {
			int chunkLength = Math.min(remaining, buffer.length);
			read(fileReader, buffer, chunkLength);
			remaining -= chunkLength;
		}
	}

	/**
	 * Reads one value of the given type from the data section. Numeric values
	 * greater than the largest non-missing value of their type are returned as
	 * NaN; strings are mapped to the index delivered by the nominal mapping of
	 * the attribute.
	 */
	private double readValue(FileInputStream fileReader, byte[] buffer, int typeCode, boolean reverseEndian, Attribute attribute) throws IOException {
		switch (typeCode) {
		case CODE_TYPE_BYTE: {
			read(fileReader, buffer, LENGTH_TYPE_BYTE);
			byte byteValue = buffer[0];
			return byteValue > CODE_MAXIMUM_NONMISSING_BYTE ? Double.NaN : byteValue;
		}
		case CODE_TYPE_INT: {
			read(fileReader, buffer, LENGTH_TYPE_INT);
			int intValue = extract2ByteInt(buffer, 0, reverseEndian);
			return intValue > CODE_MAXIMUM_NONMISSING_INT ? Double.NaN : intValue;
		}
		case CODE_TYPE_LONG: {
			read(fileReader, buffer, LENGTH_TYPE_LONG);
			int longValue = extractInt(buffer, 0, reverseEndian);
			return longValue > CODE_MAXIMUM_NONMISSING_LONG ? Double.NaN : longValue;
		}
		case CODE_TYPE_FLOAT: {
			read(fileReader, buffer, LENGTH_TYPE_FLOAT);
			float floatValue = extractFloat(buffer, 0, reverseEndian);
			return floatValue > CODE_MAXIMUM_NONMISSING_FLOAT ? Double.NaN : floatValue;
		}
		case CODE_TYPE_DOUBLE: {
			read(fileReader, buffer, LENGTH_TYPE_DOUBLE);
			double doubleValue = extractDouble(buffer, 0, reverseEndian);
			return doubleValue > CODE_MAXIMUM_NONMISSING_DOUBLE ? Double.NaN : doubleValue;
		}
		default: {
			// string variable: the type code is the length of the field in bytes
			read(fileReader, buffer, typeCode);
			String stringValue = cutAtTerminator(new String(buffer, 0, typeCode));
			return attribute.getMapping().mapString(stringValue);
		}
		}
	}

	/**
	 * Reads a value label table of the given length and returns the mapping
	 * from numeric values to their labels. The table consists of the number of
	 * entries, the length of the text area, one text offset per entry, one
	 * value per entry and finally the text area itself.
	 * 
	 * @throws IOException if the table cannot be read or its counts, lengths or offsets are inconsistent
	 */
	private LinkedHashMap<Double, String> readValueLabelTable(FileInputStream fileReader, byte[] buffer, int length, boolean reverseEndian) throws IOException {
		if (length < INDEX_VALUE_LABEL_TABLE_OFFSETS) {
			throw new IOException(GENERIC_ERROR_MESSAGE);
		}
		// the scratch buffer is only reused if the whole table fits into it
		byte[] tableBuffer = buffer.length >= length ? buffer : new byte[length];
		read(fileReader, tableBuffer, length);

		int numberOfEntries = extractInt(tableBuffer, INDEX_VALUE_LABEL_TABLE_NUMBER_OF_ENTRIES, reverseEndian);
		int textLength = extractInt(tableBuffer, INDEX_VALUE_LABEL_TABLE_TEXT_LENGTH, reverseEndian);
		if (numberOfEntries < 0 || textLength < 0) {
			throw new IOException(GENERIC_ERROR_MESSAGE);
		}
		// computed as long so that corrupt counts cannot overflow the check
		long requiredLength = INDEX_VALUE_LABEL_TABLE_OFFSETS + 2L * numberOfEntries * LENGTH_INT_32 + textLength;
		if (requiredLength > length) {
			throw new IOException(GENERIC_ERROR_MESSAGE);
		}

		int valuesStart = INDEX_VALUE_LABEL_TABLE_OFFSETS + numberOfEntries * LENGTH_INT_32;
		int textStart = valuesStart + numberOfEntries * LENGTH_INT_32;
		LinkedHashMap<Double, String> valueMap = new LinkedHashMap<Double, String>();
		for (int i = 0; i < numberOfEntries; i++) {
			int offset = extractInt(tableBuffer, INDEX_VALUE_LABEL_TABLE_OFFSETS + i * LENGTH_INT_32, reverseEndian);
			if (offset < 0 || offset > textLength) {
				throw new IOException(GENERIC_ERROR_MESSAGE);
			}
			double value = extractInt(tableBuffer, valuesStart + i * LENGTH_INT_32, reverseEndian);
			// extract everything up to the end of the text area, then cut at the terminator
			String nominalValue = extractString(tableBuffer, textStart + offset, textLength - offset);
			valueMap.put(value, cutAtTerminator(nominalValue));
		}
		return valueMap;
	}

	/**
	 * Replaces the raw numeric values of all labeled attributes by indices of
	 * nominal values according to the given value label handling mode. Missing
	 * values stay missing. Labeled attributes without a value label table in
	 * the file are treated as if their table was empty.
	 */
	private static void applyValueLabels(MemoryExampleTable table, boolean[] labeled, LinkedHashMap<Attribute, LinkedHashMap<Double, String>> valueMappingsMap, int handleValueLabelsMode) {
		Attribute[] attributes = table.getAttributes();
		// resolve the value map of each attribute once instead of once per cell
		@SuppressWarnings("unchecked")
		LinkedHashMap<Double, String>[] attributeValueMaps = new LinkedHashMap[attributes.length];
		for (int i = 0; i < attributes.length; i++) {
			attributeValueMaps[i] = valueMappingsMap.get(attributes[i]);
		}
		for (Iterator<DataRow> iterator = table.getDataRowReader(); iterator.hasNext();) {
			DataRow dataRow = iterator.next();
			for (int i = 0; i < attributes.length; i++) {
				if (!labeled[i]) {
					continue;
				}
				double originalValue = dataRow.get(attributes[i]);
				double value = Double.NaN;
				if (!Double.isNaN(originalValue)) {
					LinkedHashMap<Double, String> valueMap = attributeValueMaps[i];
					String nominalValue = valueMap == null ? null : valueMap.get(originalValue);
					switch (handleValueLabelsMode) {
					case IGNORE:
						value = attributes[i].getMapping().mapString(Tools.formatIntegerIfPossible(originalValue));
						break;
					case USE_ADDITIONALLY:
						if (nominalValue != null) {
							value = attributes[i].getMapping().mapString(nominalValue);
						} else {
							value = attributes[i].getMapping().mapString(Tools.formatIntegerIfPossible(originalValue));
						}
						break;
					case USE_EXCLUSIVELY:
						if (nominalValue != null) {
							value = attributes[i].getMapping().mapString(nominalValue);
						}
						break;
					default:
						// unknown mode: leave the value missing
						break;
					}
				}
				dataRow.set(attributes[i], value);
			}
		}
	}

	/** Returns the trimmed part of the text in front of the first string terminator, or the trimmed text if there is none. */
	private static String cutAtTerminator(String text) {
		int terminatorIndex = text.indexOf(CODE_STRING_TERMINATOR);
		String content = terminatorIndex < 0 ? text : text.substring(0, terminatorIndex);
		return content.trim();
	}

	/** Returns true if the type code denotes one of the five numeric types. */
	private static boolean isNumericType(int typeCode) {
		return typeCode >= CODE_TYPE_BYTE && typeCode <= CODE_TYPE_DOUBLE;
	}

	/** Returns the value type for the given type code: integer, numerical or (for strings) nominal. */
	private static int getValueType(int typeCode) {
		switch (typeCode) {
		case CODE_TYPE_BYTE:
		case CODE_TYPE_INT:
		case CODE_TYPE_LONG:
			return Ontology.INTEGER;
		case CODE_TYPE_FLOAT:
		case CODE_TYPE_DOUBLE:
			return Ontology.NUMERICAL;
		default:
			return Ontology.NOMINAL;
		}
	}

	/**
	 * Builds the attribute name from variable name and variable label
	 * according to the naming mode. The variable name alone is used if no
	 * label exists or the mode is unknown.
	 */
	private static String createAttributeName(int attributeNamingMode, String name, String label) {
		if (label == null) {
			return name;
		}
		switch (attributeNamingMode) {
		case USE_VAR_LABEL:
			return label;
		case USE_VAR_NAME_LABELED:
			return name + " (" + label + ")";
		case USE_VAR_LABEL_NAMED:
			return label + " (" + name + ")";
		case USE_VAR_NAME:
		default:
			return name;
		}
	}

	/** Returns the parameter types of the super class extended by the naming, value label and sampling parameters. */
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type = new ParameterTypeCategory(PARAMETER_ATTRIBUTE_NAMING_MODE, "Determines which variable properties should be used for attribute naming.", ATTRIBUTE_NAMING_MODES, USE_VAR_NAME);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeCategory(PARAMETER_HANDLE_VALUE_LABELS, "Specifies how to handle attributes with value labels, i.e. whether to ignore the labels or how to use them.", HANDLE_VALUE_LABELS_MODES, USE_ADDITIONALLY);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_SAMPLE_RATIO, "The fraction of the data set which should be read (1 = all; only used if sample_size = -1)", 0.0d, 1.0d, 1.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeInt(PARAMETER_SAMPLE_SIZE, "The exact number of samples which should be read (-1 = all; if not -1, sample_ratio will not have any effect)", -1, Integer.MAX_VALUE, -1);
		type.setExpert(true);
		types.add(type);
		type = new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (for sampling by ratio, -1: use global).", -1, Integer.MAX_VALUE, -1);
		type.setExpert(true);
		types.add(type);
		return types;
	}
}