/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.att.AttributeSet;
import com.rapidminer.tools.math.sampling.OrderedSamplingWithoutReplacement;

/**
 * This operator can read spss files.
 *
 * <p>The file is processed in three steps: the fixed-size file header, the dictionary
 * (variable records followed by value label, document and information records up to the
 * dictionary termination record) and finally the cases, which are either stored as plain
 * 8 byte blocks or compressed with one command code byte per value block.</p>
 *
 * @rapidminer.index spss
 * @author Tobias Malbrecht
 * @version $Id: SPSSExampleSource.java,v 1.10 2008/08/25 21:32:39 tobiasmalbrecht Exp $
 */
public class SPSSExampleSource extends BytewiseExampleSource {

    /** The parameter name for "Determines which SPSS variable properties should be used for attribute naming." */
    public static final String PARAMETER_ATTRIBUTE_NAMING_MODE = "attribute_naming_mode";

    /** The parameter name for "Use SPSS value labels as values." */
    public static final String PARAMETER_USE_VALUE_LABELS = "use_value_labels";

    /** The parameter name for "Recode SPSS user missings to missing values." */
    public static final String PARAMETER_RECODE_USER_MISSINGS = "recode_user_missings";

    /** The parameter name for "The fraction of the data set which should be read (1 = all; only used if sample_size = -1)" */
    public static final String PARAMETER_SAMPLE_RATIO = "sample_ratio";

    /** The parameter name for "The exact number of samples which should be read (-1 = all; if not -1, sample_ratio will not have any effect)" */
    public static final String PARAMETER_SAMPLE_SIZE = "sample_size";

    /** The parameter name for "Use the given random seed instead of global random numbers (for sampling by ratio, -1: use global)." */
    public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

    /** File suffix for spss files. */
    private static final String SPSS_FILE_SUFFIX = "sav";

    /** Only use variable name as attribute name. */
    public static final int USE_VAR_NAME = 0;

    /** Only use variable label as attribute name. */
    public static final int USE_VAR_LABEL = 1;

    /** Use variable name with label in parentheses as attribute name. */
    public static final int USE_VAR_NAME_LABELED = 2;

    /** Use variable label with name in parentheses as attribute name. */
    public static final int USE_VAR_LABEL_NAMED = 3;

    /** String descriptions of attribute naming modes. */
    public static final String[] ATTRIBUTE_NAMING_MODES = { "name", "label", "name (label)", "label (name)" };

    /** File format constants... */
    private static final int CODE_HEADER = 0x24464C32;
    private static final int LENGTH_HEADER = 176;
    private static final int INDEX_CODE_HEADER = 0;
    private static final int INDEX_HEADER_PRODUCT_NAME = 4;
    private static final int LENGTH_HEADER_PRODUCT_NAME = 60;
    private static final int INDEX_HEADER_LAYOUT_CODE = 64;
    private static final int CODE_HEADER_LAYOUT_CODE = 2;
    private static final int INDEX_HEADER_CASE_SIZE = 68;
    private static final int INDEX_HEADER_COMPRESSED = 72;
    private static final int INDEX_HEADER_WEIGHT_INDEX = 76;
    private static final int INDEX_HEADER_NUMBER_OF_CASES = 80;
    private static final int INDEX_HEADER_BIAS = 84;
    private static final int INDEX_HEADER_DATE = 92;
    private static final int LENGTH_HEADER_DATE = 9;
    private static final int INDEX_HEADER_TIME = 101;
    private static final int LENGTH_HEADER_TIME = 8;
    private static final int INDEX_HEADER_DATASET_LABEL = 109;
    private static final int LENGTH_HEADER_DATASET_LABEL = 64;

    private static final int CODE_VARIABLE = 2;
    private static final int LENGTH_VARIABLE = 32;
    private static final int INDEX_VARIABLE_TYPE = 4;
    private static final int INDEX_VARIABLE_LABELED = 8;
    private static final int INDEX_VARIABLE_NUMBER_OF_MISSING_VALUES = 12;
    private static final int INDEX_VARIABLE_PRINT_FORMAT = 16;
    private static final int INDEX_VARIABLE_NAME = 24;
    private static final int LENGTH_VARIABLE_NAME = 8;

    private static final int FORMAT_DATE = 20;
    private static final int FORMAT_EDATE = 38;
    private static final int FORMAT_SDATE = 39;
    private static final int FORMAT_TIME = 21;
    private static final int FORMAT_DATETIME = 22;

    private static final int CODE_VALUE_LABEL = 3;
    private static final int CODE_VALUE_LABEL_VARIABLE = 4;

    private static final int CODE_DOCUMENT = 6;
    private static final int LENGTH_DOCUMENT_LINE = 80;

    private static final int CODE_INFORMATION_HEADER = 7;
    private static final int LENGTH_INFORMATION_HEADER = 12;
    private static final int INDEX_INFORMATION_HEADER_SUBTYPE = 0;
    private static final int INDEX_INFORMATION_HEADER_SIZE = 4;
    private static final int INDEX_INFORMATION_HEADER_COUNT = 8;
    private static final int CODE_INFORMATION_HEADER_SUBTYPE_MACHINE_32 = 3;
    private static final int LENGTH_INFORMATION_HEADER_SUBTYPE_MACHINE_32 = 32;
    private static final int CODE_INFORMATION_HEADER_SUBTYPE_MACHINE_64 = 4;
    private static final int LENGTH_INFORMATION_HEADER_SUBTYPE_MACHINE_64 = 24;
    private static final int CODE_INFORMATION_HEADER_AUXILIARY_VARIABLE_PARAMETERS = 11;
    private static final int LENGTH_INFORMATION_HEADER_SINGLE_AUXILIARY_VARIABLE_PARAMETERS = 12;
    private static final int CODE_LONG_VARIABLE_NAMES = 13;
    private static final int CODE_LONG_VARIABLE_NAME_RECORDS_DIVIDER = 9;

    private static final int CODE_DICTIONARY_TERMINATION = 999;

    private static final int LENGTH_COMMAND_CODE_BLOCK = 8;
    private static final int CODE_COMMAND_CODE_IGNORED = 0;
    private static final int CODE_COMMAND_CODE_EOF = 252;
    private static final int CODE_COMMAND_CODE_NOT_COMPRESSIBLE = 253;
    private static final int CODE_COMMAND_CODE_ALL_SPACES_STRING = 254;
    private static final int CODE_COMMAND_CODE_SYSTEM_MISSING = 255;
    private static final int LENGTH_VALUE_BLOCK = 8;

    private static final long GREGORIAN_CALENDAR_OFFSET_IN_MILLISECONDS = -12219379200000L;

    /** Size of the general purpose scratch buffer used while parsing. */
    private static final int BUFFER_SIZE = 500;

    /**
     * One value block consisting only of blanks; this is what the "all spaces" command code
     * stands for. Built programmatically so that the width always equals
     * {@link #LENGTH_VALUE_BLOCK}.
     */
    private static final String BLANK_VALUE_BLOCK = createBlankValueBlock();

    /** SPSS file variable header definition. */
    private static class Variable {

        private static final int TYPE_NUMERICAL = 0;

        private static final int MEASURE_NOMINAL = 1;
        private static final int MEASURE_ORDINAL = 2;
        private static final int MEASURE_CONTINUOUS = 3;

        /** 0 for numerical variables, -1 for continuation records; otherwise compared against the string length. */
        private int type;
        private boolean labeled;
        private int printFormat;
        /** Raw count from the variable record; negative values denote a missing value range. */
        private int numberOfMissingValues;
        private String name;
        private String label;
        private double[] missingValues;
        private LinkedHashMap<Double, String> valueLabels;
        private int measure;

        private boolean isDateVariable() {
            return (printFormat == FORMAT_DATE) || (printFormat == FORMAT_EDATE) || (printFormat == FORMAT_SDATE);
        }

        private boolean isTimeVariable() {
            return (printFormat == FORMAT_TIME);
        }

        private boolean isDateTimeVariable() {
            return (printFormat == FORMAT_DATETIME);
        }
    }

    public SPSSExampleSource(OperatorDescription description) {
        super(description);
    }

    protected String getFileSuffix() {
        return SPSS_FILE_SUFFIX;
    }

    /**
     * Reads the given SPSS file and converts it into an example set.
     *
     * @param file the SPSS file to read
     * @param dataRowFactory factory used to create one data row per accepted case
     * @return the example set built from the sampled cases; the weight attribute is set
     *         if the file header names a weight variable
     * @throws IOException if the file cannot be read or its structure is not as expected
     * @throws UndefinedParameterError if one of the operator parameters is undefined
     */
    protected ExampleSet readFile(File file, DataRowFactory dataRowFactory) throws IOException, UndefinedParameterError {
        int attributeNamingMode = getParameterAsInt(PARAMETER_ATTRIBUTE_NAMING_MODE);
        boolean useValueLabels = getParameterAsBoolean(PARAMETER_USE_VALUE_LABELS);
        boolean recodeUserMissings = getParameterAsBoolean(PARAMETER_RECODE_USER_MISSINGS);
        int sampleSize = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
        double sampleRatio = getParameterAsDouble(PARAMETER_SAMPLE_RATIO);
        RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));

        FileInputStream fileReader = new FileInputStream(file);
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            boolean reverseEndian = false;

            // read file header
            read(fileReader, buffer, LENGTH_HEADER);
            if (extractInt(buffer, INDEX_CODE_HEADER, false) != CODE_HEADER) {
                throw new IOException(GENERIC_ERROR_MESSAGE);
            }
            String productName = extractString(buffer, INDEX_HEADER_PRODUCT_NAME, LENGTH_HEADER_PRODUCT_NAME);
            // the layout code is used to detect the byte order of the file
            int layoutCode = extractInt(buffer, INDEX_HEADER_LAYOUT_CODE, false);
            if (layoutCode != CODE_HEADER_LAYOUT_CODE) {
                reverseEndian = true;
                layoutCode = extractInt(buffer, INDEX_HEADER_LAYOUT_CODE, reverseEndian);
                if (layoutCode != CODE_HEADER_LAYOUT_CODE) {
                    throw new IOException(GENERIC_ERROR_MESSAGE);
                }
            }
            int caseSize = extractInt(buffer, INDEX_HEADER_CASE_SIZE, reverseEndian);
            boolean compressed = extractInt(buffer, INDEX_HEADER_COMPRESSED, reverseEndian) == 1;
            int weightIndex = extractInt(buffer, INDEX_HEADER_WEIGHT_INDEX, reverseEndian);
            int numberOfExamples = extractInt(buffer, INDEX_HEADER_NUMBER_OF_CASES, reverseEndian);
            double bias = extractDouble(buffer, INDEX_HEADER_BIAS, reverseEndian);
            String date = extractString(buffer, INDEX_HEADER_DATE, LENGTH_HEADER_DATE);
            String time = extractString(buffer, INDEX_HEADER_TIME, LENGTH_HEADER_TIME);
            String dataSetLabel = extractString(buffer, INDEX_HEADER_DATASET_LABEL, LENGTH_HEADER_DATASET_LABEL);

            StringBuilder logMessage = new StringBuilder("SPSSExampleSource reads " + file.getAbsolutePath() + Tools.getLineSeparator());
            logMessage.append((compressed ? "" : "un") + "compressed, written by " + productName + " at " + time + ", " + date + Tools.getLineSeparator());
            if (dataSetLabel.equals("")) {
                logMessage.append("no file label, ");
            } else {
                logMessage.append("file label is " + dataSetLabel + Tools.getLineSeparator());
            }
            logMessage.append("contains " + numberOfExamples + " examples, case size is " + caseSize + "x8=" + caseSize * 8 + " Bytes" + Tools.getLineSeparator());
            logMessage.append("weight index is " + weightIndex + Tools.getLineSeparator());
            log(logMessage.toString());

            // read dictionary: variable records first, then all remaining header records
            // (ArrayList because the variables are accessed by index for every single value)
            List<Variable> variables = new ArrayList<Variable>();
            Map<Integer, Integer> variableNrTranslations = new LinkedHashMap<Integer, Integer>();
            readVariableRecords(fileReader, buffer, reverseEndian, caseSize, variables, variableNrTranslations);
            readDictionaryRecords(fileReader, buffer, reverseEndian, variables, variableNrTranslations);

            // create attributes from variables
            AttributeSet attributeSet = new AttributeSet();
            for (Variable variable : variables) {
                attributeSet.addAttribute(createAttribute(variable, attributeNamingMode, useValueLabels, recodeUserMissings));
            }

            // initialize sampling functionality
            OrderedSamplingWithoutReplacement sampling;
            if (sampleSize != -1) {
                sampling = new OrderedSamplingWithoutReplacement(randomGenerator, numberOfExamples, sampleSize);
            } else {
                sampling = new OrderedSamplingWithoutReplacement(randomGenerator, numberOfExamples, sampleRatio);
            }

            // resolve weight attribute (the header index is 1-based, 0 means "no weight")
            Attribute weight = null;
            if (weightIndex != 0) {
                Integer weightVariableNr = variableNrTranslations.get(weightIndex - 1);
                if (weightVariableNr == null) {
                    throw new IOException(GENERIC_ERROR_MESSAGE + ": weight index " + weightIndex + " does not refer to a variable");
                }
                weight = attributeSet.getAttribute(weightVariableNr.intValue());
            }

            // read data
            MemoryExampleTable table = new MemoryExampleTable(attributeSet.getAllAttributes());
            // position inside the current block of command codes, kept across values and cases
            int commandCodeCounter = 0;
            int bytesRead = 0;
            for (int i = 0; i < numberOfExamples; i++) {
                String[] values = new String[variables.size()];
                if (!compressed) {
                    // NOTE(review): every variable is read as one double here; string variables and
                    // date conversion do not appear to be handled for uncompressed files - confirm.
                    for (int j = 0; j < variables.size(); j++) {
                        read(fileReader, buffer, LENGTH_DOUBLE);
                        values[j] = Double.toString(extractDouble(buffer, 0, reverseEndian));
                    }
                } else {
                    for (int j = 0; j < variables.size(); j++) {
                        boolean readValue = false;
                        String value = null;
                        Variable variable = variables.get(j);
                        for (;;) {
                            // fetch the next block of command codes once the current one is used up
                            if (commandCodeCounter % LENGTH_COMMAND_CODE_BLOCK == 0) {
                                commandCodeCounter = 0;
                                bytesRead = read(fileReader, buffer, 0, LENGTH_COMMAND_CODE_BLOCK);
                                if (bytesRead == -1) {
                                    break;
                                }
                            }
                            int commandCode = 0x000000FF & buffer[commandCodeCounter];
                            switch (commandCode) {
                                case CODE_COMMAND_CODE_IGNORED:
                                    break;
                                case CODE_COMMAND_CODE_EOF:
                                    // clear remaining command buffer for safety
                                    for (int k = commandCodeCounter + 1; k < LENGTH_COMMAND_CODE_BLOCK; k++) {
                                        buffer[k] = (byte) 0;
                                    }
                                    break;
                                case CODE_COMMAND_CODE_NOT_COMPRESSIBLE:
                                    // the value block is stored behind the command codes in the buffer
                                    bytesRead = read(fileReader, buffer, LENGTH_COMMAND_CODE_BLOCK, LENGTH_VALUE_BLOCK);
                                    if (bytesRead == -1) {
                                        throw new IOException("file corrupt (data inconsistency)");
                                    }
                                    if (variable.type == Variable.TYPE_NUMERICAL) {
                                        double storedValue = extractDouble(buffer, LENGTH_COMMAND_CODE_BLOCK, reverseEndian);
                                        value = toValueString(variable, storedValue, useValueLabels, recodeUserMissings);
                                        readValue = true;
                                    } else {
                                        String part = new String(buffer, LENGTH_COMMAND_CODE_BLOCK, LENGTH_VALUE_BLOCK);
                                        value = (value == null) ? part : value + part;
                                        // long strings span several value blocks
                                        if (value.length() >= variable.type) {
                                            value = value.trim();
                                            readValue = true;
                                        }
                                    }
                                    break;
                                case CODE_COMMAND_CODE_ALL_SPACES_STRING:
                                    // stands for a complete value block of blanks
                                    value = (value == null) ? BLANK_VALUE_BLOCK : value.concat(BLANK_VALUE_BLOCK);
                                    if (value.length() >= variable.type) {
                                        value = value.trim();
                                        readValue = true;
                                    }
                                    break;
                                case CODE_COMMAND_CODE_SYSTEM_MISSING:
                                    value = null;
                                    readValue = true;
                                    break;
                                default:
                                    // the command code itself encodes the value, shifted by the bias
                                    value = toValueString(variable, commandCode - bias, useValueLabels, recodeUserMissings);
                                    readValue = true;
                                    break;
                            }
                            commandCodeCounter++;
                            if (readValue) {
                                values[j] = value;
                                break;
                            }
                        }
                    }
                }

                // add data to table
                if (sampling.acceptElement()) {
                    table.addDataRow(dataRowFactory.create(values, table.getAttributes()));
                }
            }

            ExampleSet exampleSet = table.createExampleSet();
            exampleSet.getAttributes().setWeight(weight);
            return exampleSet;
        } finally {
            // release the file handle even if parsing failed
            fileReader.close();
        }
    }

    /**
     * Reads the variable records of the dictionary. One record exists per 8 byte block of a
     * case; records of type -1 are not turned into variables. For every other record the
     * mapping from its record index to the index in <code>variables</code> is stored in
     * <code>variableNrTranslations</code>.
     */
    private void readVariableRecords(FileInputStream fileReader, byte[] buffer, boolean reverseEndian, int caseSize,
            List<Variable> variables, Map<Integer, Integer> variableNrTranslations) throws IOException {
        int variableNr = 0;
        for (int i = 0; i < caseSize; i++) {
            read(fileReader, buffer, LENGTH_VARIABLE);
            if (extractInt(buffer, 0, reverseEndian) != CODE_VARIABLE) {
                throw new IOException("file corrupt (missing variable definitions)");
            }
            Variable variable = new Variable();
            variable.type = extractInt(buffer, INDEX_VARIABLE_TYPE, reverseEndian);
            variable.labeled = extractInt(buffer, INDEX_VARIABLE_LABELED, reverseEndian) == 1;
            variable.numberOfMissingValues = extractInt(buffer, INDEX_VARIABLE_NUMBER_OF_MISSING_VALUES, reverseEndian);
            variable.printFormat = (0x00FF0000 & extractInt(buffer, INDEX_VARIABLE_PRINT_FORMAT, reverseEndian)) >> 16;
            variable.name = extractString(buffer, INDEX_VARIABLE_NAME, LENGTH_VARIABLE_NAME);
            if (variable.labeled) {
                read(fileReader, buffer, LENGTH_VARIABLE, LENGTH_INT_32);
                int labelLength = extractInt(buffer, LENGTH_VARIABLE, reverseEndian);
                // the label is padded to a multiple of 4 bytes
                int adjLabelLength = labelLength;
                if (labelLength % LENGTH_INT_32 != 0) {
                    adjLabelLength = labelLength + LENGTH_INT_32 - (labelLength % LENGTH_INT_32);
                }
                read(fileReader, buffer, adjLabelLength);
                variable.label = extractString(buffer, 0, labelLength);
            }
            // a negative count marks a missing value range; its absolute value is the number
            // of stored values
            int storedMissingValues = Math.abs(variable.numberOfMissingValues);
            if (storedMissingValues != 0) {
                read(fileReader, buffer, storedMissingValues * LENGTH_DOUBLE);
                variable.missingValues = new double[storedMissingValues];
                for (int j = 0; j < storedMissingValues; j++) {
                    variable.missingValues[j] = extractDouble(buffer, j * LENGTH_DOUBLE, reverseEndian);
                }
            }
            if (variable.type != -1) {
                variables.add(variable);
                variableNrTranslations.put(i, variableNr);
                variableNr++;
            }
        }
    }

    /**
     * Reads all dictionary records following the variable records until the dictionary
     * termination record is found. Value labels, measure levels and long variable names
     * are stored in the given variables; all other records are skipped.
     */
    private void readDictionaryRecords(FileInputStream fileReader, byte[] buffer, boolean reverseEndian,
            List<Variable> variables, Map<Integer, Integer> variableNrTranslations) throws IOException {
        boolean valueLabelsRead = false;
        LinkedHashMap<Double, String> valueLabels = null;
        boolean terminated = false;
        do {
            int count = 0;
            if (read(fileReader, buffer, 0, LENGTH_INT_32) == -1) {
                // without this check a truncated dictionary would be parsed forever
                throw new IOException(GENERIC_ERROR_MESSAGE + ": unexpected end of file while reading dictionary");
            }
            int recordType = extractInt(buffer, 0, reverseEndian);
            switch (recordType) {
                case CODE_VALUE_LABEL:
                    read(fileReader, buffer, LENGTH_INT_32);
                    count = extractInt(buffer, 0, reverseEndian);
                    valueLabels = new LinkedHashMap<Double, String>();
                    for (int i = 0; i < count; i++) {
                        read(fileReader, buffer, LENGTH_DOUBLE);
                        double labelValue = extractDouble(buffer, 0, reverseEndian);
                        read(fileReader, buffer, LENGTH_BYTE);
                        // the length is an unsigned byte
                        int labelLength = buffer[0] & 0xFF;
                        // length byte plus label are padded to a multiple of 8 bytes
                        int adjLabelLength = labelLength + LENGTH_DOUBLE - (labelLength % LENGTH_DOUBLE) - 1;
                        read(fileReader, buffer, adjLabelLength);
                        String labelLabel = extractString(buffer, 0, labelLength);
                        valueLabels.put(labelValue, labelLabel);
                    }
                    valueLabelsRead = true;
                    break;
                case CODE_VALUE_LABEL_VARIABLE:
                    if (!valueLabelsRead) {
                        throw new IOException(GENERIC_ERROR_MESSAGE + ": value labels have not been read");
                    }
                    valueLabelsRead = false;
                    read(fileReader, buffer, LENGTH_INT_32);
                    count = extractInt(buffer, 0, reverseEndian);
                    for (int i = 0; i < count; i++) {
                        read(fileReader, buffer, LENGTH_INT_32);
                        Integer variableNr = variableNrTranslations.get(extractInt(buffer, 0, reverseEndian) - 1);
                        // indices which do not refer to a known variable are ignored
                        if (variableNr != null && variableNr.intValue() < variables.size()) {
                            variables.get(variableNr.intValue()).valueLabels = valueLabels;
                        }
                    }
                    break;
                case CODE_DOCUMENT:
                    read(fileReader, buffer, LENGTH_INT_32);
                    count = extractInt(buffer, 0, reverseEndian);
                    for (int i = 0; i < count; i++) {
                        read(fileReader, buffer, LENGTH_DOCUMENT_LINE);
                    }
                    break;
                case CODE_INFORMATION_HEADER: {
                    read(fileReader, buffer, 0, LENGTH_INFORMATION_HEADER);
                    int subType = extractInt(buffer, INDEX_INFORMATION_HEADER_SUBTYPE, reverseEndian);
                    int size = extractInt(buffer, INDEX_INFORMATION_HEADER_SIZE, reverseEndian);
                    count = extractInt(buffer, INDEX_INFORMATION_HEADER_COUNT, reverseEndian);
                    int payloadLength = count * size;
                    switch (subType) {
                        case CODE_INFORMATION_HEADER_SUBTYPE_MACHINE_32:
                            read(fileReader, buffer, LENGTH_INFORMATION_HEADER_SUBTYPE_MACHINE_32);
                            break;
                        case CODE_INFORMATION_HEADER_SUBTYPE_MACHINE_64:
                            read(fileReader, buffer, LENGTH_INFORMATION_HEADER_SUBTYPE_MACHINE_64);
                            break;
                        case CODE_INFORMATION_HEADER_AUXILIARY_VARIABLE_PARAMETERS:
                            for (Variable variable : variables) {
                                read(fileReader, buffer, LENGTH_INFORMATION_HEADER_SINGLE_AUXILIARY_VARIABLE_PARAMETERS);
                                variable.measure = extractInt(buffer, 0, reverseEndian);
                            }
                            break;
                        case CODE_LONG_VARIABLE_NAMES: {
                            byte[] namesBuffer = new byte[payloadLength];
                            read(fileReader, namesBuffer, payloadLength);
                            // decoded with the platform default charset
                            applyLongVariableNames(new String(namesBuffer), variables);
                            break;
                        }
                        default: {
                            // skip unknown records; they may be larger than the scratch buffer
                            byte[] skipBuffer = payloadLength > buffer.length ? new byte[payloadLength] : buffer;
                            read(fileReader, skipBuffer, payloadLength);
                            break;
                        }
                    }
                    break;
                }
                case CODE_DICTIONARY_TERMINATION:
                    read(fileReader, buffer, LENGTH_INT_32);
                    terminated = true;
                    break;
                default:
                    break;
            }
        } while (!terminated);
    }

    /**
     * Replaces variable names by their long names. The given string consists of
     * <code>name=long name</code> pairs separated by the records divider character;
     * entries which are not such a pair are ignored.
     */
    private static void applyLongVariableNames(String longVariableNamesString, List<Variable> variables) {
        String divider = String.valueOf((char) CODE_LONG_VARIABLE_NAME_RECORDS_DIVIDER);
        String[] longVariableNamePairs = longVariableNamesString.split(divider);
        for (int i = 0; i < longVariableNamePairs.length; i++) {
            String[] keyLongVariablePair = longVariableNamePairs[i].split("=");
            if (keyLongVariablePair.length != 2) {
                continue;
            }
            for (Variable variable : variables) {
                if (variable.name.equals(keyLongVariablePair[0])) {
                    variable.name = keyLongVariablePair[1];
                }
            }
        }
    }

    /**
     * Creates the attribute for the given variable: the name is built according to the
     * naming mode, the value type is derived from variable type, print format and measure,
     * and for nominal attributes the value labels are registered in the attribute mapping.
     */
    private static Attribute createAttribute(Variable variable, int attributeNamingMode, boolean useValueLabels, boolean recodeUserMissings) {
        // variables without label use their name as label
        if (variable.label == null) {
            variable.label = variable.name;
        }
        String attributeName;
        switch (attributeNamingMode) {
            case USE_VAR_NAME:
                attributeName = variable.name;
                break;
            case USE_VAR_LABEL:
                attributeName = variable.label;
                break;
            case USE_VAR_NAME_LABELED:
                attributeName = variable.name + " (" + variable.label + ")";
                break;
            case USE_VAR_LABEL_NAMED:
                attributeName = variable.label + " (" + variable.name + ")";
                break;
            default:
                attributeName = variable.name;
        }

        int valueType;
        if (variable.type == Variable.TYPE_NUMERICAL) {
            // TODO: check completeness of date variable types
            if (variable.isDateVariable()) {
                valueType = Ontology.DATE;
            } else if (variable.isTimeVariable()) {
                valueType = Ontology.TIME;
            } else if (variable.isDateTimeVariable()) {
                valueType = Ontology.DATE_TIME;
            } else {
                switch (variable.measure) {
                    case Variable.MEASURE_NOMINAL:
                    case Variable.MEASURE_ORDINAL:
                        valueType = Ontology.NOMINAL;
                        break;
                    case Variable.MEASURE_CONTINUOUS:
                        valueType = Ontology.NUMERICAL;
                        break;
                    default:
                        // no measure known: decide by the presence of value labels
                        if (useValueLabels && variable.valueLabels != null) {
                            valueType = Ontology.NOMINAL;
                        } else {
                            valueType = Ontology.NUMERICAL;
                        }
                }
            }
        } else {
            valueType = Ontology.STRING;
        }
        Attribute attribute = AttributeFactory.createAttribute(attributeName, valueType);

        // map strings to values for nominal attributes
        if (attribute.isNominal() && variable.valueLabels != null) {
            for (Map.Entry<Double, String> entry : variable.valueLabels.entrySet()) {
                double numericValue = entry.getKey().doubleValue();
                if (recodeUserMissings && isUserMissing(variable, numericValue)) {
                    continue;
                }
                if (useValueLabels) {
                    attribute.getMapping().mapString(entry.getValue());
                } else {
                    attribute.getMapping().mapString(java.lang.Double.toString(numericValue));
                }
            }
        }
        return attribute;
    }

    /**
     * Converts a numeric value read from the data part into the string handed to the data
     * row factory. Date and time values are converted from seconds into milliseconds and
     * shifted by the calendar offset, values of non-continuous variables are replaced by
     * their value label if requested, and user missings are returned as <code>null</code>
     * if they should be recoded.
     */
    private static String toValueString(Variable variable, double storedValue, boolean useValueLabels, boolean recodeUserMissings) {
        double numericValue = storedValue;
        if (variable.isDateVariable() || variable.isTimeVariable() || variable.isDateTimeVariable()) {
            numericValue = (long) numericValue * 1000 + GREGORIAN_CALENDAR_OFFSET_IN_MILLISECONDS;
        }
        String value = java.lang.Double.toString(numericValue);
        if (variable.measure != Variable.MEASURE_CONTINUOUS && useValueLabels && variable.valueLabels != null) {
            value = variable.valueLabels.get(numericValue);
        }
        if (recodeUserMissings && isUserMissing(variable, numericValue)) {
            value = null;
        }
        return value;
    }

    /**
     * Checks whether the given value is one of the user missings of the variable. A negative
     * missing value count denotes a range (first and second stored value), optionally
     * followed by one additional discrete value.
     */
    private static boolean isUserMissing(Variable variable, double value) {
        double[] missingValues = variable.missingValues;
        if (missingValues == null) {
            return false;
        }
        if (variable.numberOfMissingValues < 0) {
            if (missingValues.length >= 2 && value >= missingValues[0] && value <= missingValues[1]) {
                return true;
            }
            return missingValues.length > 2 && Tools.isEqual(value, missingValues[2]);
        }
        for (int k = 0; k < missingValues.length; k++) {
            if (Tools.isEqual(value, missingValues[k])) {
                return true;
            }
        }
        return false;
    }

    /** Creates a string of {@link #LENGTH_VALUE_BLOCK} blanks. */
    private static String createBlankValueBlock() {
        char[] blanks = new char[LENGTH_VALUE_BLOCK];
        Arrays.fill(blanks, ' ');
        return new String(blanks);
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterType type = new ParameterTypeCategory(PARAMETER_ATTRIBUTE_NAMING_MODE, "Determines which SPSS variable properties should be used for attribute naming.", ATTRIBUTE_NAMING_MODES, USE_VAR_NAME);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeBoolean(PARAMETER_USE_VALUE_LABELS, "Use SPSS value labels as values.", true);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeBoolean(PARAMETER_RECODE_USER_MISSINGS, "Recode SPSS user missings to missing values.", true);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_SAMPLE_RATIO, "The fraction of the data set which should be read (1 = all; only used if sample_size = -1)", 0.0d, 1.0d, 1.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeInt(PARAMETER_SAMPLE_SIZE, "The exact number of samples which should be read (-1 = all; if not -1, sample_ratio will not have any effect)", -1, Integer.MAX_VALUE, -1);
        type.setExpert(true);
        types.add(type);
        type = new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (for sampling by ratio, -1: use global).", -1, Integer.MAX_VALUE, -1);
        type.setExpert(true);
        types.add(type);
        return types;
    }
}