/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
/**
 * This operator imports data from DasyLab files (.DDF) into RapidMiner.
* Currently only universal format 1 is supported. External files (.DDB) and histogram
* data are currently not supported.
*
* The parameter <tt>timestamp</tt> allows to configure whether and what kind of
* timestamp should be included in the example set. If it is set to <i>relative</i>,
* the timestamp attribute captures the amount of milliseconds since the file start
* time. If it is set to <i>absolute</i>, the absolute time is used to timestamp the
* examples.
*
* @author Tobias Malbrecht
* @version $Id: DasyLabDataReader.java,v 1.1 2008/08/27 16:14:45 tobiasmalbrecht Exp $
*/
public class DasyLabDataReader extends BytewiseExampleSource {

    /** Name of the parameter that selects the timestamp mode. */
    public static final String PARAMETER_TIMESTAMP = "timestamp";

    /** Selectable values for {@link #PARAMETER_TIMESTAMP}; indices match the TIMESTAMP_* constants below. */
    public static final String[] PARAMETER_TIMESTAMP_OPTIONS = { "none", "relative", "absolute" };

    public static final int TIMESTAMP_NONE = 0;

    public static final int TIMESTAMP_RELATIVE = 1;

    public static final int TIMESTAMP_ABSOLUTE = 2;

    private static final String NOT_YET_IMPLEMENTED_ERROR_MESSAGE = "feature not yet implemented, ";

    /** Magic bytes at the very beginning of every DasyLab .DDF file. */
    private static final String FILE_HEADER_STRING = "DTDF";

    /** DasyLab strings are zero-terminated. */
    private static final byte STRING_TERMINATOR_BYTE = 0;

    private static final int FILE_TYPE_UNIVERSAL_FORMAT_1 = 1;

    /**
     * Per-channel header information actually used by this reader.
     * Declared {@code static} since it never references the enclosing
     * reader instance (avoids the hidden outer-instance reference).
     */
    private static class Channel {

        // private static final int CONTINUOUS_TIME_DEPENDENT_SIGNAL = 0;
        //
        // private static final int FREQUENCY_DEPENDENT_FULL_BLOCK_SIZE = 10;
        //
        // private static final int FREQUENCY_DEPENDENT_HALF_BLOCK_SIZE = 11;

        private static final int HISTOGRAM = 20;

        private static final int HISTOGRAM_WITH_TIME_INFORMATION = 21;

        // private int number;
        //
        // private int maximumBlockSize;
        //
        // private double sampleDelayTime;

        /** Channel type code as read from the channel header. */
        private int type;

        // private int flags;
        //
        // private String unit;

        /** Human-readable channel name; used as the attribute name. */
        private String name;
    }

    public DasyLabDataReader(OperatorDescription description) {
        super(description);
    }

    protected String getFileSuffix() {
        return "ddf";
    }

    /**
     * Reads the given DasyLab .DDF file and converts it into an example set.
     * One attribute is created per channel; depending on the timestamp
     * parameter an additional leading timestamp attribute (set as id) is added.
     *
     * @param file the .DDF file to read
     * @param dataRowFactory factory used to create the data rows
     * @return the example set read from the file
     * @throws IOException if the file is malformed or uses an unsupported feature
     * @throws UndefinedParameterError if the timestamp parameter is undefined
     */
    protected ExampleSet readFile(File file, DataRowFactory dataRowFactory) throws IOException, UndefinedParameterError {
        int timestampMode = getParameterAsInt(PARAMETER_TIMESTAMP);
        FileInputStream fileReader = new FileInputStream(file);
        try {
            return readStream(fileReader, dataRowFactory, timestampMode);
        } finally {
            // close also on error paths (previously the stream leaked whenever
            // an IOException was thrown during parsing)
            fileReader.close();
        }
    }

    /**
     * Parses the complete .DDF stream: global header, channel headers and the
     * data blocks. Extracted from readFile() so the stream can be closed in a
     * single try/finally there.
     */
    private ExampleSet readStream(FileInputStream fileReader, DataRowFactory dataRowFactory, int timestampMode) throws IOException {
        byte[] buffer = new byte[500];
        int readBytes = -1;

        // header "DTDF",0x0D
        read(fileReader, buffer, 5);
        if (!extractString(buffer, 0, 4).equals(FILE_HEADER_STRING)) {
            throw new IOException(GENERIC_ERROR_MESSAGE);
        }

        // data file description string (zero-terminated); currently only
        // consumed, its contents are not evaluated
        StringBuilder stringBuffer = new StringBuilder();
        for (;;) {
            // check the raw int return value for EOF *before* narrowing: a
            // masked byte 0xFF is otherwise indistinguishable from the -1
            // EOF marker and a legitimate 0xFF data byte would abort reading
            int readValue = fileReader.read();
            if (readValue == -1) {
                throw new IOException(GENERIC_ERROR_MESSAGE);
            }
            byte readByte = (byte) readValue;
            if (readByte == STRING_TERMINATOR_BYTE) {
                break;
            }
            stringBuffer.append((char) readByte);
        }

        // // parse file description string
        // String[] descriptionStrings = stringBuffer.toString().split(new String(LINE_FEED_SEQUENCE));
        // for (int i = 0; i < descriptionStrings.length; i++) {
        //     if (descriptionStrings[i].contains("=")) {
        //         String[] keyValuePair = descriptionStrings[i].split("=");
        //     }
        // }

        // skip one byte, then the "IN" marker of the global header
        read(fileReader, buffer, 3);
        if (!extractString(buffer, 1, 2).equals("IN")) {
            throw new IOException(GENERIC_ERROR_MESSAGE);
        }
        // header size
        read(fileReader, buffer, 2);
        // file type
        read(fileReader, buffer, 2);
        int fileType = extract2ByteInt(buffer, 0, true);
        // version number
        read(fileReader, buffer, 2);
        // size of second global header
        read(fileReader, buffer, 2);
        // size of channel header
        read(fileReader, buffer, 2);
        // size of block header
        read(fileReader, buffer, 2);
        // data is stored in a separate (.DDB) file?
        read(fileReader, buffer, 2);
        boolean separateFile = extract2ByteInt(buffer, 0, true) == 1;
        if (separateFile) {
            throw new IOException(NOT_YET_IMPLEMENTED_ERROR_MESSAGE + "separate files not allowed");
        }
        // number of channels
        read(fileReader, buffer, 2);
        // time delay between samples in seconds
        read(fileReader, buffer, 8);
        // datum
        read(fileReader, buffer, 4);
        if (fileType != FILE_TYPE_UNIVERSAL_FORMAT_1) {
            throw new IOException(NOT_YET_IMPLEMENTED_ERROR_MESSAGE + "file types other than universal format 1 not supported");
        }

        // size of header
        read(fileReader, buffer, 2);
        // number of channels
        read(fileReader, buffer, 2);
        int numberOfChannels = extract2ByteInt(buffer, 0, true);
        // multiplexed?
        read(fileReader, buffer, 2);
        // number of channels collected on each input channel
        read(fileReader, buffer, 32);

        // read one header per channel; only type and name are kept
        Channel[] channels = new Channel[numberOfChannels];
        for (int i = 0; i < numberOfChannels; i++) {
            Channel channel = new Channel();
            // size of channel header (2-byte int)
            read(fileReader, buffer, 2);
            // channel number (2-byte int)
            read(fileReader, buffer, 2);
            // maximum block size (2-byte int)
            read(fileReader, buffer, 2);
            // time delay between samples (double)
            read(fileReader, buffer, 8);
            // channel type
            read(fileReader, buffer, 2);
            channel.type = extract2ByteInt(buffer, 0, true);
            if (channel.type == Channel.HISTOGRAM ||
                    channel.type == Channel.HISTOGRAM_WITH_TIME_INFORMATION) {
                throw new IOException(NOT_YET_IMPLEMENTED_ERROR_MESSAGE + "histogram data not supported");
            }
            // channel flags (2-byte int)
            read(fileReader, buffer, 2);
            // unused
            read(fileReader, buffer, 16);
            // channel unit (zero-terminated string, currently ignored)
            readBytes = read(fileReader, buffer, (char) 0);
            // if (readBytes != -1) {
            //     channel.unit = extractString(buffer, 0, readBytes);
            // }
            // channel name (zero-terminated string)
            readBytes = read(fileReader, buffer, (char) 0);
            if (readBytes != -1) {
                channel.name = extractString(buffer, 0, readBytes);
            }
            channels[i] = channel;
        }

        // "DATA" marker separates the headers from the sample blocks
        read(fileReader, buffer, 4);
        if (!extractString(buffer, 0, 4).equals("DATA")) {
            throw new IOException(GENERIC_ERROR_MESSAGE);
        }

        // build attributes: optional leading timestamp plus one per channel
        ArrayList<Attribute> attributes = new ArrayList<Attribute>(numberOfChannels + 1);
        switch (timestampMode) {
            case TIMESTAMP_NONE:
                break;
            case TIMESTAMP_RELATIVE:
                attributes.add(AttributeFactory.createAttribute("timestamp", Ontology.REAL));
                break;
            case TIMESTAMP_ABSOLUTE:
                attributes.add(AttributeFactory.createAttribute("timestamp", Ontology.DATE_TIME));
                break;
        }
        for (int i = 0; i < numberOfChannels; i++) {
            attributes.add(AttributeFactory.createAttribute(channels[i].name, Ontology.REAL));
        }

        // number of bytes in channel
        read(fileReader, buffer, 2);
        // start datum (seconds; kept in milliseconds for absolute timestamps)
        read(fileReader, buffer, 4);
        long startTime = (long) extractInt(buffer, 0, true) * 1000;
        // unused
        read(fileReader, buffer, 8);

        MemoryExampleTable table = new MemoryExampleTable(attributes);
        // samples arrive blockwise per channel; rows are assembled per time
        // stamp and emitted once all channels contributed a value
        HashMap<Double, Double[]> valuesMap = new HashMap<Double, Double[]>();
        HashMap<Double, Integer> counterMap = new HashMap<Double, Integer>();
        boolean eof = false;
        while (!eof) {
            // block header: channel number, start time, sample delay, block size
            readBytes = readFully(fileReader, buffer, 0, 20);
            if (readBytes != 20) {
                eof = true;
                break;
            }
            int channelNr = extract2ByteInt(buffer, 0, true);
            double time = extractDouble(buffer, 2, true);
            double delay = extractDouble(buffer, 10, true);
            int blockSize = extract2ByteInt(buffer, 18, true);
            for (int i = 0; i < blockSize; i++) {
                readBytes = readFully(fileReader, buffer, 20, 4);
                if (readBytes != 4) {
                    eof = true;
                    break;
                }
                double value = extractFloat(buffer, 20, true);
                Double[] values = null;
                if (!valuesMap.containsKey(time)) {
                    counterMap.put(time, 1);
                    values = new Double[timestampMode == TIMESTAMP_NONE ? numberOfChannels : numberOfChannels + 1];
                    // default every slot to NaN (starting at 0: in mode
                    // TIMESTAMP_NONE slot 0 belongs to channel 0 and must not
                    // stay null; in timestamp modes slot 0 is overwritten below)
                    for (int j = 0; j < values.length; j++) {
                        values[j] = Double.NaN;
                    }
                    valuesMap.put(time, values);
                } else {
                    Integer counter = counterMap.get(time) + 1;
                    counterMap.put(time, counter);
                    values = valuesMap.get(time);
                }
                if (values != null) {
                    switch (timestampMode) {
                        case TIMESTAMP_NONE:
                            values[channelNr] = value;
                            break;
                        case TIMESTAMP_RELATIVE:
                            // milliseconds since file start, truncated to whole ms
                            values[0] = (double) (long) (time * 1000);
                            values[channelNr + 1] = value;
                            break;
                        case TIMESTAMP_ABSOLUTE:
                            values[0] = (double) (startTime + (long) (time * 1000));
                            values[channelNr + 1] = value;
                            break;
                    }
                }
                // emit the row once every channel contributed for this time
                if (counterMap.get(time) == numberOfChannels) {
                    table.addDataRow(dataRowFactory.create(valuesMap.get(time), attributes.toArray(new Attribute[attributes.size()])));
                    counterMap.remove(time);
                    valuesMap.remove(time);
                }
                time += delay;
            }
        }

        ExampleSet exampleSet = table.createExampleSet();
        if (timestampMode != TIMESTAMP_NONE) {
            exampleSet.getAttributes().setId(attributes.get(0));
        }
        return exampleSet;
    }

    /**
     * Reads exactly {@code length} bytes into {@code buffer} at
     * {@code offset}, looping over short reads. A single
     * {@code InputStream.read(byte[], int, int)} call may legally return
     * fewer bytes than requested without having reached EOF; the previous
     * code treated such short reads as end of data.
     *
     * @return the number of bytes actually read; less than {@code length}
     *         only if EOF was reached first
     */
    private static int readFully(FileInputStream in, byte[] buffer, int offset, int length) throws IOException {
        int total = 0;
        while (total < length) {
            int count = in.read(buffer, offset + total, length - total);
            if (count == -1) {
                break;
            }
            total += count;
        }
        return total;
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        // default is TIMESTAMP_ABSOLUTE (index 2), expressed via the constant
        // instead of a magic number
        types.add(new ParameterTypeCategory(PARAMETER_TIMESTAMP, "Specifies whether to include an absolute timestamp, a timestamp relative to the beginning of the file (in seconds) or no timestamp at all.", PARAMETER_TIMESTAMP_OPTIONS, TIMESTAMP_ABSOLUTE));
        return types;
    }
}