/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.data;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Logger;
import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.input.SOMLibFileFormatException;
import at.tuwien.ifs.somtoolbox.util.FileUtils;
import at.tuwien.ifs.somtoolbox.util.StringUtils;
/**
* This class provides information about the real output value for the {@link InputData} input vectors.<br>
* <p>
* The file format consists of a <code>header</code> and the content as follows:
* </p>
* <b>$TYPE</b> string, mandatory. Fixed to <i>output_information.</i> <br>
* <b>$XDIM</b> integer, mandatory: number of columns per content line; at least 2 (label name and output value).<br>
* <b>$YDIM</b> integer, mandatory: dimensionality of the regression file, equals the number of input vectors (
* {@link InputData#numVectors()}). <br>
* <b>labelName_n outputValue</b> <br>
* <p>
* Alternatively, the file format can be more simple, and not contain any file header. Then, there is only a list of
* lines with two tabulator-separated <code>Strings</code> in the form of <code>labelName regressionValue</code>.<br>
* </p>
*
* @author Rudolf Mayer
* @version $Id: SOMLibRegressInformation.java 3583 2010-05-21 10:07:41Z mayer $
*/
public class SOMLibRegressInformation {
    private static final Logger logger = Logger.getLogger("at.tuwien.ifs.somtoolbox");

    /** The file name to read from. */
    private String regressionInformationFileName = null;

    /** Output value per label name, kept in the order in which the entries were read. */
    private LinkedHashMap<String, Double> dataHash = new LinkedHashMap<String, Double>();

    private double maxPrediction;

    private double minPrediction;

    private double meanPrediction;

    /**
     * Creates a new regression information object by trying to read the given file first in the version with a file
     * header ({@link #readSOMLibRegressionInformationFile()}) and, if no header is found, as a tab separated file (
     * {@link #readTabSepRegressionInformationFile()}).
     * 
     * @param regressionInformationFileName the name of the file to read
     * @throws SOMToolboxException if the file cannot be read or parsed in either format; the underlying failure is
     *             attached as the cause
     */
    public SOMLibRegressInformation(String regressionInformationFileName) throws SOMToolboxException {
        this.regressionInformationFileName = regressionInformationFileName;
        try {
            readSOMLibRegressionInformationFile();
        } catch (ClassInfoHeaderNotFoundException e) {
            logger.info(e.getMessage());
            logger.info("Trying to read tab/space separated regression info file...");
            try {
                readTabSepRegressionInformationFile();
            } catch (Exception e2) {
                // report the failure of the fallback reader, not the (expected) missing-header exception
                throw wrapReadFailure(e2);
            }
        } catch (IOException e) {
            // an unreadable file must not end up as a silently empty, "correctly loaded" object
            throw wrapReadFailure(e);
        }
        logger.info("Regression information file correctly loaded.");
    }

    /** Builds the exception reported to callers when reading fails, keeping the original failure as its cause. */
    private SOMLibFileFormatException wrapReadFailure(Exception cause) {
        SOMLibFileFormatException failure = new SOMLibFileFormatException(
                "Problems reading regression information file " + regressionInformationFileName + ": ' "
                        + cause.getMessage() + "'. Aborting.");
        failure.initCause(cause);
        return failure;
    }

    /**
     * Reads a regression information file containing a header ($XDIM = number of columns per line, $YDIM = number of
     * content lines) followed by one <code>labelName outputValue</code> entry per line.
     * 
     * @throws ClassInfoHeaderNotFoundException if the file does not start with a header
     * @throws SOMLibFileFormatException if a header value or a content line is malformed
     */
    private void readSOMLibRegressionInformationFile() throws IOException, SOMToolboxException {
        BufferedReader br = FileUtils.openFile("regression information file", regressionInformationFileName);
        try {
            Map<String, String> headers = FileUtils.readSOMLibFileHeaders(br, "regression information");
            String line = headers.get("FIRST_CONTENT_LINE");
            int headerLineCount = Integer.parseInt(headers.get("LINE_NUMBER"));
            if (headerLineCount < 2) {
                throw new ClassInfoHeaderNotFoundException(
                        "Regression information file: no header line starting with $ found");
            }

            int columns = parseHeaderInt(headers, "$XDIM");
            if (columns < 2) {
                throw new SOMLibFileFormatException(
                        "Regression information file format corrupt. At least 2 columns (name, predictedValue) required. Aborting.");
            }
            int numData = parseHeaderInt(headers, "$YDIM");
            if (numData == 0) {
                throw new SOMLibFileFormatException(
                        "Regression information file format corrupt. Missing $YDIM value. Aborting.");
            }

            // READ REST OF THE FILE
            int index = 0; // number of content lines read so far
            while (line != null) {
                // TODO if line is no comment line ($)
                index++;
                String[] lineElements = line.split(StringUtils.REGEX_SPACE_OR_TAB);
                if (lineElements.length != columns) {
                    throw new SOMLibFileFormatException(
                            "Regression information file format corrupt in element number " + index
                                    + ", incorrect number of columns: XDIM: " + columns + ", columns: "
                                    + lineElements.length + ". Aborting.");
                }
                storePrediction(lineElements[0], lineElements[1], index);
                line = br.readLine();
            }
            if (index != numData) {
                throw new SOMLibFileFormatException(
                        "Output information file format corrupt. Incorrect number of data items. Aborting.\n"
                                + Integer.toString(index) + " " + Integer.toString(numData));
            }
        } finally {
            // also release the file on every error path, e.g. before the constructor re-opens it for the fallback
            closeQuietly(br);
        }
        computeStats();
    }

    /**
     * Reads a header-less file with exactly two tabulator-separated elements (<code>labelName outputValue</code>) per
     * line. Lines that are empty after trimming are skipped.
     * 
     * @throws SOMLibFileFormatException if a line does not have exactly two elements or the value is not a number
     */
    private void readTabSepRegressionInformationFile() throws SOMToolboxException, IOException {
        BufferedReader br = FileUtils.openFile("regression information file", regressionInformationFileName);
        try {
            int index = 0; // counts non-empty lines
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                index++;
                String[] lineElements = line.split("\t");
                if (lineElements.length != 2) {
                    throw new SOMLibFileFormatException(
                            "Number of elements per line must be exactly 2! Error in line " + index);
                }
                storePrediction(lineElements[0], lineElements[1], index);
            }
        } finally {
            closeQuietly(br);
        }
        computeStats();
    }

    /** Parses the integer header value stored under <code>key</code>, rejecting missing or non-numeric values. */
    private static int parseHeaderInt(Map<String, String> headers, String key) throws SOMLibFileFormatException {
        try {
            // Integer.parseInt(null) also throws NumberFormatException, so this covers a missing header as well
            return Integer.parseInt(headers.get(key));
        } catch (NumberFormatException e) {
            throw new SOMLibFileFormatException("Regression information file format corrupt. Missing or invalid "
                    + key + " value: '" + headers.get(key) + "'. Aborting.");
        }
    }

    /** Parses <code>rawValue</code> as a double and stores it under <code>label</code>. */
    private void storePrediction(String label, String rawValue, int index) throws SOMLibFileFormatException {
        try {
            dataHash.put(label, Double.parseDouble(rawValue));
        } catch (NumberFormatException e) {
            throw new SOMLibFileFormatException("Output number format corrupt in element number " + index + ": '"
                    + rawValue + "'. Aborting.");
        }
    }

    /** Closes the reader; a failure to close is only logged so that it cannot mask an earlier exception. */
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            logger.warning("Could not close regression information file: " + e.getMessage());
        }
    }

    /**
     * Recomputes the minimum, maximum and mean over all stored output values. The statistics are computed from
     * scratch on every call. If no values are stored, all three statistics are set to {@link Double#NaN}.
     */
    public void computeStats() {
        if (dataHash.isEmpty()) {
            maxPrediction = Double.NaN;
            minPrediction = Double.NaN;
            meanPrediction = Double.NaN;
            return;
        }
        // Double.MIN_VALUE is the smallest POSITIVE double, so it must not be used as the start value for a maximum
        double max = Double.NEGATIVE_INFINITY;
        double min = Double.POSITIVE_INFINITY;
        double sum = 0;
        for (double value : dataHash.values()) {
            sum += value;
            if (value < min) {
                min = value;
            }
            if (value > max) {
                max = value;
            }
        }
        maxPrediction = max;
        minPrediction = min;
        meanPrediction = sum / dataHash.size();
    }

    /**
     * Looks up the output value stored for the given label name.
     * 
     * @param vectorname the label name of the input vector
     * @return the stored output value, or 0 (after logging a warning) if the name is unknown
     */
    public double getPrediction(String vectorname) {
        Double prediction = dataHash.get(vectorname);
        if (prediction == null) {
            logger.warning("Could not find prediction for input '" + vectorname + "'.");
            return 0;
        }
        return prediction;
    }

    /** @return Returns the maximum prediction value */
    public double getMaxPrediction() {
        return maxPrediction;
    }

    /** @return Returns the mean prediction value */
    public double getMeanPrediction() {
        return meanPrediction;
    }

    /** @return Returns the minimum prediction value */
    public double getMinPrediction() {
        return minPrediction;
    }
}