/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.output.labeling;
import java.io.IOException;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp;
import at.tuwien.ifs.somtoolbox.apps.config.AbstractOptionFactory;
import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory;
import at.tuwien.ifs.somtoolbox.data.InputData;
import at.tuwien.ifs.somtoolbox.data.InputDataFactory;
import at.tuwien.ifs.somtoolbox.data.InputDatum;
import at.tuwien.ifs.somtoolbox.input.SOMLibFormatInputReader;
import at.tuwien.ifs.somtoolbox.layers.Label;
import at.tuwien.ifs.somtoolbox.layers.Unit;
import at.tuwien.ifs.somtoolbox.models.AbstractNetworkModel;
import at.tuwien.ifs.somtoolbox.models.GHSOM;
import at.tuwien.ifs.somtoolbox.models.GrowingSOM;
import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter;
import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter;
/**
* Implements the <code>LabelSOM</code> labelling method, as described in <i><b>A. Rauber, and D. Merkl</b>: Automatic
* Labeling of Self-Organizing Maps for Information Retrieval. In: Journal of Systems Research and Information Systems
* (JSRIS), Vol. 10, Nr. 10, pp 23-45, OPA, Gordon and Breach Science Publishers, December 2001.</i>
*
* @author Michael Dittenbach
* @version $Id: LabelSOM.java 3883 2010-11-02 17:13:23Z frank $
*/
public class LabelSOM extends AbstractLabeler implements Labeler, SOMToolboxApp {

    /** Command line options accepted by {@link #main(String[])}. */
    public static final Parameter[] OPTIONS = new Parameter[] { OptionFactory.getOptInputVectorFile(true),
            OptionFactory.getOptTemplateVectorFile(true), OptionFactory.getOptWeightVectorFile(true),
            OptionFactory.getOptUnitDescriptionFile(true), OptionFactory.getOptNumberLabels(false, "5"),
            OptionFactory.getSwitchIsDenseData(), OptionFactory.getSwitchIgnoreLabelsWithZero(),
            OptionFactory.getOptMapDescriptionFile(false) };

    public static final String DESCRIPTION = "Implements the LabelSOM labelling method";

    public static final String LONG_DESCRIPTION = DESCRIPTION;

    public static final Type APPLICATION_TYPE = Type.Helper;

    /** Shared toolbox logger; the same named logger the rest of this class has always written to. */
    private static final Logger LOGGER = Logger.getLogger("at.tuwien.ifs.somtoolbox");

    /**
     * Method for stand-alone execution of map labelling. Options are:<br/>
     * <ul>
     * <li>-v Input file containing the input vectors.</li>
     * <li>-t Template vector file containing vector element labels.</li>
     * <li>-w Weight vector filename, mand.</li>
     * <li>-u Unit description file, mand.</li>
     * <li>-n Number of labels, opt., default = 5</li>
     * <li>-d Set if input data vectors are densely populated.</li>
     * <li>-m Map description file, opt.</li>
     * <li>the "ignoreLabelsWithZero" switch: if set, features whose mean value on a unit is zero (and whose
     * quantisation error is very small) get an empty label name.</li>
     * </ul>
     * The labelled map is written as a unit description file into the directory of the given unit description file,
     * using that file's name up to its first dot as the output name.
     *
     * @param args the execution arguments as stated above.
     */
    public static void main(String[] args) {
        JSAPResult config = OptionFactory.parseResults(args, OPTIONS);

        int numLabels = config.getInt("numberLabels", AbstractNetworkModel.DEFAULT_LABEL_COUNT);
        String inputVectorFilename = AbstractOptionFactory.getFilePath(config, "inputVectorFile");
        boolean denseData = config.getBoolean("denseData", false);
        boolean ignoreLabelsWithZero = config.getBoolean("ignoreLabelsWithZero", false);
        String templateVectorFilename = AbstractOptionFactory.getFilePath(config, "templateVectorFile");
        String unitDescriptionFilename = AbstractOptionFactory.getFilePath(config, "unitDescriptionFile");
        String weightVectorFilename = AbstractOptionFactory.getFilePath(config, "weightVectorFile");
        String mapDescriptionFilename = AbstractOptionFactory.getFilePath(config, "mapDescriptionFile");

        // Split the unit description path into directory part and base name (text up to the first dot).
        int nameStart = unitDescriptionFilename.lastIndexOf(System.getProperty("file.separator")) + 1;
        String outputDirName = unitDescriptionFilename.substring(0, nameStart);
        if (StringUtils.isBlank(outputDirName)) {
            outputDirName = ".";
        }
        int nameEnd = unitDescriptionFilename.indexOf('.', nameStart);
        if (nameEnd < 0) {
            // No extension present: use the whole file name instead of failing in substring().
            nameEnd = unitDescriptionFilename.length();
        }
        String outputFileName = unitDescriptionFilename.substring(nameStart, nameEnd);

        LOGGER.info("Labelling map '" + outputFileName + "' to output directory: " + outputDirName);

        GrowingSOM gsom = null;
        try {
            gsom = new GrowingSOM(new SOMLibFormatInputReader(weightVectorFilename, unitDescriptionFilename,
                    mapDescriptionFilename));
        } catch (Exception e) {
            LOGGER.severe(e.getMessage() + " Aborting.");
            return;
        }

        // TODO: cacheBlock=1, no problem
        InputData data = InputDataFactory.open(inputVectorFilename, templateVectorFilename, !denseData, true, 1,
                7);

        LabelSOM labeler = new LabelSOM();
        labeler.label(gsom, data, numLabels, ignoreLabelsWithZero);

        try {
            // TODO: make output format an argument, zipped output
            SOMLibMapOutputter.writeUnitDescriptionFile(gsom, outputDirName, outputFileName, true);
        } catch (IOException e) { // TODO: create new exception type
            LOGGER.severe("Could not open or write to output file: " + outputFileName + ": " + e.getMessage());
        }
    }

    /**
     * Labels the top layer map of the given GHSOM.
     *
     * @param ghsom the hierarchical map whose top layer is labelled
     * @param data the input data the map was trained on
     * @param num the number of labels to assign per unit
     */
    @Override
    public void label(GHSOM ghsom, InputData data, int num) {
        label(ghsom.topLayerMap(), data, num);
    }

    /**
     * Labels the given map without blanking zero-valued labels.
     *
     * @param gsom the map to label
     * @param data the input data the map was trained on
     * @param num the number of labels to assign per unit
     */
    @Override
    public void label(GrowingSOM gsom, InputData data, int num) {
        label(gsom, data, num, false);
    }

    /**
     * Labels every unit of the given map that has inputs mapped onto it, and recursively every map attached to such
     * a unit. For each unit, the per-feature mean value and mean absolute deviation from the unit's weight vector are
     * computed over the mapped inputs; the <code>num</code> features with the highest mean are selected and set as
     * the unit's labels. Finally the map is marked as labelled.
     *
     * @param gsom the map to label
     * @param data the input data the map was trained on
     * @param num the number of labels per unit; capped at the dimension of the template vector
     * @param ignoreLabelsWithZero if <code>true</code>, features with a mean of zero and a very small quantisation
     *            error get an empty label name; the setting is also applied to subordinate maps
     */
    @Override
    public void label(GrowingSOM gsom, InputData data, int num, boolean ignoreLabelsWithZero) {
        int templateDim = data.templateVector().dim();
        if (num > templateDim) {
            LOGGER.warning("Specified number of labels (" + num
                    + ") exceeds number of features in template vector (" + templateDim
                    + ") - defaulting to number of features as maximum possible value.");
            num = templateDim;
        }

        Unit[] units = gsom.getLayer().getAllUnits();
        StdErrProgressWriter progress = new StdErrProgressWriter(units.length, "Labelling unit ");
        for (int i = 0; i < units.length; i++) { // do labeling for each unit
            progress.progress(i);
            Unit unit = units[i];
            if (unit.getNumberOfMappedInputs() == 0) {
                continue; // nothing mapped here, so there is nothing to derive labels from
            }

            InputDatum[] unitData = data.getInputDatum(unit.getMappedInputNames());
            Label[] allLabels = computeFeatureLabels(unit, unitData, data, ignoreLabelsWithZero);
            unit.setLabels(selectTopLabels(allLabels, num));

            if (unit.getMappedSOM() != null) { // label subordinate maps as well
                // Pass the zero-label setting on; the 3-argument overload would reset it to false.
                label(unit.getMappedSOM(), data, num, ignoreLabelsWithZero);
            }
        }
        gsom.setLabelled(true);
    }

    /**
     * Builds one label per feature for a unit, holding the mean feature value and the mean absolute deviation of the
     * mapped inputs from the unit's weight vector (the quantisation error of that feature).
     */
    private static Label[] computeFeatureLabels(Unit unit, InputDatum[] unitData, InputData data,
            boolean ignoreLabelsWithZero) {
        int dim = data.dim();
        double[] weights = unit.getWeightVector();
        Label[] allLabels = new Label[dim];
        for (int ve = 0; ve < dim; ve++) {
            double meanVal = 0;
            double qeVal = 0;
            for (InputDatum element : unitData) {
                double value = element.getVector().get(ve);
                meanVal += value;
                qeVal += Math.abs(value - weights[ve]);
            }
            meanVal = meanVal / unitData.length;
            qeVal = qeVal / unitData.length;

            // if we shall ignore zero labels, blank the name of those with mean==0 and very small qe
            if (ignoreLabelsWithZero && meanVal == 0 && qeVal * 100 < 0.1) {
                allLabels[ve] = new Label("", meanVal, qeVal);
            } else {
                allLabels[ve] = new Label(data.templateVector().getLabel(ve), meanVal, qeVal);
            }
        }
        return allLabels;
    }

    /**
     * Picks the labels that are among the <code>num</code> highest mean values, visiting candidates in order of
     * ascending quantisation error, and returns them sorted by descending value, then ascending quantisation error.
     */
    private static Label[] selectTopLabels(Label[] allLabels, int num) {
        int dim = allLabels.length;
        Label[] byQe = allLabels.clone();
        Label[] byMean = allLabels.clone();
        Label.sortByQe(byQe, Label.SORT_ASC);
        Label.sortByValue(byMean, Label.SORT_ASC);

        Label[] labels = new Label[num];
        int found = 0;
        for (int q = 0; q < dim && found < num; q++) { // go through list sorted by qe
            // the last num entries of the ascending mean ordering are the num largest means
            for (int m = dim - 1; m >= dim - num; m--) {
                if (byMean[m].equals(byQe[q])) {
                    labels[found] = byQe[q];
                    found++;
                    break;
                }
            }
        }
        Label.sortByValueQe(labels, Label.SORT_DESC, Label.SORT_ASC);
        return labels;
    }
}