/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.models;
import java.io.IOException;
import java.util.Collection;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp;
import at.tuwien.ifs.somtoolbox.apps.config.AbstractOptionFactory;
import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory;
import at.tuwien.ifs.somtoolbox.data.InputData;
import at.tuwien.ifs.somtoolbox.data.SOMVisualisationData;
import at.tuwien.ifs.somtoolbox.data.SharedSOMVisualisationData;
import at.tuwien.ifs.somtoolbox.input.SOMInputReader;
import at.tuwien.ifs.somtoolbox.input.SOMLibDataWinnerMapping;
import at.tuwien.ifs.somtoolbox.layers.GrowingLayer;
import at.tuwien.ifs.somtoolbox.layers.LayerAccessException;
import at.tuwien.ifs.somtoolbox.layers.Unit;
import at.tuwien.ifs.somtoolbox.layers.quality.AbstractQualityMeasure;
import at.tuwien.ifs.somtoolbox.layers.quality.QualityMeasure;
import at.tuwien.ifs.somtoolbox.layers.quality.QualityMeasureNotFoundException;
import at.tuwien.ifs.somtoolbox.output.HTMLOutputter;
import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter;
import at.tuwien.ifs.somtoolbox.output.labeling.AbstractLabeler;
import at.tuwien.ifs.somtoolbox.output.labeling.Labeler;
import at.tuwien.ifs.somtoolbox.properties.FileProperties;
import at.tuwien.ifs.somtoolbox.properties.GHSOMProperties;
import at.tuwien.ifs.somtoolbox.properties.PropertiesException;
/**
* This class implements the Growing Hierarchical Self-Organizing Map. It is basically a wrapper for the
* {@link at.tuwien.ifs.somtoolbox.models.GrowingSOM} and mainly handles command line execution and parameters. It
* implements the {@link at.tuwien.ifs.somtoolbox.models.NetworkModel} interface, which is currently not used, but may
* be used in the future. It is also not clear whether this class will be removed and replaced by the
* <code>GrowingSOM</code>, because it already contains the hierarchical functionality; only the training procedure
* would have to be updated.
*
* @author Michael Dittenbach
* @version $Id: GHSOM.java 3993 2011-01-18 13:15:17Z mayer $
*/
public class GHSOM extends AbstractNetworkModel implements SOMToolboxApp {
/** Short description of this application. */
public static String DESCRIPTION = "The Growing Hierarchical SOM grows a hierarchy of maps, depending on the structure of the data set.";

/** Application category; this application is a training application. */
public static final Type APPLICATION_TYPE = Type.Training;

// TODO: Long_Description
/** Long description; for the time being it is initialised with the value of {@link #DESCRIPTION}. */
public static String LONG_DESCRIPTION = DESCRIPTION;

/** Command line parameters handed to the option parser in {@link #main(String[])}. */
public static final Parameter[] OPTIONS = {
    OptionFactory.getSwitchHtmlOutput(false),
    OptionFactory.getOptLabeling(false),
    OptionFactory.getOptNumberLabels(false),
    OptionFactory.getOptNumberWinners(false),
    OptionFactory.getSwitchSkipDataWinnerMapping(),
    OptionFactory.getOptProperties(true)
};
/**
 * Container for units that were selected for expansion, together with the quality value recorded for each of them.
 * Units and qualities are kept in two parallel lists: the entry at index <code>i</code> of one list belongs to the
 * entry at index <code>i</code> of the other.
 */
private class ExpandedUnits {
    /** Quality values, parallel to {@link #units}. */
    private final Vector<Double> qualities;

    /** Selected units, parallel to {@link #qualities}. */
    private final Vector<Unit> units;

    /** Creates an empty container. */
    public ExpandedUnits() {
        units = new Vector<Unit>();
        qualities = new Vector<Double>();
    }

    /**
     * Appends all units and qualities of another container, keeping their order.
     *
     * @param newUnits the container whose entries are appended.
     */
    public void addAll(ExpandedUnits newUnits) {
        units.addAll(newUnits.getUnits());
        qualities.addAll(newUnits.getQualities());
    }

    /**
     * Appends a unit and its quality value.
     *
     * @param unit the unit to append.
     * @param qual the quality value belonging to <code>unit</code>.
     */
    public void addUnit(Unit unit, double qual) {
        units.addElement(unit);
        // Double.valueOf replaces the deprecated Double(double) constructor
        qualities.addElement(Double.valueOf(qual));
    }

    /**
     * @return the number of stored units.
     */
    public int getNumElements() {
        return units.size();
    }

    /**
     * @return the internal list of quality values (not a copy).
     */
    public Collection<Double> getQualities() {
        return qualities;
    }

    /**
     * @param i index of the entry.
     * @return the quality value stored at index <code>i</code>.
     */
    public double getQuality(int i) {
        return qualities.elementAt(i).doubleValue();
    }

    /**
     * @param i index of the entry.
     * @return the unit stored at index <code>i</code>.
     */
    public Unit getUnit(int i) {
        return units.elementAt(i);
    }

    /**
     * @return the internal list of units (not a copy).
     */
    public Collection<Unit> getUnits() {
        return units;
    }

    /**
     * Removes the unit and the quality value stored at index <code>i</code>.
     *
     * @param i index of the entry to remove.
     */
    public void remove(int i) {
        // i is an int, so both calls resolve to remove(int index) and the two lists stay aligned
        units.remove(i);
        qualities.remove(i);
    }
}
/**
 * Method for stand-alone execution of map training.<br>
 * Options are:
 * <ul>
 * <li>-h toggles HTML output</li>
 * <li>-l name of class implementing the labeling algorithm</li>
 * <li>-n number of labels to generate</li>
 * <li>number of winners written to the data winner mapping file (read as <code>numberWinners</code>)</li>
 * <li>--noDWM switch to not write the data winner mapping file</li>
 * <li>properties name of properties file, mandatory</li>
 * </ul>
 * Earlier documentation also listed <code>-w</code> (weight vector file) and <code>-m</code> (map description file)
 * for training an already trained map. Neither is part of {@link #OPTIONS}; the value
 * <code>weightVectorFile</code> is still queried, but only to choose a log message.
 * <p>
 * The method trains a new {@link GHSOM} on the input data named in the properties file, optionally labels it, and
 * writes the map files, the data winner mapping file (unless skipped) and, if requested, HTML output. On any
 * failure it logs the problem and terminates the JVM with exit status -1.
 *
 * @param args the execution arguments as stated above.
 */
public static void main(String[] args) {
    final String networkModelName = "GHSOM";

    // register and parse all options
    JSAPResult config = OptionFactory.parseResults(args, OPTIONS);
    final Logger log = Logger.getLogger("at.tuwien.ifs.somtoolbox");
    log.info("starting " + networkModelName);

    String propFileName = AbstractOptionFactory.getFilePath(config, "properties");
    // NOTE(review): no "weightVectorFile" option is registered in OPTIONS, so this presumably always yields
    // null - confirm against OptionFactory/JSAP.
    String weightFileName = config.getString("weightVectorFile", null);
    String labelerName = config.getString("labeling", null);
    int numLabels = config.getInt("numberLabels", DEFAULT_LABEL_COUNT);
    boolean skipDataWinnerMapping = config.getBoolean("skipDataWinnerMapping", false);
    // TODO: use parameter for max
    int numWinners = config.getInt("numberWinners", SOMLibDataWinnerMapping.MAX_DATA_WINNERS);

    Labeler labeler = null;
    if (labelerName != null) { // a labeler was requested
        try {
            labeler = AbstractLabeler.instantiate(labelerName);
            log.info("Instantiated labeler " + labelerName);
        } catch (Exception e) {
            // keep the cause in the log, it is the only hint at why instantiation failed
            log.log(Level.SEVERE, "Could not instantiate labeler \"" + labelerName + "\".", e);
            System.exit(-1);
        }
    }

    if (weightFileName == null) {
        log.info("Training a new SOM.");
    } else {
        log.info("Further training of an already trained SOM.");
    }

    FileProperties fileProps = null;
    GHSOMProperties somProps = null;
    try {
        fileProps = new FileProperties(propFileName);
        somProps = new GHSOMProperties(propFileName);
    } catch (PropertiesException e) {
        log.severe(e.getMessage() + " Aborting.");
        System.exit(-1);
    }

    InputData data = getInputData(fileProps);
    GHSOM som = new GHSOM(data.dim(), data.isNormalizedToUnitLength(), somProps, data);
    som.setSharedInputObjects(new SharedSOMVisualisationData(null, null, null, null,
            fileProps.vectorFileName(true), fileProps.templateFileName(true), null));
    som.getSharedInputObjects().setData(SOMVisualisationData.INPUT_VECTOR, data);
    som.train(data, somProps);

    if (labelerName != null) { // if labeling then label
        labeler.label(som, data, numLabels);
    }

    try {
        // TODO: make output format an argument
        SOMLibMapOutputter.write(som, fileProps.outputDirectory(), fileProps.namePrefix(false), true, somProps,
                fileProps);
    } catch (IOException e) { // TODO: create new exception type
        abortOnOutputError(log, fileProps, e);
    }

    if (skipDataWinnerMapping) {
        log.info("Skipping writing data winner mapping file");
    } else {
        try {
            SOMLibMapOutputter.writeDataWinnerMappingFile(som, data, numWinners, fileProps.outputDirectory(),
                    fileProps.namePrefix(false), true);
        } catch (IOException e) {
            abortOnOutputError(log, fileProps, e);
        }
    }

    if (config.getBoolean("htmlOutput")) {
        try {
            // TODO: make output format an argument, zipped output
            new HTMLOutputter().write(som, fileProps.outputDirectory(), fileProps.namePrefix(false));
        } catch (IOException e) { // TODO: create new exception type
            abortOnOutputError(log, fileProps, e);
        }
    }

    log.info("finished " + networkModelName);
}

/** Logs a failed attempt to write an output file and terminates the JVM with exit status -1. */
private static void abortOnOutputError(Logger log, FileProperties fileProps, IOException e) {
    log.severe("Could not open or write to output file " + fileProps.namePrefix(false) + ": " + e.getMessage());
    System.exit(-1);
}
/**
 * Layer created with size arguments 1 and 1 by {@link #GHSOM(int, boolean, GHSOMProperties, InputData)}; its unit
 * (0/0) is set to the mean of the data in {@link #train(InputData, GHSOMProperties)}. Remains <code>null</code>
 * when the model is created with {@link #GHSOM(SOMInputReader)}.
 */
private GrowingLayer layer0;

/** The map at the top of the hierarchy, as returned by {@link #topLayerMap()}. */
private GrowingSOM topLayerMap;
/**
 * Constructs a new <code>GHSOM</code> with <code>dim</code>-dimensional weight vectors. Argument <code>norm</code>
 * determines whether the randomly initialized weight vectors should be normalized to unit length or not.
 * <p>
 * The constructor creates layer 0 (size arguments 1 and 1) and the top-layer map with id 0, which is attached to
 * unit (0/0) of layer 0.
 *
 * @param dim the dimensionality of the weight vectors.
 * @param norm specifies if the weight vectors are to be normalized to unit length.
 * @param props the network properties.
 * @param data the input data, passed on to layer 0 and to the top-layer map.
 * @throws IllegalStateException if unit (0/0) of the freshly created layer 0 cannot be accessed.
 */
public GHSOM(int dim, boolean norm, GHSOMProperties props, InputData data) {
    // NOTE(review): the literal 7 is presumably a random seed - confirm against the GrowingLayer constructor
    layer0 = new GrowingLayer(1, 1, "at.tuwien.ifs.somtoolbox.layers.metrics.L2Metric", dim, norm, props.pca(), 7,
            data);
    try {
        topLayerMap = new GrowingSOM(0, layer0.getUnit(0, 0), dim, norm, props, data);
    } catch (LayerAccessException e) {
        // Not expected for the layer created just above. Fail loudly: an "assert false" is a no-op when
        // assertions are disabled and would leave topLayerMap null.
        throw new IllegalStateException("Could not access unit 0/0 of layer 0.", e);
    }
}
/**
 * Stores the shared input objects in this model and, if a top-layer map exists, passes the same object on to it.
 *
 * @param sharedInputObjects the shared input objects to set.
 * @see at.tuwien.ifs.somtoolbox.models.AbstractNetworkModel#setSharedInputObjects(at.tuwien.ifs.somtoolbox.data.SharedSOMVisualisationData)
 */
@Override
public void setSharedInputObjects(SharedSOMVisualisationData sharedInputObjects) {
    super.setSharedInputObjects(sharedInputObjects);
    if (topLayerMap == null) {
        // no top-layer map to forward the objects to
        return;
    }
    topLayerMap.setSharedInputObjects(sharedInputObjects);
}
/**
 * Creates a <code>GHSOM</code> from an already trained model. The top-layer map is built from the given reader;
 * nothing else is initialised by this constructor.
 *
 * @param ir an object implementing the <code>SOMInputReader</code> interface to load an already trained model.
 */
public GHSOM(SOMInputReader ir) {
    this.topLayerMap = new GrowingSOM(ir);
}
// FIXME: this method should be moved to at.tuwien.ifs.somtoolbox.layers.GrowingLayer
/**
 * Collects the units of <code>layer</code> that are to be expanded. A unit is selected when its quality value, as
 * reported by <code>qm</code> for the method <code>qmName</code>, is greater than
 * <code>fraction * totalQuality</code> and the unit has at least one mapped input. The decision for every unit is
 * logged at level INFO.
 * <p>
 * If the quality measure does not know <code>qmName</code>, the error is logged and the JVM is terminated with
 * exit status -1. If a unit cannot be accessed, the error is logged and the units collected so far are returned.
 *
 * @param layer the layer whose units are checked.
 * @param qm the quality measure providing the per-unit quality values.
 * @param qmName the name of the quality measure method to query.
 * @param fraction the factor applied to <code>totalQuality</code> to obtain the required quality.
 * @param totalQuality the reference quality value.
 * @return the selected units together with their quality values; empty if no unit qualifies.
 */
public ExpandedUnits getExpandedUnits(GrowingLayer layer, QualityMeasure qm, String qmName, double fraction,
        double totalQuality) {
    final Logger log = Logger.getLogger("at.tuwien.ifs.somtoolbox");
    ExpandedUnits expUnits = new ExpandedUnits();

    double[][] quality = null;
    try {
        quality = qm.getUnitQualities(qmName);
    } catch (QualityMeasureNotFoundException e) {
        log.severe(e.getMessage() + " Aborting.");
        System.exit(-1);
    }

    // the threshold is the same for every unit, so compute it once
    final double requiredQuality = fraction * totalQuality;
    try {
        for (int j = 0; j < layer.getYSize(); j++) {
            for (int i = 0; i < layer.getXSize(); i++) {
                double unitQuality = quality[i][j];
                boolean willExpand = unitQuality > requiredQuality
                        && layer.getUnit(i, j).getNumberOfMappedInputs() > 0;
                if (log.isLoggable(Level.INFO)) {
                    log.info("Expanding unit " + i + "/" + j + ": " + willExpand + "( unit quality: "
                            + unitQuality + ", required quality: " + fraction + " * " + totalQuality + " ="
                            + requiredQuality + ")");
                }
                if (willExpand) {
                    expUnits.addUnit(layer.getUnit(i, j), unitQuality);
                }
            }
        }
    } catch (LayerAccessException e) {
        // not expected, the indices stay within the layer size; report through the logger like the rest of the file
        log.log(Level.SEVERE, "Could not access a unit while checking units for expansion.", e);
    }
    return expUnits;
}
/**
 * Gives access to the map at the top of the hierarchy.
 *
 * @return the top-layer map.
 */
public GrowingSOM topLayerMap() {
    return this.topLayerMap;
}
/**
 * Trains the GHSOM with the input data and training parameters specified in the properties provided by argument
 * <code>props</code>.
 * <p>
 * Steps:
 * <ol>
 * <li>The unit of layer 0 is set to the mean vector of the data and the data is mapped onto it.</li>
 * <li>The map quality of layer 0 is computed with the growth quality measure.</li>
 * <li>The top-layer map is trained, and its units are checked for expansion.</li>
 * <li>While units are queued for expansion, a new map is created for the first queued unit and trained on the
 * inputs mapped to that unit; units of the new map that qualify are appended to the queue.</li>
 * </ol>
 * If the growth quality measure cannot be instantiated or queried, the error is logged and the JVM is terminated
 * with exit status -1.
 *
 * @param data input data to train the map with.
 * @param props the training properties.
 * @throws IllegalStateException if this model has no layer 0, which is the case when it was created with
 *             {@link #GHSOM(SOMInputReader)}.
 */
public void train(InputData data, GHSOMProperties props) {
    final Logger log = Logger.getLogger("at.tuwien.ifs.somtoolbox");

    String growthQMName = props.growthQualityMeasureName();
    String expandQMName = props.expandQualityMeasureName();
    String[] growthQM = AbstractQualityMeasure.splitNameAndMethod(growthQMName);
    String[] expandQM = AbstractQualityMeasure.splitNameAndMethod(expandQMName);

    // layer0 is only created by the (dim, norm, props, data) constructor
    if (layer0 == null) {
        throw new IllegalStateException("Cannot train a GHSOM that was created without layer 0.");
    }

    // set layer 0 unit to mean of data
    try {
        layer0.getUnit(0, 0).setWeightVector(data.getMeanVector().toArray());
        layer0.getUnit(0, 0).addMappedInput(data, false);
    } catch (SOMToolboxException e) {
        // not expected; training continues as before, but the problem is no longer hidden
        log.log(Level.SEVERE, "Could not initialise the layer 0 unit with the mean of the data.", e);
    }

    // calculate map error
    QualityMeasure qm0 = null;
    try {
        qm0 = AbstractQualityMeasure.instantiate(growthQM[0], layer0, data);
    } catch (Exception e) {
        log.severe("Could not instantiate quality measure.");
        System.exit(-1);
    }
    double totalMqe = Double.MAX_VALUE;
    try {
        totalMqe = qm0.getMapQuality(growthQM[1]);
    } catch (QualityMeasureNotFoundException e) {
        log.severe(e.getMessage() + " Aborting.");
        System.exit(-1);
    }

    QualityMeasure qm1 = topLayerMap.train(data, props, totalMqe, growthQMName);

    // check units for expansion
    ExpandedUnits unitQueue = getExpandedUnits(topLayerMap.getLayer(), qm1, expandQM[1], props.tau2(), totalMqe);
    log.info(unitQueue.getNumElements() + " units to expand.");

    // ids are pre-incremented, so the first map created below gets id 2
    int id = 1;
    while (unitQueue.getNumElements() > 0) {
        // pick next unit
        Unit currentUnit = unitQueue.getUnit(0);
        double currentQuality = unitQueue.getQuality(0);
        log.info(unitQueue.getNumElements() + " units to expand.");

        // create new map and assign to unit
        GrowingSOM newGSOM = new GrowingSOM(++id, currentUnit, data.dim(), data.isNormalizedToUnitLength(), props,
                data);
        newGSOM.setSharedInputObjects(topLayerMap.getSharedInputObjects());
        currentUnit.setMappedSOM(newGSOM);

        // generate subset of data
        InputData newData = data.subset(currentUnit.getMappedInputNames());

        // train map; the quality of the parent unit is passed in place of the layer 0 quality
        QualityMeasure qm = currentUnit.getMappedSOM().train(newData, props, currentQuality, growthQMName);

        // calculate quality; the expansion threshold still refers to the layer 0 quality
        ExpandedUnits newUnits = getExpandedUnits(newGSOM.getLayer(), qm, expandQM[1], props.tau2(), totalMqe);
        unitQueue.addAll(newUnits);
        unitQueue.remove(0);
    }
}
}