/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.apps.helper; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.logging.Logger; import org.apache.commons.lang.StringUtils; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp; import at.tuwien.ifs.somtoolbox.apps.config.AbstractOptionFactory; import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory; import at.tuwien.ifs.somtoolbox.data.InputData; import at.tuwien.ifs.somtoolbox.data.InputDatum; import at.tuwien.ifs.somtoolbox.data.SOMLibClassInformation; import at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData; import at.tuwien.ifs.somtoolbox.input.SOMLibFileFormatException; import at.tuwien.ifs.somtoolbox.input.SOMLibFormatInputReader; import at.tuwien.ifs.somtoolbox.layers.GrowingLayer; import at.tuwien.ifs.somtoolbox.layers.Unit; import at.tuwien.ifs.somtoolbox.layers.metrics.L2Metric; import at.tuwien.ifs.somtoolbox.models.AbstractNetworkModel; import at.tuwien.ifs.somtoolbox.models.GrowingSOM; import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter; import at.tuwien.ifs.somtoolbox.output.labeling.AbstractLabeler; import at.tuwien.ifs.somtoolbox.output.labeling.Labeler; import at.tuwien.ifs.somtoolbox.util.FileUtils; import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter; /** * Maps inputs to an already trained SOM. * * @author Angela Roiger * @author Rudolf Mayer * @version $Id: DataMapper.java 3987 2011-01-10 15:23:49Z mayer $ */ public class DataMapper implements SOMToolboxApp { public static final Parameter[] OPTIONS = new Parameter[] { OptionFactory.getOptWeightVectorFile(true), OptionFactory.getOptMapDescriptionFile(false), OptionFactory.getOptInputVectorFile(true), OptionFactory.getOptUnitDescriptionFile(false), OptionFactory.getOptClassInformationFile(false), OptionFactory.getOptClasslist(false), OptionFactory.getOptLabeling(false), OptionFactory.getOptNumberLabels(false), OptionFactory.getOptNumberWinners(false), OptionFactory.getSwitchSkipDataWinnerMapping(), OptionFactory.getOptOutputFileName(false) }; public static final String DESCRIPTION = "Maps inputs to an already trained SOM."; public static final String LONG_DESCRIPTION = DESCRIPTION.concat(" If a unit-file is given, the data items are added to the loaded map, without a unti file the mapping starts with an empty map."); public static final Type APPLICATION_TYPE = Type.Utils; public static void main(String[] args) throws FileNotFoundException, IOException, SOMToolboxException { new DataMapper(args); } public DataMapper(String[] args) throws FileNotFoundException, IOException, SOMToolboxException { // register and parse all options JSAPResult config = OptionFactory.parseResults(args, OPTIONS); String mapDescFileName = AbstractOptionFactory.getFilePath(config, "mapDescriptionFile"); String weightVectorFileName = AbstractOptionFactory.getFilePath(config, "weightVectorFile"); String unitDescriptionFileName = AbstractOptionFactory.getFilePath(config, "unitDescriptionFile"); String classInformationFileName = AbstractOptionFactory.getFilePath(config, "classInformationFile"); String outputPrefix = AbstractOptionFactory.getFilePath(config, "output"); String skipClassesString = config.getString("classList"); boolean skipDataWinnerMapping = config.getBoolean("skipDataWinnerMapping", false); int numDataWinners = config.getInt("numberWinners"); String labelerName = config.getString("labeling", null); int numLabels = config.getInt("numberLabels", AbstractNetworkModel.DEFAULT_LABEL_COUNT); ArrayList<String> mappingExceptions = new ArrayList<String>(); if (StringUtils.isNotBlank(skipClassesString)) { String[] tmp = skipClassesString.split(","); for (String element : tmp) { mappingExceptions.add(element); } } GrowingSOM som = null; /* restore SOM */ try { som = new GrowingSOM(new SOMLibFormatInputReader(weightVectorFileName, unitDescriptionFileName, mapDescFileName)); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage() + " Aborting."); e.printStackTrace(); System.exit(-1); } SOMLibClassInformation classInfo = null; if (classInformationFileName != null) { try { classInfo = new SOMLibClassInformation(classInformationFileName); } catch (SOMToolboxException e1) { e1.printStackTrace(); } } InputData data = new SOMLibSparseInputData(AbstractOptionFactory.getFilePath(config, "inputVectorFile")); // map the data mapCompleteDataAfterTraining(som, data, classInfo, mappingExceptions, labelerName, numLabels); // compute quality measure // TODO FIXME : pass the quality measure as parameter! String qualityMeasureName = "at.tuwien.ifs.somtoolbox.layers.quality.QuantizationError.mqe"; som.getLayer().setQualityMeasure(qualityMeasureName); if (outputPrefix == null) { outputPrefix = FileUtils.extractSOMLibInputPrefix(FileUtils.stripPathPrefix(weightVectorFileName)) + ".remapped"; } try { SOMLibMapOutputter.writeUnitDescriptionFile(som, "", outputPrefix, true); } catch (IOException e) { // TODO: create new exception type Logger.getLogger("at.tuwien.ifs.somtoolbox").severe( "Could not open or write to output file " + outputPrefix + ": " + e.getMessage()); System.exit(-1); } if (!skipDataWinnerMapping) { int numWinners = Math.min(numDataWinners, som.getLayer().getXSize() * som.getLayer().getYSize()); try { SOMLibMapOutputter.writeDataWinnerMappingFile(som, data, numWinners, "", outputPrefix, true); } catch (IOException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe( "Could not open or write to output file " + outputPrefix + ": " + e.getMessage()); System.exit(-1); } } else { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Skipping writing data winner mapping file"); } // just copy along the class information file, so we have a copy with the same name-prefix, eases SOMViewer // starting.. if (classInformationFileName != null) { String classInfoDestination = outputPrefix + ".cls" + (classInformationFileName.endsWith(".gz") ? ".gz" : ""); FileUtils.copyFile(classInformationFileName, classInfoDestination); } } /** * @see GrowingLayer#mapCompleteDataAfterTraining */ // FIXME: this is just a copy of GrowingLayer#mapCompleteDataAfterTraining, would be good to have some code // re-used.. // FIXME: this would also profit from multi-threading... private void mapCompleteDataAfterTraining(GrowingSOM som, InputData data, SOMLibClassInformation classInfo, ArrayList<String> mappingExceptions, String labelerName, int numLabels) { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Start mapping data."); InputDatum datum = null; Unit winner = null; int numVectors = data.numVectors(); int skippedInstances = 0; for (int i = 0; i < data.numVectors(); i++) { try { InputDatum currentInput = data.getInputDatum(i); String inpLabel = currentInput.getLabel(); if (classInfo != null && mappingExceptions.contains(classInfo.getClassName(inpLabel))) { skippedInstances++; } } catch (SOMLibFileFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (mappingExceptions.size() > 0) { Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Skipping classes: " + mappingExceptions + ", containing a total of " + skippedInstances + " inputs."); } StdErrProgressWriter progressWriter = new StdErrProgressWriter(numVectors - skippedInstances, "Mapping datum ", 50); L2Metric metric = new L2Metric(); for (int i = 0; i < numVectors; i++) { datum = data.getInputDatum(i); String inpLabel = datum.getLabel(); try { if (classInfo != null && mappingExceptions.contains(classInfo.getClassName(inpLabel))) { continue; // Skips this mapping step } else { winner = som.getLayer().getWinner(datum, metric); winner.addMappedInput(datum, false); // TODO: think about recursion progressWriter.progress(); } } catch (SOMLibFileFormatException e) { // TODO Auto-generated catch block Logger.getLogger("at.tuwien.ifs.somtoolbox").info("This should never happen"); e.printStackTrace(); } } Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Finished mapping data."); som.getLayer().calculateQuantizationErrorForUnits(); som.getLayer().clearLabels(); Labeler labeler = null; if (labelerName != null) { // if labeling then label try { labeler = AbstractLabeler.instantiate(labelerName); Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Instantiated labeler " + labelerName); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe( "Could not instantiate labeler \"" + labelerName + "\"."); System.exit(-1); } } if (labelerName != null) { // if labeling then label labeler.label(som, data, numLabels); } } }