/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.output; import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintStream; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Stack; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.logging.Logger; import java.util.zip.GZIPOutputStream; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.data.InputData; import at.tuwien.ifs.somtoolbox.data.InputDatum; import at.tuwien.ifs.somtoolbox.input.SOMLibDataWinnerMapping; import at.tuwien.ifs.somtoolbox.input.SOMLibMapDescription; import at.tuwien.ifs.somtoolbox.layers.GrowingLayer; import at.tuwien.ifs.somtoolbox.layers.LayerAccessException; import at.tuwien.ifs.somtoolbox.layers.Unit; import at.tuwien.ifs.somtoolbox.layers.Layer.GridLayout; import at.tuwien.ifs.somtoolbox.layers.Layer.GridTopology; import at.tuwien.ifs.somtoolbox.layers.metrics.L2Metric; import at.tuwien.ifs.somtoolbox.layers.quality.AbstractQualityMeasure; import at.tuwien.ifs.somtoolbox.layers.quality.QualityMeasureNotFoundException; import at.tuwien.ifs.somtoolbox.models.GHSOM; import at.tuwien.ifs.somtoolbox.models.GrowingSOM; import at.tuwien.ifs.somtoolbox.properties.FileProperties; import at.tuwien.ifs.somtoolbox.properties.SOMProperties; import at.tuwien.ifs.somtoolbox.util.FileUtils; import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter; import at.tuwien.ifs.somtoolbox.util.StringUtils; import at.tuwien.ifs.somtoolbox.util.VectorTools; import at.tuwien.ifs.somtoolbox.util.comparables.UnitDistance; /** * Gathers methods to write trained maps in the SOMLib file format * * @author Michael Dittenbach * @version $Id: SOMLibMapOutputter.java 3981 2010-12-16 17:32:27Z mayer $ */ public class SOMLibMapOutputter { /** * Writes a SOM (or SOM-like) structure to SOMLib format. Usually writes a Map Description File, Weight Vector File * and Unit Description File. * * @param gsom The growing SOM to be written. * @param fDir Directory where to write the files to. * @param fName Filename without suffix. Usually the name of the training run. * @param gzipped Determines whether the written files should be gzipped or not. A ".gz" suffix is * automatically appended. */ public static void write(GrowingSOM gsom, String fDir, String fName, boolean gzipped, SOMProperties somProps, FileProperties fileProps) throws IOException { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Start saving SOMLib output files"); writeWeightVectorFile(gsom, fDir, fName, gzipped); writeUnitDescriptionFile(gsom, fDir, fName, gzipped); writeMapDescriptionFile(gsom, fDir, fName, somProps, fileProps); if (gsom.getLayer().getVirtualLayer() != null) { // write adaptive coordinates file gsom.getLayer().getVirtualLayer().writeToFile(fDir, fName); } Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Finished saving SOMLib output files"); } public static void write(GHSOM ghsom, String fDir, String fName, boolean gzipped, SOMProperties somProps, FileProperties fileProps) throws IOException { write(ghsom.topLayerMap(), fDir, fName, gzipped, somProps, fileProps); } /** * Writes the Weight Vector File. * * @param gsom The growing SOM to be written. * @param fDir Directory where to write the file to. * @param fName fName Filename without suffix. Usually the name of the training run. * @param gzipped Determines whether the written files should be gzipped or not. A ".gz" suffix is * automatically appended. */ public static void writeWeightVectorFile(GrowingSOM gsom, String fDir, String fName, boolean gzipped, String... extraHeaders) throws IOException { if (!fDir.endsWith(File.separator)) { fDir = fDir + File.separator; } int xDim = gsom.getLayer().getXSize(); int yDim = gsom.getLayer().getYSize(); int zDim = gsom.getLayer().getZSize(); int vecDim = gsom.getLayer().getDim(); String idString = gsom.getLayer().getIdString(); BufferedWriter bw = writeWeightVectorFileHeader(fDir, fName, gzipped, xDim, yDim, zDim, gsom.getLayer().getGridLayout(), gsom.getLayer().getGridTopology(), vecDim, idString, extraHeaders); int numUnits = xDim * yDim * zDim; StdErrProgressWriter progressWriter = new StdErrProgressWriter(numUnits, "Writing weight vector of unit "); // int unitsSoFar = 0; for (int k = 0; k < zDim; k++) { for (int j = 0; j < yDim; j++) { for (int i = 0; i < xDim; i++) { progressWriter.progress(); // "Writing weight vector of unit " + i + "/" + j + "/" + k + ", ", // ++unitsSoFar); Unit u = null; try { u = gsom.getLayer().getUnit(i, j, k); } catch (LayerAccessException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage()); System.exit(-1); } // adapted to menmonic (sparse) SOMs if (u != null) { if (u.getMappedSOM() != null) { writeWeightVectorFile(u.getMappedSOM(), fDir, fName, gzipped); } for (int ve = 0; ve < vecDim; ve++) { // bw.write(form.format(u.weightVector()[ve])+" "); bw.write(u.getWeightVector()[ve] + " "); } } else { bw.write("null "); } bw.write("SOM_MAP_" + fName + "_(" + i + "/" + j + "/" + k + ")"); bw.newLine(); } } } bw.close(); } /** * Writes the Weight Vector File from given weight values, used e.g. when interpolating a SOM. */ public static void writeWeightVectorFile(double[][][][] weights, GridLayout gridLayout, GridTopology gridTopology, String fDir, String fName, boolean gzipped) throws IOException { int xDim = weights.length; int yDim = weights[0].length; int zDim = weights[0][0].length; int vecDim = weights[0][0][0].length; String idString = ""; BufferedWriter bw = writeWeightVectorFileHeader(fDir, fName, gzipped, xDim, yDim, zDim, gridLayout, gridTopology, vecDim, idString); int numUnits = xDim * yDim * zDim; StdErrProgressWriter progressWriter = new StdErrProgressWriter(numUnits, "Writing weight vector of unit "); // int writtenSoFar = 0; for (int k = 0; k < zDim; k++) { for (int j = 0; j < yDim; j++) { for (int i = 0; i < xDim; i++) { progressWriter.progress();// "Writing weight vector of unit " + i + "/" + j + "/" + k + ", ", // ++writtenSoFar); for (int ve = 0; ve < vecDim; ve++) { bw.write(weights[i][j][k][ve] + " "); } bw.write("SOM_MAP_" + fName + "_(" + i + "/" + j + "/" + k + ")"); bw.newLine(); } } } bw.close(); } /** Writes the header part of the weight vector file. */ protected static BufferedWriter writeWeightVectorFileHeader(String fDir, String fName, boolean gzipped, int xDim, int yDim, int zDim, GridLayout gridLayout, GridTopology gridTopology, int vecDim, String idString, String... extraHeaders) throws IOException, FileNotFoundException { BufferedWriter bw = null; String finalName = getWeightVectorFileName(fDir, fName, idString, gzipped); if (gzipped == true) { bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(finalName)))); } else { bw = new BufferedWriter(new FileWriter(finalName)); } Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Saving SOMLib weight vector file" + finalName + " (" + new File(finalName).getAbsolutePath() + ")"); bw.write("$TYPE som"); bw.newLine(); bw.write(SOMLibMapDescription.GRID_LAYOUT + " " + gridLayout.toString()); bw.newLine(); bw.write(SOMLibMapDescription.GRID_TOPOLOGY + " " + gridTopology.toString()); bw.newLine(); bw.write("$XDIM " + xDim); bw.newLine(); bw.write("$YDIM " + yDim); bw.newLine(); bw.write("$ZDIM " + zDim); bw.newLine(); bw.write("$VEC_DIM " + vecDim); bw.newLine(); for (String header : extraHeaders) { bw.write(header); bw.newLine(); } return bw; } private static BufferedWriter openAndWriteUnitFileHeader(String fDir, String fName, String id, int xSize, int ySize, int zSize, GridLayout gridLayout, GridTopology gridTopology, boolean gzipped) throws IOException { BufferedWriter bw = null; String finalName = getUnitDescriptionFileName(fDir, fName, id, gzipped); if (gzipped == true) { bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(finalName)))); } else { bw = new BufferedWriter(new FileWriter(finalName)); } Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Saving SOMLib unit description file " + finalName + " (" + new File(finalName).getAbsolutePath() + ")"); bw.write("$TYPE som"); bw.newLine(); bw.write(SOMLibMapDescription.GRID_LAYOUT + " " + gridLayout.toString()); bw.newLine(); bw.write(SOMLibMapDescription.GRID_TOPOLOGY + " " + gridTopology.toString()); bw.newLine(); bw.write("$FILE_FORMAT_VERSION 1.2"); // added by frank, because of 3d-som bw.newLine(); // added by frank bw.write("$XDIM " + xSize); bw.newLine(); bw.write("$YDIM " + ySize); bw.newLine(); // Important: older versions of the SOMToolbox have pretty stupid reader implementations, and will fail if // there's a $ZDIM // thus, write ZDIM only if relevant, i.e. > 1; additionally, this also saves space in the output file if (zSize > 1) { bw.write("$ZDIM " + zSize); bw.newLine(); } return bw; } /** * Writes the Unit Description File. * * @param gsom The growing SOM to be written. * @param fDir Directory where to write the file to. * @param fName fName Filename without suffix. Usually the name of the training run. * @param gzipped Determines whether the written files should be gzipped or not. A ".gz" suffix is * automatically appended. */ public static void writeUnitDescriptionFile(GrowingSOM gsom, String fDir, String fName, boolean gzipped) throws IOException { BufferedWriter bw = openAndWriteUnitFileHeader(fDir, fName, gsom.getLayer().getIdString(), gsom.getLayer().getXSize(), gsom.getLayer().getYSize(), gsom.getLayer().getZSize(), gsom.getLayer().getGridLayout(), gsom.getLayer().getGridTopology(), gzipped); for (int k = 0; k < gsom.getLayer().getZSize(); k++) { for (int j = 0; j < gsom.getLayer().getYSize(); j++) { for (int i = 0; i < gsom.getLayer().getXSize(); i++) { Unit u = null; try { u = gsom.getLayer().getUnit(i, j, k); } catch (LayerAccessException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage()); System.exit(-1); } // adapted to mnemonic (sparse) SOMs if (u != null) { writeUnitInfoBegin(fName, bw, j, i, k, gsom.getLayer().getZSize()); if (u.getLayer().getQualityMeasure() != null) { if (u.getLayer().getQualityMeasure().getClass().getName().equals( "at.tuwien.ifs.somtoolbox.layers.quality.QuantizationError")) { try { bw.write("$QUANTERROR_UNIT " + u.getLayer().getQualityMeasure().getUnitQualities("qe")[u.getXPos()][u.getYPos()]); bw.newLine(); bw.write("$QUANTERROR_UNIT_AVG " + u.getLayer().getQualityMeasure().getUnitQualities("mqe")[u.getXPos()][u.getYPos()]); bw.newLine(); } catch (QualityMeasureNotFoundException e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe( e.getMessage() + " Aborting. BTW: the must be a major flaw" + "in the quality measure class that has been used."); System.exit(-1); } } } // we also want to write $NR_VEC_MAPPED 0 if there are no vectors. bw.write("$NR_VEC_MAPPED " + u.getNumberOfMappedInputs()); bw.newLine(); if (u.getNumberOfMappedInputs() > 0) { writeMappedVectors(bw, u.getMappedInputNames(), u.getMappedInputDistances()); } if (u.getMappedSOM() != null) { bw.write("$NR_SOMS_MAPPED 1"); bw.newLine(); bw.write("$URL_MAPPED_SOMS " + fName + u.getMappedSOM().getLayer().getIdString()); // TODO: // removed // +suffix bw.newLine(); writeUnitDescriptionFile(u.getMappedSOM(), fDir, fName, gzipped); } if (u.getLabels() != null) { bw.write("$NR_UNIT_LABELS " + u.getLabels().length); bw.newLine(); bw.write("$UNIT_LABELS"); for (int l = 0; l < u.getLabels().length; l++) { bw.write(" " + u.getLabels()[l].getName()); } bw.newLine(); bw.write("$UNIT_LABELS_QE"); for (int l = 0; l < u.getLabels().length; l++) { bw.write(" " + u.getLabels()[l].getQe()); } bw.newLine(); bw.write("$UNIT_LABELS_WGT"); for (int l = 0; l < u.getLabels().length; l++) { bw.write(" " + u.getLabels()[l].getValue()); } bw.newLine(); } } } } } bw.close(); } private static void writeUnitInfoBegin(String fName, BufferedWriter bw, int j, int i, int k, int zSize) throws IOException { bw.write("$POS_X " + i); bw.newLine(); bw.write("$POS_Y " + j); bw.newLine(); // Important: older versions of the SOMToolbox have pretty stupid reader implementations, and will fail if // there's a $POS_Z // thus, write $POS_Z only if relevant, i.e. > 1; additionally, this also saves space in the output file if (zSize > 1) { bw.write("$POS_Z " + k); bw.newLine(); bw.write("$UNIT_ID " + fName + "_(" + i + "/" + j + "/" + k + ")"); } else { bw.write("$UNIT_ID " + fName + "_(" + i + "/" + j + ")"); } bw.newLine(); } private static void writeMappedVectors(BufferedWriter bw, String[] datalabels, double[] dataDistances) throws IOException { bw.write("$MAPPED_VECS"); bw.newLine(); // added by lidy for (String datalabel : datalabels) { bw.write(datalabel); // changed by lidy in order to support spaces in filenames bw.newLine(); // added by lidy in order to support spaces in filenames } bw.write("$MAPPED_VECS_DIST"); for (double dataDistance : dataDistances) { bw.write(" " + dataDistance); } bw.newLine(); } /** Method to write a unit description file of an empty map. */ public static void writeUnitDescriptionFile(int xdim, int ydim, GridLayout gridLayout, GridTopology gridTopology, String fDir, String fName, boolean gzipped) throws IOException { BufferedWriter bw = openAndWriteUnitFileHeader(fDir, fName, null, xdim, ydim, 1, gridLayout, gridTopology, gzipped); for (int j = 0; j < ydim; j++) { for (int i = 0; i < xdim; i++) { writeUnitInfoBegin(fName, bw, j, i, 1, 1); bw.write("$NR_VEC_MAPPED " + 0); bw.newLine(); } } bw.close(); } public static void writeUnitDescriptionFile(String[][][] labels, GridLayout gridLayout, GridTopology gridTopology, String fDir, String fName, boolean gzipped) throws IOException { BufferedWriter bw = openAndWriteUnitFileHeader(fDir, fName, null, labels.length, labels[0].length, 1, gridLayout, gridTopology, gzipped); for (int i = 0; i < labels.length; i++) { for (int j = 0; j < labels[i].length; j++) { writeUnitInfoBegin(fName, bw, j, i, 1, 1); bw.write("$NR_VEC_MAPPED " + labels[i][j].length); bw.newLine(); writeMappedVectors(bw, labels[i][j], VectorTools.generateVectorWithValue(labels[i][j].length, 0)); } } bw.close(); } public static void writeMapDescriptionFile(GrowingSOM topLevelMap, String fDir, String fName, SOMProperties somProps, FileProperties fileProps) throws IOException { Stack<GrowingSOM> maps = new Stack<GrowingSOM>(); // store all maps to process maps.add(topLevelMap); // start from first map while (maps.size() > 0) { // process all remaining maps GrowingSOM gsom = maps.pop(); maps.addAll(gsom.getLayer().getAllSubMaps()); String idString = gsom.getLayer().getIdString(); String finalName = getMapDescriptionFileName(fDir, fName, idString, false); Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Saving SOMLib map description file " + finalName + " (" + new File(finalName).getAbsolutePath() + ")"); SOMLibMapDescription mapDescription = new SOMLibMapDescription(); final InputData inputData = gsom.getSharedInputObjects().getInputData(); try { // TODO in the future: check if GHSOM, Growing Grid, Mnemomic SOM, ... mapDescription.setProperty(SOMLibMapDescription.TYPE, "som"); mapDescription.setProperty(SOMLibMapDescription.GRID_LAYOUT, gsom.getLayer().getGridLayout().toString()); mapDescription.setProperty(SOMLibMapDescription.GRID_TOPOLOGY, gsom.getLayer().getGridTopology().toString()); mapDescription.setProperty(SOMLibMapDescription.X_DIM, gsom.getLayer().getXSize()); mapDescription.setProperty(SOMLibMapDescription.Y_DIM, gsom.getLayer().getYSize()); mapDescription.setProperty(SOMLibMapDescription.Z_DIM, gsom.getLayer().getZSize()); mapDescription.setProperty(SOMLibMapDescription.VEC_DIM, gsom.getLayer().getDim()); mapDescription.setProperty(SOMLibMapDescription.STORAGE_DATE, new SimpleDateFormat().format(new Date())); mapDescription.setProperty(SOMLibMapDescription.TRAINING_TIME, ""); // TODO mapDescription.setProperty(SOMLibMapDescription.LEARNRATE_TYPE, "exponential");// TODO: detailed formula mapDescription.setProperty(SOMLibMapDescription.LEARNRATE_INIT, somProps.learnrate()); mapDescription.setProperty(SOMLibMapDescription.NEIGHBOURHOOD_TYPE, "exponential");// TODO: detailed // formula mapDescription.setProperty(SOMLibMapDescription.NEIGHBOURHOOD_INIT, somProps.sigma()); mapDescription.setProperty(SOMLibMapDescription.RANDOM_INIT, somProps.randomSeed()); mapDescription.setProperty(SOMLibMapDescription.TOTAL_ITERATIONS, somProps.trainedIterations(inputData.numVectors())); mapDescription.setProperty(SOMLibMapDescription.TOTAL_TRAINING_VECTORS, inputData.numVectors()); if (somProps.adaptiveCoordinatesTreshold() != null) { mapDescription.setProperty(SOMLibMapDescription.ADAPTIVE_COORDINATES_THRESHOLD, somProps.adaptiveCoordinatesTreshold()); } mapDescription.setProperty(SOMLibMapDescription.VECTORS_NORMALISED, inputData.isNormalizedToUnitLength()); try { // FIXME: more generic file format supporting different measures than mqe String[] growthQM = AbstractQualityMeasure.splitNameAndMethod(somProps.growthQualityMeasureName()); final double mapQuality = gsom.getLayer().getQualityMeasure().getMapQuality(growthQM[1]); mapDescription.setProperty(SOMLibMapDescription.QUANT_ERROR_MAP, mapQuality); mapDescription.setProperty(SOMLibMapDescription.QUANT_ERROR_VECTOR, mapQuality / inputData.numVectors()); } catch (QualityMeasureNotFoundException e) { mapDescription.setProperty(SOMLibMapDescription.QUANT_ERROR_MAP, "<error receiving value: " + e.getMessage() + ">"); mapDescription.setProperty(SOMLibMapDescription.QUANT_ERROR_VECTOR, "<error receiving value: " + e.getMessage() + ">");// TODO e.printStackTrace(); } mapDescription.setProperty(SOMLibMapDescription.URL_TRAINING_VECTOR, fileProps.vectorFileName(true)); mapDescription.setProperty(SOMLibMapDescription.URL_TRAINING_VECTOR_DESCRIPTION, "");// TODO String contentType = inputData.getContentType(); if (org.apache.commons.lang.StringUtils.isNotBlank(inputData.getContentSubType())) { contentType += "-" + inputData.getContentSubType(); } mapDescription.setProperty(SOMLibMapDescription.DATA_TYPE, contentType); if (inputData.getFeatureMatrixColumns() != -1 && inputData.getFeatureMatrixRows() != -1) { mapDescription.setProperty(SOMLibMapDescription.DATA_DIM, inputData.getFeatureMatrixColumns() + "x" + inputData.getFeatureMatrixRows()); } mapDescription.setProperty(SOMLibMapDescription.URL_WEIGHT_VECTOR, getWeightVectorFileName(fDir, fName, idString, false)); mapDescription.setProperty(SOMLibMapDescription.URL_QUANT_ERROR_MAP, "");// TODO mapDescription.setProperty(SOMLibMapDescription.URL_MAPPED_INPUT_VECTOR, "");// TODO mapDescription.setProperty(SOMLibMapDescription.URL_MAPPED_INPUT_VECTOR_DESCRIPTION, "");// TODO mapDescription.setProperty(SOMLibMapDescription.URL_UNIT_DESCRIPTION, getUnitDescriptionFileName(fDir, fName, idString, false)); mapDescription.setProperty(SOMLibMapDescription.URL_TEMPLATE_VECTOR, fileProps.templateFileName(true)); mapDescription.setProperty(SOMLibMapDescription.URL_DATA_WINNER_MAPPING, getDataWinnerMappingFileName( fDir, fName, gsom.getLayer().getIdString(), false)); mapDescription.setProperty(SOMLibMapDescription.URL_CLASS_INFO, ""); // TODO mapDescription.setProperty(SOMLibMapDescription.METRIC, gsom.getLayer().getMetric().getClass().getName()); mapDescription.setProperty(SOMLibMapDescription.LAYER_REVISION, gsom.getLayer().getRevision()); mapDescription.setProperty(SOMLibMapDescription.DESCRIPTION, "");// TODO mapDescription.writeMapDescriptionFile(finalName); maps.addAll(gsom.getLayer().getAllSubMaps()); } catch (SOMToolboxException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } } /** * Simple SOMLib Map description file writer, if you don't have a {@link GrowingSOM} object at hand, e.g. when * converting maps. */ public static void writeMapDescriptionFile(int xSize, int ySize, int zSize, int dim, int numVectors, String fDir, String fName) throws IOException { String idString = ""; String finalName = getMapDescriptionFileName(fDir, fName, idString, false); Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Saving SOMLib map description file " + finalName + " (" + new File(finalName).getAbsolutePath() + ")"); try { SOMLibMapDescription mapDescription = new SOMLibMapDescription(); // TODO in the future: check if GHSOM, Growing Grid, Mnemomic SOM, ... mapDescription.setProperty(SOMLibMapDescription.TYPE, "som"); mapDescription.setProperty(SOMLibMapDescription.GRID_LAYOUT, GridLayout.rectangular.toString()); mapDescription.setProperty(SOMLibMapDescription.GRID_TOPOLOGY, GridTopology.planar.toString()); mapDescription.setProperty(SOMLibMapDescription.X_DIM, xSize); mapDescription.setProperty(SOMLibMapDescription.Y_DIM, ySize); mapDescription.setProperty(SOMLibMapDescription.Z_DIM, zSize); mapDescription.setProperty(SOMLibMapDescription.VEC_DIM, dim); mapDescription.setProperty(SOMLibMapDescription.STORAGE_DATE, new SimpleDateFormat().format(new Date())); mapDescription.setProperty(SOMLibMapDescription.TOTAL_TRAINING_VECTORS, numVectors); mapDescription.setProperty(SOMLibMapDescription.URL_WEIGHT_VECTOR, getWeightVectorFileName(fDir, fName, idString, false)); mapDescription.setProperty(SOMLibMapDescription.URL_UNIT_DESCRIPTION, getUnitDescriptionFileName(fDir, fName, idString, false)); mapDescription.setProperty(SOMLibMapDescription.METRIC, new L2Metric().getClass().getName()); mapDescription.writeMapDescriptionFile(finalName); } catch (SOMToolboxException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } public static void writeDataWinnerMappingFile(GHSOM ghsom, InputData data, int numWinners, String fDir, String fName, boolean gzipped) throws IOException { Stack<GrowingSOM> maps = new Stack<GrowingSOM>(); // store all maps to process maps.add(ghsom.topLayerMap()); // start from first map while (maps.size() > 0) { // process all remaining maps GrowingSOM map = maps.pop(); int winners = Math.min(numWinners, map.getLayer().getXSize() * map.getLayer().getYSize()); // use subset of data InputData newData = data.subset(map.getLayer().getSuperUnit().getMappedInputNames()); writeDataWinnerMappingFile(map, newData, winners, fDir, fName, gzipped); maps.addAll(map.getLayer().getAllSubMaps()); } } public static void writeDataWinnerMappingFile(GrowingSOM gsom, InputData data, int numWinners, String fDir, String fName, boolean gzipped) throws IOException { BufferedWriter bw = null; String finalName = getDataWinnerMappingFileName(fDir, fName, gsom.getLayer().getIdString(), gzipped); if (gzipped == true) { bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(finalName)))); } else { bw = new BufferedWriter(new FileWriter(finalName)); } Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Saving SOMLib data winner mapping file" + finalName + " (" + new File(finalName).getAbsolutePath() + ")"); int numVectors = data.numVectors(); int maxWinners = gsom.getLayer().getXSize() * gsom.getLayer().getYSize(); if (numWinners > maxWinners) { numWinners = maxWinners; } final boolean is3DSOM = gsom.getLayer().getZSize() > 1; // FIXME: the header should be independent of whether we write an zSize >0 or not, we should just write an zDim // if we actually do have it // FIXME: add X/Y/ZDIM headers to the file // we don't want to always write the zDim, it just unnecessarily bloats the file size... if (is3DSOM) { bw.write("$FILE_FORMAT_VERSION 1.2"); // added by frank for 3d-SOM support. } else { bw.write("$FILE_FORMAT_VERSION 1.1"); // added by lidy, because of changed fileformat (see below) } bw.newLine(); // added by lidy bw.write("$NUM_WINNERS " + numWinners); bw.newLine(); bw.write("$NUM_VECTORS " + numVectors); bw.newLine(); // added by rudi, also write the metric used bw.write("$METRIC " + gsom.getLayer().getMetric().getClass().getName()); bw.newLine(); UnitDistance[] winners; StdErrProgressWriter progressWriter = new StdErrProgressWriter(numVectors, "Getting winners for datum ", 10); // FIXME: number of threads should be a param int noThreads = Math.min(Runtime.getRuntime().availableProcessors(), 4); // do some "clever" decision whether to use multi-threading or not, i.e. when we have many units, or many // inputs, or both if (noThreads > 1 && gsom.getLayer().getUnitCount() * gsom.getLayer().getDim() > 100 * 100) { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Working with " + noThreads + " parallel threads"); ExecutorService e = Executors.newFixedThreadPool(noThreads); CountDownLatch latch = null; ArrayList<DataWinnerMappingThread> threads = new ArrayList<DataWinnerMappingThread>(noThreads); for (int i = 0; i < noThreads; i++) { threads.add(new DataWinnerMappingThread(gsom.getLayer(), numWinners)); } for (int d = 0; d < numVectors;) { if (d + noThreads > numVectors) { noThreads = numVectors - d; System.out.println("d: " + d + ", capped threads to " + noThreads); } latch = new CountDownLatch(noThreads); for (int i = 0; i < noThreads; i++) { threads.get(i).setData(latch, data.getInputDatum(d++)); e.execute(threads.get(i)); } progressWriter.progress(d); try { latch.await(); // wait for all processes to finish } catch (InterruptedException ie) { ie.printStackTrace(); } for (int i = 0; i < noThreads; i++) { bw.write(threads.get(i).output.toString()); bw.newLine(); } } e.shutdown(); } else { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Working single-threaded"); for (int d = 0; d < numVectors; d++) { progressWriter.progress(); bw.write(data.getInputDatum(d).getLabel()); bw.newLine(); // added by lidy in order to support spaces in filenames winners = gsom.getLayer().getWinnersAndDistances(data.getInputDatum(d), numWinners); // adapted to mnemonic (sparse) SOMs for (int w = 0; w < numWinners; w++) { // was: gsom.getLayer().getUnitCount() bw.write(winners[w].getUnit().printCoordinatesSpaceSeparated() + " " + StringUtils.format(winners[w].getDistance(), 5) + " "); } bw.newLine(); } } bw.close(); } public static void writeDataWinnerMappingFile(SOMLibDataWinnerMapping dwm, String fDir, String fName, boolean gzip) throws IOException { PrintStream bw = null; String finalName = getDataWinnerMappingFileName(fDir, fName, "", gzip); if (gzip == true) { bw = new PrintStream(new GZIPOutputStream(new FileOutputStream(finalName))); } else { bw = new PrintStream(new FileOutputStream(finalName)); } Logger.getLogger("at.tuwien.ifs.somtoolbox").info( "Saving SOMLib data winner mapping file" + finalName + " (" + new File(finalName).getAbsolutePath() + ")"); boolean is3DSOM = dwm.is3D(); // FIXME: the header should be independent of whether we write an zSize >0 or not, we should just write an zDim // if we actually do have it // FIXME: add X/Y/ZDIM headers to the file // we don't want to always write the zDim, it just unnecessarily bloats the file size... if (is3DSOM) { bw.println("$FILE_FORMAT_VERSION 1.2"); // added by frank for 3d-SOM support. } else { bw.println("$FILE_FORMAT_VERSION 1.1"); // added by lidy, because of changed fileformat (see below) } bw.println("$NUM_WINNERS " + dwm.getNumBMUs()); bw.println("$NUM_VECTORS " + dwm.getNumVectors()); // added by rudi, also write the metric used bw.println("$METRIC " + dwm.getMetric()); for (String label : dwm.getLabels()) { try { bw.print(label); if (is3DSOM) { bw.println(); } else { bw.print(" "); } int dIndex = dwm.getVectPos(label); int[] xs = dwm.getXPos(dIndex), ys = dwm.getYPos(dIndex), zs = dwm.getZPos(dIndex); double[] dists = dwm.getDists(dIndex); for (int i = 0; i < dists.length; i++) { bw.printf(" %d", xs[i]); bw.printf(" %d", ys[i]); if (is3DSOM) { bw.printf(" %d", zs[i]); } bw.printf(" %f", dists[i]); } bw.println(); } catch (SOMToolboxException e) { throw new IOException(e); } } bw.close(); } static class DataWinnerMappingThread implements Runnable { InputDatum datum; UnitDistance[] winners; GrowingLayer layer; int numWinners; StringBuilder output; CountDownLatch latch; DataWinnerMappingThread(GrowingLayer layer, int numWinners) { this.layer = layer; this.numWinners = numWinners; } void setData(CountDownLatch latch, InputDatum datum) { this.latch = latch; this.datum = datum; output = new StringBuilder(datum.getLabel()).append("\n"); } @Override public void run() { winners = layer.getWinnersAndDistances(datum, numWinners); for (int w = 0; w < numWinners; w++) { // was: gsom.getLayer().getUnitCount() output.append(winners[w].getUnit().printCoordinatesSpaceSeparated()).append(" ").append( StringUtils.format(winners[w].getDistance(), 5)).append(" "); } latch.countDown(); } } protected static String getUnitDescriptionFileName(String fDir, String fName, String idString, boolean gzipped) { return FileUtils.getPathPrefix(fDir) + fName + (idString != null ? idString : "") + FileUtils.getSuffix("unit", gzipped); } protected static String getWeightVectorFileName(String fDir, String fName, String idString, boolean gzipped) { return FileUtils.getPathPrefix(fDir) + fName + idString + FileUtils.getSuffix("wgt", gzipped); } protected static String getDataWinnerMappingFileName(String fDir, String fName, String idString, boolean gzipped) { return FileUtils.getPathPrefix(fDir) + fName + idString + FileUtils.getSuffix("dwm", gzipped); } protected static String getMapDescriptionFileName(String fDir, String fName, String idString, boolean gzipped) { return FileUtils.getPathPrefix(fDir) + fName + idString + FileUtils.getSuffix("map", gzipped); } }