/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.apps.helper; import java.io.IOException; import java.io.PrintWriter; import org.apache.commons.lang.StringUtils; import com.martiansoftware.jsap.JSAPResult; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory; import at.tuwien.ifs.somtoolbox.data.InputData; import at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData; import at.tuwien.ifs.somtoolbox.data.distance.InputVectorDistanceMatrix; import at.tuwien.ifs.somtoolbox.data.distance.LeightWeightMemoryInputVectorDistanceMatrix; import at.tuwien.ifs.somtoolbox.layers.metrics.AbstractMetric; import at.tuwien.ifs.somtoolbox.layers.metrics.DistanceMetric; import at.tuwien.ifs.somtoolbox.util.FileUtils; import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter; /** * Writes the nearest/most similar vectors for a given data set. * * @author Rudolf Mayer * @version $Id: VectorSimilarityWriter.java 3704 2010-07-20 10:42:42Z mayer $ */ public class VectorSimilarityWriter { public static void main(String[] args) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, SOMToolboxException { JSAPResult config = OptionFactory.parseResults(args, OptionFactory.OPTIONS_INPUT_SIMILARITY_COMPUTER); String inputVectorDistanceMatrix = config.getString("inputVectorDistanceMatrix"); String inputVectorFileName = config.getString("inputVectorFile"); int numNeighbours = config.getInt("numberNeighbours"); String outputFormat = config.getString("outputFormat"); InputVectorDistanceMatrix matrix = null; InputData data = new SOMLibSparseInputData(inputVectorFileName); if (StringUtils.isNotBlank(inputVectorDistanceMatrix)) { matrix = InputVectorDistanceMatrix.initFromFile(inputVectorDistanceMatrix); } else { String metricName = config.getString("metric"); DistanceMetric metric = AbstractMetric.instantiate(metricName); matrix = new LeightWeightMemoryInputVectorDistanceMatrix(data, metric); } String outputFileName = config.getString("output"); PrintWriter w = FileUtils.openFileForWriting("Similarity File", outputFileName); if (outputFormat.equals("SAT-DB")) { // find feature type String type = ""; if (inputVectorFileName.endsWith(".rh") || inputVectorFileName.endsWith(".rp") || inputVectorFileName.endsWith(".ssd")) { type = "_" + inputVectorFileName.substring(inputVectorFileName.lastIndexOf(".") + 1); } w.println("INSERT INTO `sat_track_similarity_ifs" + type + "` (`TRACKID`, `SIMILARITYCOUNT`, `SIMILARITYIDS`) VALUES "); } int numVectors = matrix.numVectors(); // numVectors = 10; // for testing StdErrProgressWriter progress = new StdErrProgressWriter(numVectors, "Writing similarities for vector ", 1); for (int i = 0; i < numVectors; i++) { int[] nearest = matrix.getNNearest(i, numNeighbours); if (outputFormat.equals("SAT-DB")) { w.print(" (" + i + " , NULL, '"); for (int j = 0; j < nearest.length; j++) { String label = data.getLabel(nearest[j]); w.print(label.replace(".mp3", "")); // strip ending if (j + 1 < nearest.length) { w.print(","); } else { w.print("')"); } } if (i + 1 < numVectors) { w.print(","); } } else { w.print(data.getLabel(i) + ","); for (int j = 0; j < nearest.length; j++) { w.print(data.getLabel(nearest[j])); if (j + 1 < nearest.length) { w.print(","); } } } w.println(); w.flush(); progress.progress(); } if (outputFormat.equals("SAT-DB")) { w.print(";"); } w.flush(); w.close(); } }