/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.apps.helper; import java.io.File; import java.io.IOException; import java.util.LinkedList; import java.util.List; import java.util.logging.Logger; import cern.colt.matrix.DoubleMatrix1D; import cern.colt.matrix.impl.DenseDoubleMatrix1D; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import at.tuwien.ifs.somtoolbox.apps.SOMToolboxApp; import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory; import at.tuwien.ifs.somtoolbox.data.InputData; import at.tuwien.ifs.somtoolbox.data.InputDataFactory; import at.tuwien.ifs.somtoolbox.data.InputDataWriter; import at.tuwien.ifs.somtoolbox.data.InputDatum; import at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData; import at.tuwien.ifs.somtoolbox.data.SOMLibTemplateVector; import at.tuwien.ifs.somtoolbox.util.VectorTools; /** * Merges two vector files. * * @author Jakob Frank * @version $Id: VectorFileMerger.java 3846 2010-10-11 14:49:19Z frank $ */ public class VectorFileMerger implements SOMToolboxApp { public static final Parameter[] OPTIONS = new Parameter[] { OptionFactory.getOptOutputVector(), OptionFactory.getOptInput(), OptionFactory.getOptNormalizeWeights(), OptionFactory.getSwitchWriteTVFile() }; public static String DESCRIPTION = "Merge SOMLibVectorFiles"; public static String LONG_DESCRIPTION = "Merge two or more VectorFiles containing different Features of the same Data into one Vector file"; public static final Type APPLICATION_TYPE = Type.Helper; /** * @param args Command line args. */ public static void main(String[] args) { // TODO: Add option inner-, (left|right|both)outer-join, default is inner JSAPResult config = OptionFactory.parseResults(args, OPTIONS); File[] inFiles = config.getFileArray("input"); File outFile = config.getFile("output"); boolean normalise = true; float w[] = new float[inFiles.length]; if (config.userSpecified("weights")) { for (int i = 0; i < w.length; i++) { w[i] = 1; } float wgts[] = config.getFloatArray("weights"); for (int i = 0; i < Math.min(wgts.length, w.length); i++) { w[i] = wgts[i]; } } else { normalise = false; } Logger log = Logger.getLogger(VectorFileMerger.class.getName()); InputData data[] = new InputData[inFiles.length]; for (int i = 0; i < data.length; i++) { data[i] = InputDataFactory.open(inFiles[i].getAbsolutePath()); } List<InputDatum> idList = new LinkedList<InputDatum>(); log.info("Starting merge..."); String[] labels = data[0].getLabels(); for (String label : labels) { InputDatum[] datum = new InputDatum[data.length]; boolean incomplete = false; for (int i = 0; i < datum.length; i++) { datum[i] = data[i].getInputDatum(label); if (datum[i] == null) { incomplete = true; break; } } if (incomplete) { continue; } if (normalise) { for (int i = 0; i < datum.length; i++) { datum[i] = VectorTools.normaliseByLength(datum[i], w[i]); } } DoubleMatrix1D[] vec = new DoubleMatrix1D[datum.length]; int resSize = 0; for (int i = 0; i < vec.length; i++) { vec[i] = datum[i].getVector(); resSize += vec[i].size(); } DoubleMatrix1D res = new DenseDoubleMatrix1D(resSize); int offset = 0; for (DoubleMatrix1D element : vec) { for (int j = 0; j < element.size(); j++) { res.setQuick(offset + j, element.get(j)); } offset += element.size(); } InputDatum id = new InputDatum(label, res); idList.add(id); } log.info("Merge finished. Writing result."); SOMLibSparseInputData id = new SOMLibSparseInputData(idList.toArray(new InputDatum[] {}), null); try { InputDataWriter.writeAsSOMLib(id, outFile.getAbsolutePath()); } catch (IOException e) { e.printStackTrace(); } log.info("Vector written."); if (config.getBoolean("writeTV")) { log.info("Generating TemplateVector"); try { String[] tvAttr = new String[id.dim()]; int offset = 0; for (InputData element : data) { for (int j = 0; j < element.dim(); j++) { tvAttr[offset + j] = element.getContentSubType() + "_" + j; } offset += element.dim(); } SOMLibTemplateVector tv = new SOMLibTemplateVector(id.numVectors(), tvAttr); InputDataWriter.writeAsSOMLib(tv, outFile + ".tv"); } catch (IOException e) { e.printStackTrace(); } } log.info("Done"); } }