/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.apps.helper; import java.io.File; import java.io.IOException; import com.martiansoftware.jsap.JSAPResult; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory; import at.tuwien.ifs.somtoolbox.data.InputDataWriter; import at.tuwien.ifs.somtoolbox.data.SOMLibClassInformation; import at.tuwien.ifs.somtoolbox.data.SOMLibSparseInputData; /** * This class "cleans" a class info file, i.e. it removes from the class info file instances that are not present in the * input vector. This can happen e.g. with text data, where not all input documents are used in the final map due to * sparsity reasons. * * @author Rudolf Mayer * @version $Id: ClassInfoFileCleaner.java 3589 2010-05-21 10:42:01Z mayer $ */ public class ClassInfoFileCleaner { public static void main(String[] args) throws IOException, SOMToolboxException { // register and parse all options for the JSAPResult config = OptionFactory.parseResults(args, OptionFactory.getOptInputVectorFile(true), OptionFactory.getOptClassInformationFile(true), OptionFactory.getOptOutputFileName(true), OptionFactory.getOptOutputDirectory(false)); String vectorFileName = config.getString("inputVectorFile"); String classInfoFile = config.getString("classInformationFile"); String outputDir = config.getString("outputDirectory", "."); String outputFileName = config.getString("output"); SOMLibSparseInputData inputData = new SOMLibSparseInputData(vectorFileName); SOMLibClassInformation classInfo = new SOMLibClassInformation(classInfoFile); classInfo.removeNotPresentElements(inputData); InputDataWriter.writeAsSOMLib(classInfo, outputDir + File.separator + outputFileName); } }