/* * This file is part of ELKI: * Environment for Developing KDD-Applications Supported by Index-Structures * * Copyright (C) 2017 * ELKI Development Team * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.lmu.ifi.dbs.elki.result; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.util.List; import de.lmu.ifi.dbs.elki.data.Cluster; import de.lmu.ifi.dbs.elki.data.Clustering; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.datasource.parser.ClusteringVectorParser; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.It; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter; /** * <p> * Class to output a clustering result in a simple and compact ascii format: * whitespace separated cluster indexes * </p> * * This format can be read using {@link ClusteringVectorParser} for meta * analysis, or read as clustering using * {@link de.lmu.ifi.dbs.elki.algorithm.clustering.meta.ExternalClustering}. * * @author Erich Schubert * @since 0.7.0 */ public class ClusteringVectorDumper implements ResultHandler { /** * Class logger. */ private static final Logging LOG = Logging.getLogger(ClusteringVectorDumper.class); /** * Output file. */ private File outputFile; /** * Optional label to force for this output. */ private String forceLabel; /** * Always append to the output file. */ private boolean append; /** * Constructor. * * @param outputFile Output file * @param append Append to output file (overwrite otherwise). * @param forceLabel Forced label to use for the output, may be {@code null}. */ public ClusteringVectorDumper(File outputFile, boolean append, String forceLabel) { super(); this.outputFile = outputFile; this.forceLabel = forceLabel; this.append = append; } /** * Constructor. * * @param outputFile Output file * @param append Append to output file (overwrite otherwise). */ public ClusteringVectorDumper(File outputFile, boolean append) { this(outputFile, append, null); } @Override public void processNewResult(ResultHierarchy hier, Result newResult) { List<Clustering<?>> cs = Clustering.getClusteringResults(newResult); if(cs.isEmpty()) { return; } if(forceLabel != null && forceLabel.length() > 0 && cs.size() > 1) { LOG.warning("Found more than one clustering result, they will have the same (forced) label."); } // Open output stream - or use stdout. if(outputFile != null) { try (FileOutputStream os = new FileOutputStream(outputFile, append); // PrintStream writer = new PrintStream(os)) { // TODO: dump settings, too? for(Clustering<?> c : cs) { dumpClusteringOutput(writer, hier, c); } append = true; // Append future results. } catch(IOException e) { LOG.exception("Error writing to output stream.", e); } } else { for(Clustering<?> c : cs) { dumpClusteringOutput(System.out, hier, c); } } } /** * Dump a single clustering result. * * @param writer Output writer * @param hierarchy Cluster hierarchy to process * @param c Clustering result */ protected void dumpClusteringOutput(PrintStream writer, ResultHierarchy hierarchy, Clustering<?> c) { DBIDRange ids = null; for(It<Relation<?>> iter = hierarchy.iterParents(c).filter(Relation.class); iter.valid(); iter.advance()) { DBIDs pids = iter.get().getDBIDs(); if(pids instanceof DBIDRange) { ids = (DBIDRange) pids; break; } LOG.warning("Parent result " + iter.get().getLongName() + " has DBID type " + pids.getClass()); } // Fallback: try to locate a database. if(ids == null) { for(It<Database> iter = hierarchy.iterAll().filter(Database.class); iter.valid(); iter.advance()) { DBIDs pids = iter.get().getRelation(TypeUtil.ANY).getDBIDs(); if(pids instanceof DBIDRange) { ids = (DBIDRange) pids; break; } LOG.warning("Parent result " + iter.get().getLongName() + " has DBID type " + pids.getClass()); } } if(ids == null) { LOG.warning("Cannot dump cluster assignment, as I do not have a well-defined DBIDRange to use for a unique column assignment. DBIDs must be a continuous range."); return; } WritableIntegerDataStore map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP); int cnum = 0; for(Cluster<?> clu : c.getAllClusters()) { for(DBIDIter iter = clu.getIDs().iter(); iter.valid(); iter.advance()) { map.putInt(iter, cnum); } ++cnum; } for(DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) { if(iter.getOffset() > 0) { writer.append(' '); } writer.append(Integer.toString(map.intValue(iter))); } if(forceLabel != null) { if(forceLabel.length() > 0) { writer.append(' ').append(forceLabel); } } else { writer.append(' ').append(c.getLongName()); } writer.append('\n'); } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { /** * Output file name parameter. */ public static final OptionID OUT_ID = new OptionID("clustering.output", "Output file name. When not given, the result will be written to stdout."); /** * Append flag. */ public static final OptionID APPEND_ID = new OptionID("clustering.output.append", "Always append to the output file."); /** * Force label parameter. */ public static final OptionID FORCE_LABEL_ID = new OptionID("clustering.label", "Parameter to override the clustering label, mostly to give a more descriptive label."); /** * Output file. */ private File outputFile = null; /** * Optional label to force for this output. */ private String forceLabel; /** * Always append to the output file. */ private boolean append; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); FileParameter outputP = new FileParameter(OUT_ID, FileParameter.FileType.OUTPUT_FILE) // .setOptional(true); if(config.grab(outputP)) { outputFile = outputP.getValue(); } Flag appendF = new Flag(APPEND_ID); if(config.grab(appendF)) { append = appendF.isTrue(); } StringParameter labelP = new StringParameter(FORCE_LABEL_ID) // .setOptional(true); if(config.grab(labelP)) { forceLabel = labelP.getValue(); } } @Override protected ClusteringVectorDumper makeInstance() { return new ClusteringVectorDumper(outputFile, append, forceLabel); } } }