ClusteringVectorDumper.java example

Explorer
elki-master
/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2017
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.lmu.ifi.dbs.elki.result;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.List;

import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.parser.ClusteringVectorParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.It;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter;

/**
 * Class to output a clustering result in a simple and compact ASCII format:
 * whitespace-separated cluster indexes, one line per clustering, optionally
 * followed by a label.
 * <p>
 * This format can be read using {@link ClusteringVectorParser} for meta
 * analysis, or read as clustering using
 * {@link de.lmu.ifi.dbs.elki.algorithm.clustering.meta.ExternalClustering}.
 * 
 * @author Erich Schubert
 * @since 0.7.0
 */
public class ClusteringVectorDumper implements ResultHandler {
  /**
   * Class logger.
   */
  private static final Logging LOG = Logging.getLogger(ClusteringVectorDumper.class);

  /**
   * Output file, or {@code null} to write to stdout.
   */
  private final File outputFile;

  /**
   * Optional label to force for this output. {@code null} means "use the name
   * of the clustering", the empty string means "do not write a label".
   */
  private final String forceLabel;

  /**
   * Append to the output file. This is switched on after the first write, so
   * that later results do not overwrite earlier ones.
   */
  private boolean append;

  /**
   * Constructor.
   * 
   * @param outputFile Output file, {@code null} to write to stdout
   * @param append Append to output file (overwrite otherwise).
   * @param forceLabel Forced label to use for the output, may be {@code null}.
   */
  public ClusteringVectorDumper(File outputFile, boolean append, String forceLabel) {
    this.outputFile = outputFile;
    this.forceLabel = forceLabel;
    this.append = append;
  }

  /**
   * Constructor, without a forced label.
   * 
   * @param outputFile Output file, {@code null} to write to stdout
   * @param append Append to output file (overwrite otherwise).
   */
  public ClusteringVectorDumper(File outputFile, boolean append) {
    this(outputFile, append, null);
  }

  /**
   * Dump every clustering found in the new result, one line per clustering,
   * either to the configured output file or to stdout.
   * 
   * @param hier Result hierarchy, used to find the DBID range to dump
   * @param newResult Newly added result to search for clusterings
   */
  @Override
  public void processNewResult(ResultHierarchy hier, Result newResult) {
    List<Clustering<?>> cs = Clustering.getClusteringResults(newResult);
    if(cs.isEmpty()) {
      return;
    }
    if(forceLabel != null && !forceLabel.isEmpty() && cs.size() > 1) {
      LOG.warning("Found more than one clustering result, they will have the same (forced) label.");
    }

    // No output file configured: use stdout (which we must not close).
    if(outputFile == null) {
      for(Clustering<?> c : cs) {
        dumpClusteringOutput(System.out, hier, c);
      }
      return;
    }

    // NOTE: PrintStream encodes using the platform default charset here.
    try (FileOutputStream os = new FileOutputStream(outputFile, append); //
        PrintStream writer = new PrintStream(os)) {
      // TODO: dump settings, too?
      for(Clustering<?> c : cs) {
        dumpClusteringOutput(writer, hier, c);
      }
      append = true; // Append future results.
      // PrintStream never throws IOException on write; failures are only
      // visible via checkError() (which also flushes the stream).
      if(writer.checkError()) {
        LOG.warning("Error writing to output file " + outputFile + ", the output may be incomplete.");
      }
    }
    catch(IOException e) {
      // Raised when opening (or closing) the file fails.
      LOG.exception("Error writing to output stream.", e);
    }
  }

  /**
   * Dump a single clustering result as one line: the cluster index of every
   * object in DBID order, separated by spaces, followed by the label (unless
   * an empty label was forced).
   * 
   * Nothing is written if no {@link DBIDRange} can be found for the
   * clustering.
   * 
   * @param writer Output writer
   * @param hierarchy Cluster hierarchy to process
   * @param c Clustering result
   */
  protected void dumpClusteringOutput(PrintStream writer, ResultHierarchy hierarchy, Clustering<?> c) {
    DBIDRange ids = findDBIDRange(hierarchy, c);
    if(ids == null) {
      LOG.warning("Cannot dump cluster assignment, as I do not have a well-defined DBIDRange to use for a unique column assignment. DBIDs must be a continuous range.");
      return;
    }

    // Number the clusters in iteration order, and record the number for each
    // member. An object contained in several clusters keeps the last number.
    // NOTE(review): objects contained in no cluster keep the default value of
    // the data store - presumably 0, which is also the first cluster number;
    // verify against DataStoreUtil.makeIntegerStorage.
    WritableIntegerDataStore map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP);
    int cnum = 0;
    for(Cluster<?> clu : c.getAllClusters()) {
      for(DBIDIter iter = clu.getIDs().iter(); iter.valid(); iter.advance()) {
        map.putInt(iter, cnum);
      }
      ++cnum;
    }

    // Write the assignment vector, in the order of the DBID range.
    for(DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
      if(iter.getOffset() > 0) {
        writer.append(' ');
      }
      writer.append(Integer.toString(map.intValue(iter)));
    }

    // Label: forced label if given (an empty forced label suppresses the
    // label altogether), otherwise the name of the clustering.
    if(forceLabel == null) {
      writer.append(' ').append(c.getLongName());
    }
    else if(!forceLabel.isEmpty()) {
      writer.append(' ').append(forceLabel);
    }
    writer.append('\n');
  }

  /**
   * Find a DBID range that defines the column order of the output.
   * 
   * First, the parent relations of the clustering are tried; if none of them
   * uses a {@link DBIDRange}, any database in the hierarchy is tried instead.
   * 
   * @param hierarchy Result hierarchy to search
   * @param c Clustering result
   * @return DBID range, or {@code null} if none was found
   */
  private static DBIDRange findDBIDRange(ResultHierarchy hierarchy, Clustering<?> c) {
    for(It<Relation<?>> iter = hierarchy.iterParents(c).filter(Relation.class); iter.valid(); iter.advance()) {
      DBIDs pids = iter.get().getDBIDs();
      if(pids instanceof DBIDRange) {
        return (DBIDRange) pids;
      }
      LOG.warning("Parent result " + iter.get().getLongName() + " has DBID type " + pids.getClass());
    }
    // Fallback: try to locate a database.
    for(It<Database> iter = hierarchy.iterAll().filter(Database.class); iter.valid(); iter.advance()) {
      DBIDs pids = iter.get().getRelation(TypeUtil.ANY).getDBIDs();
      if(pids instanceof DBIDRange) {
        return (DBIDRange) pids;
      }
      LOG.warning("Database " + iter.get().getLongName() + " has DBID type " + pids.getClass());
    }
    return null;
  }

  /**
   * Parameterization class.
   * 
   * @author Erich Schubert
   * 
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * Output file name parameter.
     */
    public static final OptionID OUT_ID = new OptionID("clustering.output", "Output file name. When not given, the result will be written to stdout.");

    /**
     * Append flag.
     */
    public static final OptionID APPEND_ID = new OptionID("clustering.output.append", "Always append to the output file.");

    /**
     * Force label parameter.
     */
    public static final OptionID FORCE_LABEL_ID = new OptionID("clustering.label", "Parameter to override the clustering label, mostly to give a more descriptive label.");

    /**
     * Output file, {@code null} (write to stdout) when not given.
     */
    private File outputFile = null;

    /**
     * Optional label to force for this output, {@code null} when not given.
     */
    private String forceLabel;

    /**
     * Always append to the output file.
     */
    private boolean append;

    /**
     * Read the optional output file, the append flag, and the optional
     * forced label from the configuration.
     * 
     * @param config Parameterization to read the options from
     */
    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      FileParameter outputP = new FileParameter(OUT_ID, FileParameter.FileType.OUTPUT_FILE) //
          .setOptional(true);
      if(config.grab(outputP)) {
        outputFile = outputP.getValue();
      }

      Flag appendF = new Flag(APPEND_ID);
      if(config.grab(appendF)) {
        append = appendF.isTrue();
      }

      StringParameter labelP = new StringParameter(FORCE_LABEL_ID) //
          .setOptional(true);
      if(config.grab(labelP)) {
        forceLabel = labelP.getValue();
      }
    }

    /**
     * Build the dumper from the options read in
     * {@link #makeOptions(Parameterization)}.
     * 
     * @return New dumper instance
     */
    @Override
    protected ClusteringVectorDumper makeInstance() {
      return new ClusteringVectorDumper(outputFile, append, forceLabel);
    }
  }
}