// OutlierRankingEvaluation.java example
//
// Explorer
// elki-master
/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2017
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.lmu.ifi.dbs.elki.evaluation.outlier;

import java.util.List;
import java.util.regex.Pattern;

import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.DatabaseUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.evaluation.scores.*;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.result.*;
import de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;

/**
 * Evaluate outlier scores by their ranking
 *
 * @author Erich Schubert
 * @since 0.2
 *
 * @apiviz.landmark
 *
 * @apiviz.uses OutlierResult
 * @apiviz.has EvaluationResult oneway - - «create»
 */
public class OutlierRankingEvaluation implements Evaluator {
  /**
   * The logger.
   */
  private static final Logging LOG = Logging.getLogger(OutlierRankingEvaluation.class);

  /**
   * Pattern used to select the objects of the "positive" class by their label.
   */
  private final Pattern positiveClassName;

  /**
   * Key prefix for statistics logging (the fully qualified class name).
   */
  private final String key = OutlierRankingEvaluation.class.getName();

  /**
   * Constructor.
   *
   * @param positive_class_name Positive class name pattern
   */
  public OutlierRankingEvaluation(Pattern positive_class_name) {
    this.positiveClassName = positive_class_name;
  }

  /**
   * Rescale a measure so that its expected value maps to 0 while 1 stays 1.
   *
   * Note: if {@code expected} is 1 (e.g. every object is positive), the
   * denominator is zero and the result is NaN or infinite; this is passed on
   * unchanged to the caller.
   *
   * @param value Observed value of the measure
   * @param expected Expected value of the measure
   * @return {@code (value - expected) / (1 - expected)}
   */
  private static double adjustForChance(double value, double expected) {
    return (value - expected) / (1 - expected);
  }

  /**
   * Evaluate the ranking induced by an outlier result.
   *
   * Computes ROC AUC, average precision, R-precision, maximum F1, DCG and NDCG
   * along with chance-adjusted variants, stores them in the evaluation result
   * found or created for the outlier result, and logs them as statistics if
   * statistics logging is enabled.
   *
   * @param size Number of scored objects
   * @param positiveids IDs of the positive objects
   * @param or Outlier result to evaluate
   * @return Evaluation result holding the measures
   */
  private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
    EvaluationResult res = EvaluationResult.findOrCreate(or.getHierarchy(), or, "Evaluation of ranking", "ranking-evaluation");
    DBIDsTest test = new DBIDsTest(positiveids);

    final int pos = positiveids.size();
    // Fraction of positive objects.
    final double rate = pos / (double) size;

    // A fresh adapter is created for every measure, as in the original code.
    MeasurementGroup g = res.findOrCreateGroup("Evaluation measures");
    double rocauc = ROCEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    // NOTE(review): only this measure is guarded against being added twice;
    // presumably another evaluator may already have stored it in the shared
    // result. The remaining measures are added unconditionally - confirm that
    // this asymmetry is intended.
    if(!g.hasMeasure("ROC AUC")) {
      g.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
    }
    double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("Average Precision", avep, 0., 1., rate, false);
    double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("R-Precision", rprec, 0., 1., rate, false);
    double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("Maximum F1", maxf1, 0., 1., rate, false);
    double maxdcg = DCGEvaluation.maximum(pos);
    double dcg = DCGEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("DCG", dcg, 0., maxdcg, DCGEvaluation.STATIC.expected(pos, size), false);
    double ndcg = NDCGEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    // Computed once; used both as reference value here and for the adjustment
    // below.
    final double endcg = NDCGEvaluation.STATIC.expected(pos, size);
    g.addMeasure("NDCG", ndcg, 0., 1., endcg, false);

    g = res.findOrCreateGroup("Adjusted for chance");
    double adjauc = 2 * rocauc - 1;
    g.addMeasure("Adjusted AUC", adjauc, 0., 1., 0., false);
    double adjavep = adjustForChance(avep, rate);
    g.addMeasure("Adjusted AveP", adjavep, 0., 1., 0., false);
    double adjrprec = adjustForChance(rprec, rate);
    g.addMeasure("Adjusted R-Prec", adjrprec, 0., 1., 0., false);
    double adjmaxf1 = adjustForChance(maxf1, rate);
    g.addMeasure("Adjusted Max F1", adjmaxf1, 0., 1., 0., false);
    double adjndcg = adjustForChance(ndcg, endcg);
    // The label says "DCG", but the value is derived from the normalized DCG;
    // the label is kept as is because it is a runtime string.
    g.addMeasure("Adjusted DCG", adjndcg, 0., 1., 0., false);

    if(LOG.isStatistics()) {
      LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
      LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjauc));
      LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
      LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjavep));
      LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
      LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjrprec));
      LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
      LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjmaxf1));
      LOG.statistics(new DoubleStatistic(key + ".dcg", dcg));
      LOG.statistics(new DoubleStatistic(key + ".dcg.normalized", ndcg));
      LOG.statistics(new DoubleStatistic(key + ".dcg.adjusted", adjndcg));
    }
    return res;
  }

  /**
   * Evaluate a plain ordering of the database objects.
   *
   * Computes ROC AUC, average precision, R-precision and maximum F1 along with
   * chance-adjusted variants into a newly created evaluation result, and logs
   * them as statistics if statistics logging is enabled. Unlike the outlier
   * result evaluation, no DCG based measures are computed here.
   *
   * @param size Expected number of objects
   * @param positiveids IDs of the positive objects
   * @param order Object IDs in ranking order
   * @return Newly created evaluation result holding the measures
   * @throws IllegalStateException if the ordering does not contain exactly
   *         {@code size} objects
   */
  private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
    if(order.size() != size) {
      throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }

    EvaluationResult res = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
    DBIDsTest test = new DBIDsTest(positiveids);

    // Fraction of positive objects.
    final double rate = positiveids.size() / (double) size;

    // Each measure gets its own adapter over a new iterator of the ordering.
    // Note that the group names here carry a trailing colon, unlike the names
    // used for outlier results; they are runtime strings and kept unchanged.
    MeasurementGroup g = res.newGroup("Evaluation measures:");
    double rocauc = ROCEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
    double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("Average Precision", avep, 0., 1., rate, false);
    double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("R-Precision", rprec, 0., 1., rate, false);
    double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("Maximum F1", maxf1, 0., 1., rate, false);

    g = res.newGroup("Adjusted for chance:");
    double adjauc = 2 * rocauc - 1;
    g.addMeasure("Adjusted AUC", adjauc, 0., 1., 0., false);
    double adjavep = adjustForChance(avep, rate);
    g.addMeasure("Adjusted AveP", adjavep, 0., 1., 0., false);
    double adjrprec = adjustForChance(rprec, rate);
    g.addMeasure("Adjusted R-Prec", adjrprec, 0., 1., 0., false);
    double adjmaxf1 = adjustForChance(maxf1, rate);
    g.addMeasure("Adjusted Max F1", adjmaxf1, 0., 1., 0., false);

    if(LOG.isStatistics()) {
      LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
      LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjauc));
      LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
      LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjavep));
      LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
      LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjrprec));
      LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
      LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjmaxf1));
    }
    return res;
  }

  /**
   * Evaluate all outlier results and remaining ordering results found in the
   * new result, and attach the evaluations to the result hierarchy.
   *
   * If no object label matches the positive class pattern, a warning is logged
   * and nothing is evaluated. If the result contains neither outlier results
   * nor orderings, this method silently does nothing.
   *
   * @param hier Result hierarchy, used to locate the database
   * @param result Newly added result to process
   */
  @Override
  public void processNewResult(ResultHierarchy hier, Result result) {
    Database db = ResultUtil.findDatabase(hier);
    SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));

    if(positiveids.size() == 0) {
      LOG.warning("Cannot evaluate outlier results - no objects matched the given pattern.");
      return;
    }

    List<OutlierResult> oresults = OutlierResult.getOutlierResults(result);
    List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
    // Outlier results are the main use case.
    for(OutlierResult o : oresults) {
      db.getHierarchy().add(o, evaluateOutlierResult(o.getScores().size(), positiveids, o));
      // Process them only once: drop the ordering that belongs to this outlier
      // result from the list of plain orderings.
      orderings.remove(o.getOrdering());
    }

    // FIXME: find appropriate place to add the derived result
    // otherwise apply an ordering to the database IDs.
    for(OrderingResult or : orderings) {
      DBIDs sorted = or.order(or.getDBIDs());
      db.getHierarchy().add(or, evaluateOrderingResult(or.getDBIDs().size(), positiveids, sorted));
    }
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * The pattern to identify positive classes.
     *
     * <p>
     * Key: {@code -outliereval.positive}
     * </p>
     */
    public static final OptionID POSITIVE_CLASS_NAME_ID = new OptionID("outliereval.positive", "Class label for the 'positive' class.");

    /**
     * Pattern for positive class; stays {@code null} if the parameter could
     * not be obtained from the configuration.
     */
    protected Pattern positiveClassName = null;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
      if(config.grab(positiveClassNameP)) {
        positiveClassName = positiveClassNameP.getValue();
      }
    }

    @Override
    protected OutlierRankingEvaluation makeInstance() {
      return new OutlierRankingEvaluation(positiveClassName);
    }
  }
}