/*
* This file is part of ELKI:
* Environment for Developing KDD-Applications Supported by Index-Structures
*
* Copyright (C) 2017
* ELKI Development Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.lmu.ifi.dbs.elki.index.distancematrix;
import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.ids.*;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.index.DistanceIndex;
import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.index.KNNIndex;
import de.lmu.ifi.dbs.elki.index.RangeIndex;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Distance matrix, for precomputing similarity for a small data set.
*
* This class uses a linear memory layout (not a ragged array), and assumes
* symmetry as well as strictness. This way, it only stores the upper triangle
* matrix with double precision. It has to store (n-1) * (n-2) distance values
* in memory, requiring 8 * (n-1) * (n-2) bytes. Since Java has a size limit of
* arrays of 31 bits (signed integer), we can store at most 2^16 objects
* (precisely, 65536 objects) in a single array, which needs about 16 GB of RAM.
*
* @author Erich Schubert
* @since 0.7.0
*
* @apiviz.has PrecomputedDistanceQuery
* @apiviz.has PrecomputedKNNQuery
* @apiviz.has PrecomputedRangeQuery
*
* @param <O> Object type
*/
public class PrecomputedDistanceMatrix<O> implements DistanceIndex<O>, RangeIndex<O>, KNNIndex<O> {
/**
* Class logger.
*/
private static final Logging LOG = Logging.getLogger(PrecomputedDistanceMatrix.class);
/**
* Data relation.
*/
protected final Relation<O> relation;
/**
* Nested distance function.
*/
protected final DistanceFunction<? super O> distanceFunction;
/**
* Nested distance query.
*/
protected DistanceQuery<O> distanceQuery;
/**
* Distance matrix.
*/
private double[] matrix = null;
/**
* DBID range.
*/
private DBIDRange ids;
/**
* Size of DBID range.
*/
private int size;
/**
* Constructor.
*
* @param relation Data relation
* @param distanceFunction Distance function
*/
public PrecomputedDistanceMatrix(Relation<O> relation, DistanceFunction<? super O> distanceFunction) {
super();
this.relation = relation;
this.distanceFunction = distanceFunction;
if(!distanceFunction.isSymmetric()) {
throw new AbortException("Distance matrixes currently only support symmetric distance functions (Patches welcome).");
}
}
@Override
public void initialize() {
DBIDs rids = relation.getDBIDs();
if(!(rids instanceof DBIDRange)) {
throw new AbortException("Distance matrixes are currently only supported for DBID ranges (as used by static databases) for performance reasons (Patches welcome).");
}
ids = (DBIDRange) rids;
size = ids.size();
if(size > 65536) {
throw new AbortException("Distance matrixes currently have a limit of 65536 objects (~16 GB). After this, the array size exceeds the Java integer range, and a different data structure needs to be used.");
}
distanceQuery = distanceFunction.instantiate(relation);
final int msize = triangleSize(size);
matrix = new double[msize];
DBIDArrayIter ix = ids.iter(), iy = ids.iter();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Precomputing distance matrix", msize, LOG) : null;
int pos = 0;
for(ix.seek(0); ix.valid(); ix.advance()) {
// y < x -- must match {@link #getOffset}!
for(iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
matrix[pos] = distanceQuery.distance(ix, iy);
pos++;
}
if(prog != null) {
prog.setProcessed(prog.getProcessed() + ix.getOffset(), LOG);
}
}
LOG.ensureCompleted(prog);
}
/**
* Compute the size of a complete x by x triangle (minus diagonal)
*
* @param x Offset
* @return Size of complete triangle
*/
protected static int triangleSize(int x) {
return (x * (x - 1)) >>> 1;
}
/**
* Array offset computation.
*
* @param x X parameter
* @param y Y parameter
* @return Array offset
*/
private int getOffset(int x, int y) {
return (y < x) ? (triangleSize(x) + y) : (triangleSize(y) + x);
}
@Override
public void logStatistics() {
if(matrix != null) {
LOG.statistics(new LongStatistic(this.getClass().getName() + ".matrix-size", matrix.length));
}
}
@Override
public String getLongName() {
return "Precomputed Distance Matrix";
}
@Override
public String getShortName() {
return "distance-matrix";
}
@Override
public DistanceQuery<O> getDistanceQuery(DistanceFunction<? super O> distanceFunction, Object... hints) {
if(this.distanceFunction.equals(distanceFunction)) {
return new PrecomputedDistanceQuery();
}
return null;
}
@Override
public KNNQuery<O> getKNNQuery(DistanceQuery<O> distanceQuery, Object... hints) {
if(this.distanceFunction.equals(distanceQuery.getDistanceFunction())) {
return new PrecomputedKNNQuery();
}
return null;
}
@Override
public RangeQuery<O> getRangeQuery(DistanceQuery<O> distanceQuery, Object... hints) {
if(this.distanceFunction.equals(distanceQuery.getDistanceFunction())) {
return new PrecomputedRangeQuery();
}
return null;
}
/**
* Distance query using the precomputed matrix.
*
* @author Erich Schubert
*/
private class PrecomputedDistanceQuery implements DistanceQuery<O> {
@Override
public double distance(DBIDRef id1, DBIDRef id2) {
final int x = ids.getOffset(id1), y = ids.getOffset(id2);
return (x != y) ? matrix[getOffset(x, y)] : 0.;
}
@Override
public double distance(O o1, DBIDRef id2) {
return distanceQuery.distance(o1, id2);
}
@Override
public double distance(DBIDRef id1, O o2) {
return distanceQuery.distance(id1, o2);
}
@Override
public double distance(O o1, O o2) {
return distanceQuery.distance(o1, o2);
}
@Override
public DistanceFunction<? super O> getDistanceFunction() {
return distanceQuery.getDistanceFunction();
}
@Override
public Relation<? extends O> getRelation() {
return relation;
}
}
/**
* Range query using the distance matrix.
*
* @author Erich Schubert
*/
private class PrecomputedRangeQuery implements RangeQuery<O> {
@Override
public DoubleDBIDList getRangeForDBID(DBIDRef id, double range) {
ModifiableDoubleDBIDList ret = DBIDUtil.newDistanceDBIDList();
getRangeForDBID(id, range, ret);
ret.sort();
return ret;
}
@Override
public void getRangeForDBID(DBIDRef id, double range, ModifiableDoubleDBIDList result) {
result.add(0., id);
DBIDArrayIter it = ids.iter();
final int x = ids.getOffset(id);
// Case y < x: triangleSize(x) + y
int pos = triangleSize(x);
for(int y = 0; y < x; y++) {
final double dist = matrix[pos];
if(dist <= range) {
result.add(dist, it.seek(y));
}
pos++;
}
assert (pos == triangleSize(x + 1));
// Case y > x: triangleSize(y) + x
pos = triangleSize(x + 1) + x;
for(int y = x + 1; y < size; y++) {
final double dist = matrix[pos];
if(dist <= range) {
result.add(dist, it.seek(y));
}
pos += y;
}
}
@Override
public DoubleDBIDList getRangeForObject(O obj, double range) {
throw new AbortException("Preprocessor KNN query only supports ID queries.");
}
@Override
public void getRangeForObject(O obj, double range, ModifiableDoubleDBIDList result) {
throw new AbortException("Preprocessor KNN query only supports ID queries.");
}
}
/**
* kNN query using the distance matrix.
*
* @author Erich Schubert
*/
private class PrecomputedKNNQuery implements KNNQuery<O> {
@Override
public KNNList getKNNForDBID(DBIDRef id, int k) {
KNNHeap heap = DBIDUtil.newHeap(k);
heap.insert(0., id);
DBIDArrayIter it = ids.iter();
double max = Double.POSITIVE_INFINITY;
final int x = ids.getOffset(id);
// Case y < x: triangleSize(x) + y
int pos = triangleSize(x);
for(int y = 0; y < x; y++) {
final double dist = matrix[pos];
if(dist <= max) {
max = heap.insert(dist, it.seek(y));
}
pos++;
}
assert (pos == triangleSize(x + 1));
// Case y > x: triangleSize(y) + x
pos = triangleSize(x + 1) + x;
for(int y = x + 1; y < size; y++) {
final double dist = matrix[pos];
if(dist <= max) {
max = heap.insert(dist, it.seek(y));
}
pos += y;
}
return heap.toKNNList();
}
@Override
public List<? extends KNNList> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
// TODO: optimize
List<KNNList> ret = new ArrayList<>(ids.size());
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
ret.add(getKNNForDBID(iter, k));
}
return ret;
}
@Override
public KNNList getKNNForObject(O obj, int k) {
throw new AbortException("Preprocessor KNN query only supports ID queries.");
}
}
/**
* Factory for the index.
*
* @author Erich Schubert
*
* @apiviz.has PrecomputedDistanceMatrix
*
* @param <O> Object type
*/
public static class Factory<O> implements IndexFactory<O, PrecomputedDistanceMatrix<O>> {
/**
* Nested distance function.
*/
final protected DistanceFunction<? super O> distanceFunction;
/**
* Constructor.
*
* @param distanceFunction Distance function
*/
public Factory(DistanceFunction<? super O> distanceFunction) {
super();
this.distanceFunction = distanceFunction;
}
@Override
public PrecomputedDistanceMatrix<O> instantiate(Relation<O> relation) {
return new PrecomputedDistanceMatrix<>(relation, distanceFunction);
}
@Override
public TypeInformation getInputTypeRestriction() {
return distanceFunction.getInputTypeRestriction();
}
/**
* Parameterizer.
*
* @author Erich Schubert
*
* @apiviz.exclude
*
* @param <O> Object type
*/
public static class Parameterizer<O> extends AbstractParameterizer {
/**
* Option parameter for the precomputed distance matrix.
*/
public static final OptionID DISTANCE_ID = new OptionID("matrix.distance", "Distance function for the precomputed distance matrix.");
/**
* Nested distance function.
*/
protected DistanceFunction<? super O> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<DistanceFunction<? super O>> distanceP = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
if(config.grab(distanceP)) {
distanceFunction = distanceP.instantiateClass(config);
}
}
@Override
protected Factory<O> makeInstance() {
return new Factory<>(distanceFunction);
}
}
}
}