/*
* This file is part of ELKI:
* Environment for Developing KDD-Applications Supported by Index-Structures
*
* Copyright (C) 2017
* ELKI Development Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.lmu.ifi.dbs.elki.application.cache;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import de.lmu.ifi.dbs.elki.application.AbstractApplication;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Precompute the k nearest neighbors in a disk cache.
*
* @author Erich Schubert
* @since 0.2
*
* @apiviz.has DistanceFunction
*
* @param <O> Object type
*/
public class CacheDoubleDistanceRangeQueries<O> extends AbstractApplication {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceRangeQueries.class);
/**
* Data source to process.
*/
private Database database;
/**
* Distance function that is to be cached.
*/
private DistanceFunction<? super O> distance;
/**
* Query radius.
*/
private double radius;
/**
* Output file.
*/
private File out;
/**
* Magic number to identify files.
*
* Note, when cloning this class, and performing any incompatible change to
* the file format, you should also change this magic ID!
*/
public static final int RANGE_CACHE_MAGIC = 0xCAC43333;
/**
* Constructor.
*
* @param database Data source
* @param distance Distance function
* @param radius Query radius
* @param out Matrix output file
*/
public CacheDoubleDistanceRangeQueries(Database database, DistanceFunction<? super O> distance, double radius, File out) {
super();
this.database = database;
this.distance = distance;
this.radius = radius;
this.out = out;
}
@Override
public void run() {
database.initialize();
Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
RangeQuery<O> rangeQ = database.getRangeQuery(distanceQuery, radius, DatabaseQuery.HINT_HEAVY_USE);
LOG.verbose("Performing range queries with radius " + radius);
// open file.
try (RandomAccessFile file = new RandomAccessFile(out, "rw");
FileChannel channel = file.getChannel();
// and acquire a file write lock
FileLock lock = channel.lock()) {
// write magic header
file.writeInt(RANGE_CACHE_MAGIC);
// write the query radius.
file.writeDouble(radius);
int bufsize = 100 * 12 * 2 + 10; // Initial size, enough for 100.
ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing range queries", relation.size(), LOG) : null;
ModifiableDoubleDBIDList nn = DBIDUtil.newDistanceDBIDList();
DoubleDBIDListIter ni = nn.iter();
for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
nn.clear();
rangeQ.getRangeForDBID(it, radius, nn);
nn.sort();
final int nnsize = nn.size();
// Grow the buffer when needed:
if(nnsize * 12 + 10 > bufsize) {
while(nnsize * 12 + 10 > bufsize) {
bufsize <<= 1;
}
LOG.verbose("Resizing buffer to " + bufsize + " to store " + nnsize + " results:");
buffer = ByteBuffer.allocateDirect(bufsize);
}
buffer.clear();
ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
int c = 0;
for(ni.seek(0); ni.valid(); ni.advance(), c++) {
ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
buffer.putDouble(ni.doubleValue());
}
if(c != nn.size()) {
throw new AbortException("Sizes did not agree. Cache is invalid.");
}
buffer.flip();
channel.write(buffer);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
lock.release();
}
catch(IOException e) {
LOG.exception(e);
}
// FIXME: close!
}
/**
* Parameterization class.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class Parameterizer<O> extends AbstractApplication.Parameterizer {
/**
* Parameter that specifies the name of the directory to be re-parsed.
* <p>
* Key: {@code -loader.diskcache}
* </p>
*/
public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create.");
/**
* Parameter that specifies the name of the directory to be re-parsed.
* <p>
* Key: {@code -loader.distance}
* </p>
*/
public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache.");
/**
* Parameter that specifies the query radius to precompute.
* <p>
* Key: {@code -loader.radius}
* </p>
*/
public static final OptionID RADIUS_ID = new OptionID("loader.radius", "Query radius for precomputation.");
/**
* Data source to process.
*/
private Database database = null;
/**
* Distance function that is to be cached.
*/
private DistanceFunction<? super O> distance = null;
/**
* Number of neighbors to precompute.
*/
private double radius;
/**
* Output file.
*/
private File out = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final ObjectParameter<Database> dbP = new ObjectParameter<>(DATABASE_ID, Database.class, StaticArrayDatabase.class);
if (config.grab(dbP)) {
database = dbP.instantiateClass(config);
}
// Distance function parameter
final ObjectParameter<DistanceFunction<? super O>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
if(config.grab(dpar)) {
distance = dpar.instantiateClass(config);
}
final DoubleParameter kpar = new DoubleParameter(RADIUS_ID);
kpar.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
if(config.grab(kpar)) {
radius = kpar.doubleValue();
}
// Output file parameter
final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
if(config.grab(cpar)) {
out = cpar.getValue();
}
}
@Override
protected CacheDoubleDistanceRangeQueries<O> makeInstance() {
return new CacheDoubleDistanceRangeQueries<>(database, distance, radius, out);
}
}
/**
* Main method, delegate to super class.
*
* @param args Command line arguments
*/
public static void main(String[] args) {
runCLIApplication(CacheDoubleDistanceRangeQueries.class, args);
}
}