/* * This file is part of ELKI: * Environment for Developing KDD-Applications Supported by Index-Structures * * Copyright (C) 2017 * ELKI Development Team * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.lmu.ifi.dbs.elki.index.vafile; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import 
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction; import de.lmu.ifi.dbs.elki.index.AbstractRefiningIndex; import de.lmu.ifi.dbs.elki.index.IndexFactory; import de.lmu.ifi.dbs.elki.index.KNNIndex; import de.lmu.ifi.dbs.elki.index.RangeIndex; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.statistics.Counter; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.persistent.AbstractPageFileFactory; import de.lmu.ifi.dbs.elki.utilities.datastructures.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; import net.jafama.FastMath; /** * PartialVAFile. In-memory only implementation. * * Reference: * <p> * Hans-Peter Kriegel, Peer Kröger, Matthias Schubert, Ziyue Zhu:<br /> * Efficient Query Processing in Arbitrary Subspaces Using Vector Approximations * <br /> * in Proc. 18th Int. Conf. on Scientific and Statistical Database Management * (SSDBM 06), Wien, Austria, 2006. 
* </p>
 *
 * @author Thomas Bernecker
 * @author Erich Schubert
 * @since 0.5.0
 *
 * @apiviz.landmark
 *
 * @apiviz.composedOf DAFile
 * @apiviz.has PartialVACandidate
 * @apiviz.has PartialVAFileRangeQuery
 * @apiviz.has PartialVAFileKNNQuery
 *
 * @param <V> Vector type
 */
@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Matthias Schubert, Ziyue Zhu", //
    title = "Efficient Query Processing in Arbitrary Subspaces Using Vector Approximations", //
    booktitle = "Proc. 18th Int. Conf. on Scientific and Statistical Database Management (SSDBM 06), Wien, Austria, 2006", //
    url = "http://dx.doi.org/10.1109/SSDBM.2006.23")
public class PartialVAFile<V extends NumberVector> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
  /**
   * Class logger.
   */
  private static final Logging LOG = Logging.getLogger(PartialVAFile.class);

  /**
   * Partial VA files, one per dimension of the relation.
   */
  List<DAFile> daFiles;

  /**
   * Number of partitions (grid cells) per dimension; must be a power of 2.
   */
  private final int partitions;

  /**
   * Page size.
   */
  // NOTE(review): stored but not used by the in-memory code visible in this
  // file — presumably kept for on-disk cost accounting; confirm before removal.
  private final int pageSize;

  /**
   * Splitting grid: splitPartitions[d] holds the split borders of dimension d,
   * as produced by DAFile.getSplitPositions().
   */
  private double[][] splitPartitions;

  /**
   * Statistics.
   */
  protected Statistics stats;

  /**
   * The (full - we are in-memory only right now) vector approximations.
   */
  private ArrayList<VectorApproximation> vectorApprox;

  /**
   * Constructor.
   *
   * @param pageSize Page size
   * @param relation Data relation
   * @param partitions Number of partitions
   */
  public PartialVAFile(int pageSize, Relation<V> relation, int partitions) {
    super(relation);
    this.pageSize = pageSize;
    this.partitions = partitions;
    this.stats = new Statistics(this.getClass().getName());
  }

  /**
   * Build the split grid and precompute the approximation of every object.
   *
   * @throws IllegalStateException when the index was already initialized
   */
  @Override
  public void initialize() throws IllegalStateException {
    if(splitPartitions != null) {
      throw new IllegalStateException("Data already inserted.");
    }
    // Power-of-two check: log2 must be an integer.
    if(MathUtil.log2(partitions) != (int) MathUtil.log2(partitions)) {
      throw new IllegalArgumentException("Number of partitions must be a power of 2!");
    }
    final int dimensions = RelationUtil.dimensionality(relation);
    splitPartitions = new double[dimensions][];
    daFiles = new ArrayList<>(dimensions);
    // One DA file (and one row of the split grid) per dimension.
    for(int d = 0; d < dimensions; d++) {
      final DAFile f = new DAFile(relation, d, partitions);
      splitPartitions[d] = f.getSplitPositions();
      daFiles.add(f);
    }
    // Precompute the full-dimensional approximation of every object.
    vectorApprox = new ArrayList<>();
    for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
      DBID id = DBIDUtil.deref(iter);
      V dv = relation.get(id);
      VectorApproximation va = calculateFullApproximation(id, dv);
      vectorApprox.add(va);
    }
  }

  @Override
  public String getShortName() {
    return "pva-file";
  }

  @Override
  public String getLongName() {
    return "partial va-file";
  }

  @Override
  public Logging getLogger() {
    return LOG;
  }

  @Override
  public void logStatistics() {
    stats.logStatistics();
  }

  /**
   * Calculate the VA file position given the existing borders.
* * @param id Object ID * @param dv Data vector * @return Vector approximation */ protected VectorApproximation calculateFullApproximation(DBID id, V dv) { int[] approximation = new int[dv.getDimensionality()]; for(int d = 0; d < splitPartitions.length; d++) { double[] split = daFiles.get(d).getSplitPositions(); final double val = dv.doubleValue(d); final int lastBorderIndex = split.length - 1; // Value is below data grid if(val < split[0]) { approximation[d] = 0; if(id != null) { LOG.warning("Vector outside of VAFile grid!"); } } // Value is above data grid else if(val > split[lastBorderIndex]) { approximation[d] = lastBorderIndex - 1; if(id != null) { LOG.warning("Vector outside of VAFile grid!"); } } // normal case else { // Search grid position int pos = Arrays.binarySearch(split, val); pos = (pos >= 0) ? pos : ((-pos) - 2); approximation[d] = pos; } } return new VectorApproximation(id, approximation); } @Override public KNNQuery<V> getKNNQuery(DistanceQuery<V> distanceQuery, Object... hints) { DistanceFunction<? super V> df = distanceQuery.getDistanceFunction(); if(df instanceof SubspaceLPNormDistanceFunction) { double p = ((SubspaceLPNormDistanceFunction) df).getP(); long[] bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions(); return new PartialVAFileKNNQuery(distanceQuery, p, bits); } if(df instanceof LPNormDistanceFunction) { double p = ((LPNormDistanceFunction) df).getP(); long[] bits = BitsUtil.ones(RelationUtil.dimensionality(distanceQuery.getRelation())); return new PartialVAFileKNNQuery(distanceQuery, p, bits); } // Not supported. return null; } @Override public RangeQuery<V> getRangeQuery(DistanceQuery<V> distanceQuery, Object... hints) { DistanceFunction<? 
super V> df = distanceQuery.getDistanceFunction(); if(df instanceof SubspaceLPNormDistanceFunction) { double p = ((SubspaceLPNormDistanceFunction) df).getP(); long[] bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions(); return new PartialVAFileRangeQuery(distanceQuery, p, bits); } if(df instanceof LPNormDistanceFunction) { double p = ((LPNormDistanceFunction) df).getP(); long[] bits = BitsUtil.ones(RelationUtil.dimensionality(distanceQuery.getRelation())); return new PartialVAFileRangeQuery(distanceQuery, p, bits); } // Not supported. return null; } /** * Calculate selectivity coefficients. * * @param daFiles List of files to use * @param query Query vector * @param epsilon Epsilon radius */ protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector query, double epsilon) { final int dimensions = query.getDimensionality(); double[] lowerVals = new double[dimensions]; double[] upperVals = new double[dimensions]; VectorApproximation queryApprox = calculatePartialApproximation(null, query, daFiles); for(int i = 0; i < dimensions; i++) { final double val = query.doubleValue(i); lowerVals[i] = val - epsilon; upperVals[i] = val + epsilon; } DoubleVector lowerEpsilon = DoubleVector.wrap(lowerVals); VectorApproximation lowerEpsilonPartitions = calculatePartialApproximation(null, lowerEpsilon, daFiles); DoubleVector upperEpsilon = DoubleVector.wrap(upperVals); VectorApproximation upperEpsilonPartitions = calculatePartialApproximation(null, upperEpsilon, daFiles); for(int i = 0; i < daFiles.size(); i++) { int coeff = (queryApprox.getApproximation(i) - lowerEpsilonPartitions.getApproximation(i)) + (upperEpsilonPartitions.getApproximation(i) - queryApprox.getApproximation(i)) + 1; daFiles.get(i).first = coeff; } } /** * Calculate partial vector approximation. 
* * @param id Object ID * @param dv Object vector * @param daFiles List of approximations to use * @return Vector approximation */ protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector dv, List<DoubleObjPair<DAFile>> daFiles) { int[] approximation = new int[dv.getDimensionality()]; for(int i = 0; i < daFiles.size(); i++) { double val = dv.doubleValue(i); double[] borders = daFiles.get(i).second.getSplitPositions(); assert borders != null : "borders are null"; int lastBorderIndex = borders.length - 1; // value is lower outlier if(val < borders[0]) { approximation[i] = 0; } // value is upper outlier else if(val > borders[lastBorderIndex]) { approximation[i] = lastBorderIndex - 1; } // normal case else { for(int s = 0; s < lastBorderIndex; s++) { if(val >= borders[s] && val < borders[s + 1] && approximation[i] != -1) { approximation[i] = s; } } } } return new VectorApproximation(id, approximation); } /** * Class for tracking Partial VA file statistics. * * TODO: refactor into a common statistics API * * @apiviz.exclude */ public static class Statistics { private Counter scannedBytes; private Counter queryTime; private Counter issuedQueries; private Counter refinements; protected Statistics(String parent) { scannedBytes = LOG.isStatistics() ? LOG.newCounter(parent + ".scannedBytes") : null; queryTime = LOG.isStatistics() ? LOG.newCounter(parent + ".queryTime") : null; issuedQueries = LOG.isStatistics() ? LOG.newCounter(parent + ".issuedQueries") : null; refinements = LOG.isStatistics() ? 
LOG.newCounter(parent + ".refinements") : null; } public void logStatistics() { if(scannedBytes != null) { LOG.statistics(scannedBytes); } if(queryTime != null) { LOG.statistics(queryTime); } if(issuedQueries != null) { LOG.statistics(issuedQueries); } if(refinements != null) { LOG.statistics(refinements); } } protected void incrementScannedBytes(long bytes) { if(scannedBytes != null) { scannedBytes.increment(bytes); } } protected void incrementQueryTime(long time) { if(queryTime != null) { queryTime.increment(time); } } protected void incrementIssuedQueries() { if(issuedQueries != null) { issuedQueries.increment(); } } protected void incrementRefinements() { if(refinements != null) { refinements.increment(); } } } /** * Object in a VA approximation. * * @author Thomas Bernecker * @author Erich Schubert */ protected static class PartialVACandidate implements Comparable<PartialVACandidate> { /** * (Current) maximum distance of this candidate. */ protected double maxDistP = 0.0; /** * (Current) minimum distance of this candidate. */ protected double minDistP = 0.0; /** * The actual approximation. */ final private VectorApproximation approx; /** * * Constructor. * * @param approx The actual approximation */ public PartialVACandidate(VectorApproximation approx) { super(); this.approx = approx; } public int getApproximation(int dimension) { return approx.getApproximation(dimension); } public DBID getId() { return approx.getId(); } @Override public String toString() { return approx.toString() + ", bounds^p: [" + minDistP + ", " + maxDistP + "]"; } @Override public int compareTo(PartialVACandidate o) { return Double.compare(this.minDistP, o.minDistP); } } /** * Range query for this index. * * @author Erich Schubert * @author Thomas Bernecker */ public class PartialVAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery { /** * Lp-Norm p. */ private double p; /** * Subspace. */ private long[] subspace; /** * Constructor. 
*
 * @param ddq Distance query
 * @param p LP Norm p
 * @param subspace Subspace
 */
    public PartialVAFileRangeQuery(DistanceQuery<V> ddq, double p, long[] subspace) {
      super(ddq);
      this.p = p;
      this.subspace = subspace;
    }

    @Override
    public void getRangeForObject(V query, double range, ModifiableDoubleDBIDList result) {
      stats.incrementIssuedQueries();
      long t = System.nanoTime();
      // All bounds are compared to the power p, avoiding root computations.
      final double epsilonP = FastMath.pow(range, p);
      // generate query approximation and lookup table
      final VectorApproximation queryApprox = calculateFullApproximation(null, query);
      final VALPNormDistance dist = new VALPNormDistance(p, splitPartitions, query, queryApprox);
      // perform multi-step range query
      // filter step
      // calculate selectivity coefficients
      List<DoubleObjPair<DAFile>> subspaceDAFiles = new ArrayList<>(BitsUtil.cardinality(subspace));
      for(int d = BitsUtil.nextSetBit(subspace, 0); d >= 0; d = BitsUtil.nextSetBit(subspace, d + 1)) {
        DAFile daFile = daFiles.get(d);
        // first = -1 is a placeholder; calculateSelectivityCoeffs fills it.
        subspaceDAFiles.add(new DoubleObjPair<>(-1, daFile));
      }
      calculateSelectivityCoeffs(subspaceDAFiles, query, range);
      // sort DA files by selectivity
      // TODO: validate that this is the correct order
      Collections.sort(subspaceDAFiles, Collections.reverseOrder());
      // create candidate list (all objects) and prune candidates w.r.t.
      // mindist (i.e. remove them from the list)
      // important: this structure contains the maxDist values for refinement!
      int candidates = 0;
      for(VectorApproximation va : vectorApprox) {
        DBID id = va.getId();
        PartialVACandidate pva = new PartialVACandidate(va);
        boolean pruned = false;
        // Accumulate per-dimension bounds; stop early once the lower bound
        // already exceeds the query radius.
        for(DoubleObjPair<DAFile> da : subspaceDAFiles) {
          int dimension = da.second.getDimension();
          int objectCell = va.getApproximation(dimension);
          pva.minDistP += dist.getPartialMinDist(dimension, objectCell);
          pva.maxDistP += dist.getPartialMaxDist(dimension, objectCell);
          if(pva.minDistP > epsilonP) {
            pruned = true;
            break;
          }
        }
        if(!pruned) {
          candidates++;
          if(pva.maxDistP <= epsilonP) {
            // candidate cannot be dropped
            // TODO: actually: no refinement needed - need API that allows
            // reporting maxdists only.
            // NOTE(review): this refine() call is not counted via
            // incrementRefinements(), unlike the branch below — confirm
            // whether that is intended (the TODO suggests it should not
            // count as a "true" refinement).
            result.add(refine(id, query), id);
          }
          else {
            // refine candidate - true refinement
            double dis = refine(id, query);
            stats.incrementRefinements();
            if(dis <= range) {
              result.add(dis, id);
            }
          }
        }
      }
      result.sort();
      // Bytes that an on-disk variant would have scanned for this subspace.
      stats.incrementScannedBytes(relation.size() * VectorApproximation.byteOnDisk(BitsUtil.cardinality(subspace), partitions));
      stats.incrementQueryTime(System.nanoTime() - t);
      if(LOG.isDebuggingFine()) {
        LOG.fine("query = " + query);
        LOG.fine("database: " + relation.size() + ", candidates: " + candidates + ", results: " + result.size());
      }
    }
  }

  /**
   * KNN query for this index.
   *
   * @author Erich Schubert
   * @author Thomas Bernecker
   */
  public class PartialVAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery {
    /**
     * Lp-Norm p.
     */
    private double p;

    /**
     * Subspace.
     */
    private long[] subspace;

    /**
     * Constructor.
*
 * @param ddq Distance query
 * @param p LP-norm p
 * @param subspace Subspace to query
 */
    public PartialVAFileKNNQuery(DistanceQuery<V> ddq, double p, long[] subspace) {
      super(ddq);
      this.p = p;
      this.subspace = subspace;
    }

    @Override
    public KNNList getKNNForObject(V query, int k) {
      stats.incrementIssuedQueries();
      long t = System.nanoTime();
      // generate query approximation and lookup table
      VectorApproximation queryApprox = calculateFullApproximation(null, query);
      final VALPNormDistance dist = new VALPNormDistance(p, splitPartitions, query, queryApprox);
      // sort DA files by worst case distance
      List<DAFile> daFiles = getWorstCaseDistOrder(dist, subspace);
      final int currentSubspaceDims = BitsUtil.cardinality(subspace);
      // First filter uses roughly two thirds of the subspace dimensions.
      int reducedDims = (2 * currentSubspaceDims) / 3;
      reducedDims = Math.max(1, reducedDims);
      if(LOG.isDebuggingFine()) {
        LOG.fine("subspaceDims=" + currentSubspaceDims + ", reducedDims=" + reducedDims);
      }
      // filter 1
      LinkedList<PartialVACandidate> candidates1 = filter1(k, reducedDims, daFiles, queryApprox, currentSubspaceDims, dist);
      if(LOG.isDebuggingFine()) {
        LOG.fine("candidate set after filter 1: " + candidates1.size());
      }
      // filters 2+
      LinkedList<PartialVACandidate> candidates2 = null;
      int addition = reducedDims;
      int filterStep = 2;
      if(currentSubspaceDims <= reducedDims) {
        candidates2 = candidates1;
      }
      else {
        // continue filtering until I/O costs of refining candidates < I/O
        // costs of loading new DA files
        // NOTE: by Java precedence this parses as
        // candidates2 == null || (costCompare && addition < dims),
        // so the first iteration is always entered.
        while(candidates2 == null || (getIOCosts(candidates2.size(), currentSubspaceDims) >= getIOCosts(daFiles.get(0), currentSubspaceDims - addition)) && addition < currentSubspaceDims) {
          if(candidates2 != null && LOG.isDebuggingFine()) {
            LOG.fine("filter " + filterStep + ": refining costs " + getIOCosts(candidates2.size(), currentSubspaceDims) + " (" + candidates2.size() + "/" + currentSubspaceDims + "), DA file costs " + getIOCosts(daFiles.get(0), currentSubspaceDims - addition) + " (dim " + (addition + 1) + " of " + currentSubspaceDims + ")");
          }
          if(candidates2 != null) {
            candidates1 = candidates2;
          }
          candidates2 = new LinkedList<>();
          // Max-heap of the k smallest maxDist values seen so far.
          DoubleMaxHeap kMinMaxDists = new DoubleMaxHeap(k + 1);
          for(PartialVACandidate va : candidates1) {
            // Tighten both bounds with the next dimension: the upper bound
            // previously included the worst-case term for this dimension,
            // which is now replaced by the exact cell bound.
            int dimension = daFiles.get(addition).getDimension();
            int objectCell = va.getApproximation(dimension);
            va.minDistP += dist.getPartialMinDist(dimension, objectCell);
            va.maxDistP += dist.getPartialMaxDist(dimension, objectCell) - dist.getPartialMaxMaxDist(dimension);
            if(kMinMaxDists.size() < k || va.minDistP <= kMinMaxDists.peek()) {
              candidates2.add(va);
              kMinMaxDists.add(va.maxDistP, k);
            }
          }
          if(LOG.isDebuggingFine()) {
            LOG.fine("candidate set after filter " + filterStep + ": " + candidates2.size());
          }
          addition++;
          filterStep++;
        }
      }
      // Bytes an on-disk variant would have scanned for the used dimensions.
      stats.incrementScannedBytes(relation.size() * VectorApproximation.byteOnDisk(addition, partitions));
      // refinement step
      ArrayList<PartialVACandidate> sortedCandidates = new ArrayList<>(candidates2);
      // sort candidates by lower bound (minDist)
      Collections.sort(sortedCandidates);
      KNNList result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
      stats.incrementQueryTime(System.nanoTime() - t);
      return result;
    }

    /**
     * First filter step: bound all objects using the first reducedDims
     * dimensions exactly, and the worst-case (maxmax) term for the remaining
     * dimensions; keep only objects whose lower bound is within the k-th
     * smallest upper bound.
     *
     * @param k Number of neighbors
     * @param reducedDims Number of dimensions bounded exactly
     * @param daFiles DA files, in worst-case-distance order
     * @param queryApprox Query approximation (unused here)
     * @param subspaceDims Total number of subspace dimensions
     * @param dist Distance lookup table
     * @return candidate list after the first filter
     */
    private LinkedList<PartialVACandidate> filter1(int k, int reducedDims, List<DAFile> daFiles, VectorApproximation queryApprox, int subspaceDims, VALPNormDistance dist) {
      LinkedList<PartialVACandidate> candidates1 = new LinkedList<>();
      DoubleMaxHeap minmaxdist = new DoubleMaxHeap(k + 1);
      for(VectorApproximation va : vectorApprox) {
        PartialVACandidate pva = new PartialVACandidate(va);
        for(int d = 0; d < reducedDims; d++) {
          int dimension = daFiles.get(d).getDimension();
          int objectCell = pva.getApproximation(dimension);
          pva.minDistP += dist.getPartialMinDist(dimension, objectCell);
          pva.maxDistP += dist.getPartialMaxDist(dimension, objectCell);
        }
        // Remaining dimensions contribute their worst-case maximum.
        for(int d = reducedDims; d < subspaceDims; d++) {
          pva.maxDistP += dist.getPartialMaxMaxDist(daFiles.get(d).getDimension());
        }
        if(minmaxdist.size() < k || pva.minDistP <= minmaxdist.peek()) {
          candidates1.add(pva);
          minmaxdist.add(pva.maxDistP, k);
        }
      }
      // Drop candidates that don't satisfy the latest minmaxdist
      // NOTE(review): peek() on an empty heap (empty relation) is an edge
      // case — presumably never reached in practice; verify.
      final double minmax = minmaxdist.peek();
      Iterator<PartialVACandidate> it = candidates1.iterator();
      while(it.hasNext()) {
        PartialVACandidate pva = it.next();
        if(pva.minDistP > minmax) {
          it.remove();
        }
      }
      return candidates1;
    }

    /**
     * Computes IO costs (in bytes) needed for refining the candidates.
     *
     * @param size The number of candidates
     * @param subspaceDims the required subspace dimensions
     * @return the cost value (in bytes)
     */
    private int getIOCosts(int size, int subspaceDims) {
      return size * (subspaceDims * ByteArrayUtil.SIZE_DOUBLE + 4);
    }

    /**
     * Computes IO costs (in bytes) needed for reading several DA-files.
     *
     * @param sample the DA-file specific costs
     * @param numberOfDAFiles the number of DA-files that have to be read
     * @return the cost value (in bytes)
     */
    private int getIOCosts(DAFile sample, int numberOfDAFiles) {
      return sample.getIOCosts() * numberOfDAFiles;
    }

    /**
     * Order subspaces by their worst case distance.
* * @param dist Distance function * @param subspace Subspace * @return Ordered list of dimension files */ public List<DAFile> getWorstCaseDistOrder(VALPNormDistance dist, long[] subspace) { int subspaceLength = BitsUtil.cardinality(subspace); List<DAFile> result = new ArrayList<>(subspaceLength); for(int i = BitsUtil.nextSetBit(subspace, 0); i >= 0; i = BitsUtil.nextSetBit(subspace, i + 1)) { result.add(daFiles.get(i)); } Collections.sort(result, new WorstCaseDistComparator(dist)); return result; } protected KNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, long[] subspace, V query) { KNNHeap result = DBIDUtil.newHeap(k); for(PartialVACandidate va : sortedCandidates) { double stopdist = result.getKNNDistance(); DBID currentID = va.getId(); if(result.size() < k || va.minDistP < stopdist) { double dist = refine(currentID, query); stats.incrementRefinements(); if(dist < stopdist) { result.insert(dist, currentID); } } } return result.toKNNList(); } } /** * Compare DAfiles by their worst case distance. * * @apiviz.exclude */ protected static class WorstCaseDistComparator implements Comparator<DAFile> { private VALPNormDistance dist; public WorstCaseDistComparator(VALPNormDistance dist) { this.dist = dist; } @Override public int compare(DAFile a, DAFile b) { return Double.compare(dist.getPartialMaxMaxDist(a.getDimension()), dist.getPartialMaxMaxDist(b.getDimension())); } } /** * Index factory class. * * @author Erich Schubert * * @apiviz.stereotype factory * @apiviz.has PartialVAFile * * @param <V> Vector type */ public static class Factory<V extends NumberVector> implements IndexFactory<V, PartialVAFile<V>> { /** * Number of partitions to use in each dimension. * * <pre> * -vafile.partitions 8 * </pre> */ public static final OptionID PARTITIONS_ID = new OptionID("vafile.partitions", "Number of partitions to use in each dimension."); /** * Page size. */ int pagesize = 1; /** * Number of partitions. */ int numpart = 2; /** * Constructor. 
* * @param pagesize Page size * @param numpart Number of partitions */ public Factory(int pagesize, int numpart) { super(); this.pagesize = pagesize; this.numpart = numpart; } @Override public PartialVAFile<V> instantiate(Relation<V> relation) { return new PartialVAFile<>(pagesize, relation, numpart); } @Override public TypeInformation getInputTypeRestriction() { return TypeUtil.NUMBER_VECTOR_FIELD; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { /** * Page size. */ int pagesize = 1; /** * Number of partitions. */ int numpart = 2; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); IntParameter pagesizeP = new IntParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 1024); pagesizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(pagesizeP)) { pagesize = pagesizeP.getValue(); } IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID); partitionsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); if(config.grab(partitionsP)) { numpart = partitionsP.getValue(); } } @Override protected Factory<?> makeInstance() { return new Factory<>(pagesize, numpart); } } } }