/**
* GeDBIT.index.algorithms.PivotSelectionMethods 2006.06.16
*
* Copyright Information:
*
* Change Log:
* 2006.01.25: Added, by Willard
* 2012-2013: Modified by Kewei Ma(MarkNV)
*/
package GeDBIT.index.algorithms;
import java.util.List;
import java.util.Random;
import GeDBIT.dist.Metric;
import GeDBIT.type.IndexObject;
import GeDBIT.util.FindPivotWithLargestVar;
import GeDBIT.util.LargeDenseDoubleMatrix2D;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.DenseDoubleMatrix2D;
/**
* All the built-in pivot selection methods. FFT: use Farthest-First-Traversal
* to find the corners of the data. Linear time. CENTER: choose the centers of
* the internal clusters, a method similar to CLARA. Slow. RANDOM: select pivots
* randomly; fast, but no performance guarantee. FFTANDPCA: a combination of
* FFT and Principal Component Analysis. Slow, but performs the best.
*
* @author Rui Mao, Willlard
* @version 2006.08.03
*/
public enum PivotSelectionMethods implements PivotSelectionMethod {
FFT {
    /**
     * Selects pivots from the whole list by delegating to the ranged
     * overload with {@code first = 0} and {@code dataSize = data.size()}.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return indices into {@code data} of the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        return selectPivots(metric, data, 0, data.size(), numPivots);
    }

    /**
     * Picks the seed pivot according to {@code fftopt} and then runs the
     * farthest-first traversal on {@code data[first .. first + dataSize)}.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            index in {@code data} of the first element of the range
     * @param dataSize
     *            number of elements in the range
     * @param numPivots
     *            number of pivots requested
     * @return indices into {@code data} of the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // fftopt == 0 is the old FFT: the first element of the range is the
        // seed pivot.
        int firstPivot = first;
        if (0 != fftopt) {
            // NOTE(review): the helper is given the whole list, not the
            // [first, first + dataSize) range; confirm this is intended.
            firstPivot = FindPivotWithLargestVar.findPivotByVarold(metric,
                    data);
        }
        return selectPivots(metric, data, first, dataSize, numPivots,
                firstPivot);
    }

    /**
     * Farthest-first traversal seeded with {@code firstPivot}. Every round
     * adds the not-yet-chosen element of the range whose distance to its
     * nearest already-chosen pivot is the largest.
     * <p>
     * All values in the returned array are indices into {@code data}
     * (absolute positions, not offsets inside the range).
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            index in {@code data} of the first element of the range
     * @param dataSize
     *            number of elements in the range
     * @param numPivots
     *            number of pivots requested
     * @param firstPivot
     *            index in {@code data} of the seed pivot
     * @return indices into {@code data} of the selected pivots; may be
     *         shorter than {@code numPivots}
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots, int firstPivot) {
        // Not enough data to choose from: every (distinct) element is a pivot.
        if (numPivots >= dataSize) {
            int[] pivots = new int[dataSize];
            for (int offset = 0; offset < dataSize; offset++)
                pivots[offset] = first + offset;
            return IncrementalSelection.removeDuplicate(metric, data,
                    pivots);
        }
        // Nothing requested: the original code failed on indices[0] here.
        if (numPivots <= 0)
            return new int[0];

        // Both arrays are addressed by offset inside the range (0..dataSize).
        boolean[] isCenter = new boolean[dataSize];
        double[] minDist = new double[dataSize];
        for (int i = 0; i < dataSize; i++)
            minDist[i] = Double.POSITIVE_INFINITY;

        // firstPivot is an index into data, so translate it to a range
        // offset before touching isCenter; a seed outside the range simply
        // has no slot to mark.
        final int seedOffset = firstPivot - first;
        if (seedOffset >= 0 && seedOffset < dataSize)
            isCenter[seedOffset] = true;

        // With fftopt >= 2 one extra point is computed, because the second
        // FFT point is skipped in the final result.
        final boolean skipSecond = !(2 > fftopt);
        int[] indices = new int[skipSecond ? numPivots + 1 : numPivots];
        indices[0] = firstPivot;
        for (int i = 1; i < indices.length; i++)
            indices[i] = -1; // -1 marks "not found yet"

        // Slot 0 already holds the seed, so start filling at slot 1.
        for (int centerSize = 1; centerSize < indices.length; centerSize++) {
            double currMaxDist = Double.NEGATIVE_INFINITY;
            final IndexObject lastCenter = data
                    .get(indices[centerSize - 1]);
            for (int i = 0; i < dataSize; i++) {
                if (isCenter[i])
                    continue;
                double tempDist = metric.getDistance(data.get(i + first),
                        lastCenter);
                // minDist[i] tracks the distance to the nearest pivot so far
                if (tempDist < minDist[i])
                    minDist[i] = tempDist;
                if (minDist[i] > currMaxDist) {
                    // remember the farthest point as an index into data
                    indices[centerSize] = i + first;
                    currMaxDist = minDist[i];
                }
            }
            if (indices[centerSize] == -1)
                break; // no candidate left
            isCenter[indices[centerSize] - first] = true;
        }

        // Count the leading slots that were actually filled.
        int returnSize = 0;
        while ((returnSize < indices.length) && (indices[returnSize] >= 0))
            returnSize++;

        if (returnSize < indices.length) {
            int[] result = new int[returnSize];
            System.arraycopy(indices, 0, result, 0, returnSize);
            return result;
        }
        if (!skipSecond)
            return indices;

        // fftopt >= 2: use the third FFT point as the second pivot. When
        // only one pivot was requested there is no third point, so return
        // the seed alone instead of reading past the end of the array.
        // NOTE(review): this mode returns at most two pivots even when
        // numPivots is larger, as the original code did.
        if (indices.length < 3)
            return new int[] { indices[0] };
        return new int[] { indices[0], indices[2] };
    }
},
CENTER {
    /**
     * Delegates to the ranged overload with {@code first = 0} and
     * {@code dataSize = data.size()}.
     *
     * @throws UnsupportedOperationException
     *             always, because CENTER selection is not implemented
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        return selectPivots(metric, data, 0, data.size(), numPivots);
    }

    /**
     * Not implemented yet.
     *
     * @throws UnsupportedOperationException
     *             always; failing here is clearer than handing callers a
     *             {@code null} pivot array
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // TODO implement CENTER pivot selection
        throw new UnsupportedOperationException(
                "CENTER pivot selection is not implemented");
    }
},
RANDOM {
    /**
     * Selects pivots at random from the whole list.
     *
     * @param metric
     *            distance function used to detect identical objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return indices into {@code data} of the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        return selectPivots(metric, data, 0, data.size(), numPivots);
    }

    /**
     * Selects pivots at random from {@code data[first .. first + dataSize)}.
     *
     * @return indices into {@code data} of the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            final int numPivots) {
        return selectPivots(metric, data, first, dataSize, numPivots, false);
    }

    /**
     * Selects pivots at random from {@code data[first .. first + dataSize)}.
     *
     * @param debug
     *            not used by this implementation
     * @return indices into {@code data} of the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, final int first,
            int dataSize, final int numPivots, boolean debug) {
        final int[] pivots = randomPivot(metric,
                data.subList(first, first + dataSize), numPivots);
        // randomPivot numbers elements relative to the sub-list; shift them
        // back so that callers get indices into the original list.
        for (int i = 0; i < pivots.length; i++)
            pivots[i] += first;
        return pivots;
    }

    /**
     * Select pivots randomly. No duplicates (objects at distance zero from
     * an already selected pivot) are allowed in the return value.
     *
     * @param metric
     *            distance function used to detect identical objects
     * @param data
     *            objects to select pivots from
     * @param numP
     *            number of pivots to select
     * @return an int array of subscripts of the pivots in the input data
     *         list; shorter than {@code numP} when there are not enough
     *         distinct objects, empty when {@code data} is empty or
     *         {@code numP} is not positive
     */
    int[] randomPivot(Metric metric, List<? extends IndexObject> data,
            int numP) {
        final int maxPasses = 5;
        final int size = data.size();

        // Nothing to choose from or nothing requested. Without this guard
        // the code below writes result[0] into a zero-length array (empty
        // data) or allocates a negative-length array (negative numP).
        if (size == 0 || numP <= 0)
            return new int[0];

        // If the number of pivots to select is not smaller than the data
        // size, return all the points that are not identical.
        if (numP >= size) {
            int[] result = new int[size];
            int counter = 1; // number of pivots selected
            result[0] = 0;
            for (int i = 1; i < size; i++) {
                if (!containsZeroDistance(metric, data, result,
                        data.get(i), 0, counter)) {
                    result[counter] = i;
                    counter++;
                }
            }
            if (counter == size) // no duplicate
                return result;
            int[] distinct = new int[counter];
            System.arraycopy(result, 0, distinct, 0, counter);
            return distinct;
        }

        // Fewer pivots than data points: up to maxPasses linear scans, each
        // element being taken with probability (numP - counter) / size,
        // skipping duplicates of pivots already taken.
        int counter = 0; // number of pivots selected
        int[] result = new int[numP];
        Random r = new Random();
        for (int pass = 0; pass < maxPasses; pass++) {
            for (int i = 0; i < size; i++) {
                double d = (double) (numP - counter) / size;
                double nd = r.nextDouble();
                if ((d > nd)
                        && !containsZeroDistance(metric, data, result,
                                data.get(i), 0, counter)) {
                    result[counter] = i;
                    counter++;
                    if (counter >= numP)
                        break;
                }
            }
            if (counter >= numP)
                break;
        }
        // If enough pivots were found, just return them.
        if (counter >= numP)
            return result;

        // Otherwise there are many duplicates: scan again, this time moving
        // every identified duplicate behind position "remain" so that it is
        // not considered any more.
        int[] subscript = new int[size];
        for (int i = 0; i < size; i++)
            subscript[i] = i;
        int remain = size; // number of points still eligible
        int temp = 0;
        while ((counter < numP) && (remain > 0)) {
            for (int i = 0; i < remain; i++) {
                if (r.nextDouble() < (double) (numP - counter) / remain) {
                    if (containsZeroDistance(metric, data, result,
                            data.get(subscript[i]), 0, counter)) {
                        remain--;
                        if (remain <= 0)
                            break;
                        // swap the duplicate out of the eligible prefix
                        temp = subscript[i];
                        subscript[i] = subscript[remain];
                        subscript[remain] = temp;
                    } else {
                        result[counter] = subscript[i];
                        counter++;
                        if (counter >= numP)
                            break;
                    }
                }
            }
            if (counter >= numP)
                break;
        }
        // If enough pivots were found, return them.
        if (counter >= numP)
            return result;
        // Otherwise return all the pivots found.
        int[] found = new int[counter];
        System.arraycopy(result, 0, found, 0, counter);
        return found;
    }

    /**
     * Tells whether {@code probe} is at distance zero from any of the
     * objects {@code data.get(subscript[i])} for {@code first <= i < last}.
     *
     * @param metric
     *            distance function
     * @param data
     *            list the subscripts refer to; {@code null} yields
     *            {@code false}
     * @param subscript
     *            subscripts into {@code data}
     * @param probe
     *            object to compare against
     * @param first
     *            first position of {@code subscript} to check (inclusive)
     * @param last
     *            last position of {@code subscript} to check (exclusive)
     * @return {@code true} if a zero distance was found
     */
    boolean containsZeroDistance(Metric metric,
            List<? extends IndexObject> data, int[] subscript,
            IndexObject probe, int first, int last) {
        if (data == null)
            return false;
        for (int i = first; i < last; i++) {
            if (metric.getDistance(data.get(subscript[i]), probe) == 0)
                return true;
        }
        return false;
    }
},
PCA {
    /**
     * Runs the PCA based selection on the whole list by forwarding to the
     * ranged overload with {@code first = 0} and
     * {@code dataSize = data.size()}.
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        final int size = data.size();
        return selectPivots(metric, data, 0, size, numPivots);
    }

    /**
     * Builds the pairwise distance matrix, reduces it with EM-PCA and picks
     * pivots from the PCA result by angle.
     * <p>
     * NOTE(review): {@code first} and {@code dataSize} are not used here;
     * the distance matrix is computed from the whole {@code data} list.
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // step 1: pairwise distances
        final DoubleMatrix2D distances = GeDBIT.index.algorithms.PCA
                .pairWiseDistance(metric, data);
        // step 2: PCA by the EM method
        final DoubleMatrix2D pcaResult = GeDBIT.index.algorithms.PCA.EMPCA(
                distances, numPivots);
        // step 3: choose the pivots from the PCA result
        int[] pivots = GeDBIT.index.algorithms.PCA
                .pivotSelectionByPCAResultAngle(pcaResult, numPivots);
        return pivots;
    }
},
EPCAF {
    /** FFT is asked for {@code numPivots * FFTScale} candidate pivots. */
    final int FFTScale = 30;
    /** {@code numPivots * NumPCScale} is the size argument passed to EMPCA. */
    final int NumPCScale = 2;
    /** Extra factor applied to the last argument of the projection step. */
    final int NumPivotEachPC = 2;

    /**
     * Selects pivots from {@code data[first .. first + dataSize)} and
     * returns them as indices into {@code data}.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            index in {@code data} of the first element of the range
     * @param dataSize
     *            number of elements in the range
     * @param numPivots
     *            number of pivots requested
     * @return indices into {@code data} of the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // subList takes an exclusive end index, not a length, so the range
        // must end at first + dataSize.
        int[] result = selectPivots(metric,
                data.subList(first, first + dataSize), numPivots);
        // shift sub-list positions back to positions in the original list
        for (int i = 0; i < result.length; i++)
            result[i] += first;
        return result;
    }

    /**
     * Uses FFT to obtain a candidate set, represents every object by its
     * distances to the candidates, runs EM-PCA on that matrix and selects
     * pivots from the PCA result by projection. The FFT candidates and the
     * final result are printed to standard output.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots; the FFT result itself when FFT returned
     *         no more than {@code min(data.size(), numPivots)} candidates
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        final int dataSize = data.size();

        // run fft to get a candidate set
        int[] fftResult = FFT.selectPivots(metric, data, numPivots
                * FFTScale);
        for (int i = 0; i < fftResult.length; i++)
            System.out.print(fftResult[i] + " ");
        System.out.println();

        // too few candidates to be worth reducing
        if (fftResult.length <= Math.min(dataSize, numPivots))
            return fftResult;

        // distance matrix: one row per object, one column per candidate
        DoubleMatrix2D dataMatrix = LargeDenseDoubleMatrix2D
                .createDoubleMatrix2D(dataSize, fftResult.length);
        for (int col = 0; col < fftResult.length; col++)
            for (int row = 0; row < dataSize; row++)
                dataMatrix.set(
                        row,
                        col,
                        metric.getDistance(data.get(row),
                                data.get(fftResult[col])));

        // compute PCA with EM method, dataMatrix is centerized after the
        // operation.
        DoubleMatrix2D pcaResult = GeDBIT.index.algorithms.PCA.EMPCA(
                dataMatrix, numPivots * NumPCScale);

        // select pivots from the pca result
        int[] result = GeDBIT.index.algorithms.PCA
                .pivotSelectionByPCAResultProjection(dataMatrix, pcaResult,
                        numPivots * NumPCScale, numPivots * NumPCScale
                                * NumPivotEachPC);
        for (int i = 0; i < result.length; i++)
            System.out.print(result[i] + " ");
        System.out.println();
        return result;
    }
},
LLEONFFT {
    /** FFT is asked for {@code numPivots * FFTScale} candidate pivots. */
    final int FFTScale = 100;
    @SuppressWarnings("unused")
    int count = 1;

    /**
     * Uses FFT to obtain a candidate set, represents every object by its
     * distances to the candidates and runs LLE on the transposed matrix to
     * select the pivots.
     * <p>
     * NOTE(review): FFT and the distance matrix work on
     * {@code data[0 .. dataSize)}, while {@code first} is only added to the
     * LLE result; confirm the intended handling of {@code first != 0}.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            offset added to every pivot selected by LLE
     * @param dataSize
     *            number of rows of the distance matrix
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots; the FFT result itself when FFT returned
     *         no more than {@code min(dataSize, numPivots)} candidates
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // run fft to get a candidate set
        int[] fftResult = FFT.selectPivots(metric, data, numPivots
                * FFTScale);

        // Too few candidates to be worth reducing. This check used to be
        // the accidental body of a leftover for-loop header, which skipped
        // it entirely when fftResult was empty.
        if (fftResult.length <= Math.min(dataSize, numPivots))
            return fftResult;

        // distance matrix: one row per object, one column per candidate
        DoubleMatrix2D dataMatrix = new DenseDoubleMatrix2D(dataSize,
                fftResult.length);
        for (int col = 0; col < fftResult.length; col++)
            for (int row = 0; row < dataSize; row++)
                dataMatrix.set(
                        row,
                        col,
                        metric.getDistance(data.get(row),
                                data.get(fftResult[col])));

        // LLE works on the transposed view of the distance matrix
        DoubleMatrix2D lleResult = GeDBIT.index.algorithms.LLE.runLLE(
                dataMatrix.viewDice(), 2);
        int result[] = GeDBIT.index.algorithms.LLE.selectFromResult(
                dataMatrix.viewDice(), lleResult);
        for (int i = 0; i < result.length; i++)
            result[i] += first;
        return result;
    }

    /**
     * Prints the data size, then uses the LLE-on-FFT selection for lists
     * with more than 18 elements and plain FFT otherwise.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        final int dataSize = data.size();
        System.out.println(dataSize);
        if (data.size() > 18) {
            return selectPivots(metric, data, 0, data.size(), numPivots);
        } else {
            return FFT
                    .selectPivots(metric, data, 0, data.size(), numPivots);
        }
    }
},
LLE {
    /** 0 until the three-argument overload has been called once, 1 afterwards. */
    int count;

    /**
     * Prints the data size and then picks the selection strategy: FFT on the
     * first call or for lists of 10000 elements or more (printing
     * "Using FFT"), FFT for lists of at most 15 elements, and the LLE based
     * selection otherwise.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        final int size = data.size();
        System.out.println(size);

        final boolean firstCall = (count == 0);
        if (firstCall || size >= 10000) {
            count = 1;
            System.out.println("Using FFT");
            return FFT.selectPivots(metric, data, 0, size, numPivots);
        }
        if (size <= 15)
            return FFT.selectPivots(metric, data, 0, size, numPivots);
        return selectPivots(metric, data, 0, size, numPivots);
    }

    /**
     * Builds the pairwise distance matrix, runs LLE on it and selects the
     * pivots from the LLE result with {@code selectByCov}.
     * <p>
     * NOTE(review): {@code first} and {@code dataSize} are not used here;
     * the distance matrix is computed from the whole {@code data} list.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            unused
     * @param dataSize
     *            unused
     * @param numPivots
     *            passed to {@code runLLE} as its second argument
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // pairwise distances
        final DoubleMatrix2D distances = GeDBIT.index.algorithms.LLE
                .pairWiseDistance(metric, data);
        // LLE embedding
        final DoubleMatrix2D embedding = GeDBIT.index.algorithms.LLE.runLLE(
                distances, numPivots);
        // pivots from the LLE result
        int[] pivots = GeDBIT.index.algorithms.LLE.selectByCov(distances,
                embedding);
        return pivots;
    }
},
MDS {
    /** 0 until the three-argument overload has been called once, 1 afterwards. */
    int count;

    /**
     * Prints the data size and then picks the selection strategy: FFT on the
     * first call or for lists of more than 40000 elements (printing
     * "using FFT"), the MDS based selection otherwise.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        System.out.print("datasize: ");
        final int size = data.size();
        System.out.println(size);

        final boolean firstCall = (count == 0);
        if (!firstCall && size <= 40000)
            return selectPivots(metric, data, 0, size, numPivots);

        count = 1;
        System.out.println("using FFT");
        return FFT.selectPivots(metric, data, 0, size, numPivots);
    }

    /**
     * Builds the pairwise distance matrix, runs MDS on it and selects the
     * pivots from the transposed MDS result with {@code selectByCov}.
     * <p>
     * NOTE(review): {@code first} and {@code dataSize} are not used here;
     * the distance matrix is computed from the whole {@code data} list.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            unused
     * @param dataSize
     *            unused
     * @param numPivots
     *            passed to {@code runMDS} as its second argument
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // pairwise distances
        final DoubleMatrix2D distances = GeDBIT.index.algorithms.LLE
                .pairWiseDistance(metric, data);
        // MDS embedding
        final DoubleMatrix2D embedding = GeDBIT.index.algorithms.MDS.runMDS(
                distances, numPivots);
        // pivots from the transposed MDS result
        final DoubleMatrix2D transposed = embedding.viewDice();
        return GeDBIT.index.algorithms.MDS.selectByCov(distances,
                transposed);
    }
},
COV {
    /** 0 until the three-argument overload has been called once, 1 afterwards. */
    int count;

    /**
     * Uses FFT on the first call or for lists of more than 10000 elements,
     * and the covariance based selection otherwise.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        if (count == 0 || data.size() > 10000) {
            count = 1;
            return FFT
                    .selectPivots(metric, data, 0, data.size(), numPivots);
        } else
            return selectPivots(metric, data, 0, data.size(), numPivots);
    }

    /**
     * Builds the pairwise distance matrix and selects pivots from it with
     * {@code Covariance.runCov}.
     * <p>
     * NOTE(review): {@code first} and {@code dataSize} are not used here;
     * the distance matrix is computed from the whole {@code data} list.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            unused
     * @param dataSize
     *            unused
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // compute the distance matrix
        DoubleMatrix2D matrix = GeDBIT.index.algorithms.LLE
                .pairWiseDistance(metric, data);
        // COV is the covariance method; this used to call
        // Correlation.runCor, which belongs to COR.
        return GeDBIT.index.algorithms.Covariance.runCov(matrix, numPivots);
    }
},
COR {
    /** 0 until the three-argument overload has been called once, 1 afterwards. */
    int count;

    /**
     * Uses FFT on the first call or for lists of more than 10000 elements,
     * and the correlation based selection otherwise.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int numPivots) {
        if (count == 0 || data.size() > 10000) {
            count = 1;
            return FFT
                    .selectPivots(metric, data, 0, data.size(), numPivots);
        } else
            return selectPivots(metric, data, 0, data.size(), numPivots);
    }

    /**
     * Builds the pairwise distance matrix and selects pivots from it with
     * {@code Correlation.runCor}.
     * <p>
     * NOTE(review): {@code first} and {@code dataSize} are not used here;
     * the distance matrix is computed from the whole {@code data} list.
     *
     * @param metric
     *            distance function used to compare objects
     * @param data
     *            objects to select pivots from
     * @param first
     *            unused
     * @param dataSize
     *            unused
     * @param numPivots
     *            number of pivots requested
     * @return the selected pivots
     */
    public int[] selectPivots(Metric metric,
            List<? extends IndexObject> data, int first, int dataSize,
            int numPivots) {
        // compute the distance matrix
        DoubleMatrix2D matrix = GeDBIT.index.algorithms.LLE
                .pairWiseDistance(metric, data);
        // COR is the correlation method; this used to call
        // Covariance.runCov, which belongs to COV.
        return GeDBIT.index.algorithms.Correlation
                .runCor(matrix, numPivots);
    }
};
/**
 * Global switch read by the FFT constant. With 0 the first element of the
 * range is used as the first pivot; with any other value the first pivot
 * comes from FindPivotWithLargestVar.findPivotByVarold. With a value of 2 or
 * more, FFT additionally computes one extra point and uses the third point
 * found as the second pivot.
 */
public static int fftopt = 0;
}