/**
* GeDBIT.index.algorithms.IncrementalSelection 2007.07.12
*
* Copyright Information:
*
* Change Log:
* 2007.07.12: created by Rui Mao
*/
package GeDBIT.index.algorithms;
import java.util.List;
import java.util.Random;
import GeDBIT.dist.Metric;
import GeDBIT.type.IndexObject;
public class IncrementalSelection implements PivotSelectionMethod,
java.io.Serializable {
private static final long serialVersionUID = 6928847050089693231L;
private final int constantA;
private final int constantN;
public IncrementalSelection(int a, int n) {
this.constantA = a;
this.constantN = n;
}
/**
* @param metric
* @param data
* @param numPivots
* @return
*/
public int[] selectPivots(Metric metric, List<? extends IndexObject> data,
final int numPivots) {
return selectPivots(metric, data, 0, data.size(), numPivots);
}
/**
* @param metric
* @param data
* @param first
* @param dataSize
* @param numPivots
* @return
*/
public int[] selectPivots(Metric metric, List<? extends IndexObject> data,
int first, int dataSize, final int numPivots) {
final int NP = (numPivots > dataSize) ? dataSize : numPivots;
int[] pivots = new int[NP];
if (NP == dataSize) {
for (int i = first; i < dataSize + first; i++)
pivots[i - first] = i;
return removeDuplicate(metric, data, pivots);
}
int m = Math.max(dataSize * dataSize / 100, dataSize);
final int A = (constantA > m) ? m : constantA;
final int N = (constantN > dataSize) ? 2 : constantN;
// System.out.println("data size= " + dataSize + ", constantA = " +
// constantA + ", constantN = " + constantN + ", A = " + A + ", N= " +
// N);
Random r = new Random();
// generate the A set
int[][] setA = new int[2][A];
for (int i = 0; i < 2; i++)
for (int j = 0; j < A; j++)
setA[i][j] = r.nextInt(dataSize) + first;
double[] D = new double[A];
for (int i = 0; i < A; i++)
D[i] = Double.NEGATIVE_INFINITY;
double[][] ND = new double[N][A]; // store the distances for the
// candidates in setN to samples in
// set A
int[] setN = new int[N];
double sum = 0;
double largestSum = -1;
for (int k = 0; k < NP; k++) {
// generate set N
for (int i = 0; i < N; i++)
setN[i] = r.nextInt(dataSize) + first;
// compute ND
for (int i = 0; i < N; i++) {
sum = 0;
for (int j = 0; j < A; j++) {
ND[i][j] = Math.max(D[j], Math.abs(metric.getDistance(
data.get(setA[0][j]), data.get(setN[i]))
- metric.getDistance(data.get(setA[1][j]),
data.get(setN[i]))));
sum += ND[i][j];
}
if (sum > largestSum) {
largestSum = sum;
pivots[k] = i; // stores the largest row of ND, but not the
// offset of the pivot yet
}
}
System.arraycopy(ND[pivots[k]], 0, D, 0, A);
pivots[k] = setN[pivots[k]]; // now really stores the offset of the
// pivot
}
return removeDuplicate(metric, data, pivots);
}
/**
* check the array of pivots, remove the duplicate.
*
* @param metric
* @param data
* @param pivots
* @return
*/
public static int[] removeDuplicate(Metric metric,
List<? extends IndexObject> data, int[] pivots) {
final int size = pivots.length;
boolean[] isDuplicate = new boolean[size];
for (int i = 0; i < size; i++)
isDuplicate[i] = false;
for (int i = 0; i < size-1; i++) {
if (isDuplicate[i])
continue;
for (int j = i+1; j < size; j++) {
if (isDuplicate[j])
continue;
if (metric.getDistance(data.get(i), data.get(j)) == 0)
isDuplicate[j] = true;
}
}
int counter = 0;
for (int i = 0; i < size; i++)
if (isDuplicate[i])
counter++;
if (counter == size)
return pivots;
else {
int[] temp = new int[counter];
counter = 0;
for (int i = 0; i < size; i++)
if (isDuplicate[i])
temp[counter++] = pivots[i];
return temp;
}
}
}